Changeset 11 for trunk/lib/nanownlib
- Timestamp: 07/16/15 20:40:01
- Location: trunk/lib/nanownlib
- Files: 1 added, 4 edited
trunk/lib/nanownlib/__init__.py (r10 → r11)

@@ -220 +220 @@
         if p['sent']==1 and (seen[key]['observed'] > p['observed']): #earliest sent
             seen[key] = p
-            suspect += 's'
+            suspect += 's' # duplicated sent packets
             continue
         if p['sent']==0 and (seen[key]['observed'] > p['observed']): #earliest rcvd
             seen[key] = p
-            suspect += 'r'
+            suspect += 'r' # duplicated received packets
             continue
 
@@ -236 +236 @@
     suspect,packets = removeDuplicatePackets(packets)
 
-    sort_key = lambda d: (d['tcpseq'],d['observed'])
+    sort_key = lambda d: (d['observed'],d['tcpseq'])
+    alt_key = lambda d: (d['tcpseq'],d['observed'])
     sent = sorted((p for p in packets if p['sent']==1 and p['payload_len']>0), key=sort_key)
     rcvd = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=sort_key)
 
-    alt_key = lambda d: (d['observed'],d['tcpseq'])
     rcvd_alt = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=alt_key)
 
     s_off = trim_sent
     if s_off >= len(sent):
+        suspect += 'd' # dropped packet?
         s_off = -1
     last_sent = sent[s_off]
 
     r_off = len(rcvd) - trim_rcvd - 1
-    if r_off <= 0:
+    if r_off < 0:
+        suspect += 'd' # dropped packet?
         r_off = 0
     last_rcvd = rcvd[r_off]
     if last_rcvd != rcvd_alt[r_off]:
-        suspect += 'R'
+        suspect += 'R' # reordered received packets
 
     packet_rtt = last_rcvd['observed'] - last_sent['observed']
@@ -263 +264 @@
         last_sent_ack = min(((p['observed'],p) for p in packets
                              if p['sent']==0 and p['payload_len']+last_sent['tcpseq']==p['tcpack']))[1]
+
     except Exception as e:
         sys.stderr.write("WARN: Could not find last_sent_ack.\n")
@@ -301 +303 @@
 def analyzeProbes(db):
     db.conn.execute("CREATE INDEX IF NOT EXISTS packets_probe ON packets (probe_id)")
+    db.conn.commit()
+
     pcursor = db.conn.cursor()
-    db.conn.commit()
-
     pcursor.execute("SELECT tcpts_mean FROM meta")
     try:
@@ -346 +348 @@
             db.addTrimAnalyses([analysis])
         except Exception as e:
-            traceback.print_exc()
+            #traceback.print_exc()
            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
 
@@ -367 +369 @@
             analysis['probe_id'] = probe_id
         except Exception as e:
-            print(e)
-            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % pid)
+            #traceback.print_exc()
+            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
 
         db.addTrimAnalyses([analysis])
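Reviewer's note: the swapped sort keys above make capture time the primary ordering for received packets, with the sequence-number ordering kept as a cross-check. A minimal sketch of why comparing the tails of the two orderings flags reordering; this is not part of the changeset, and the two-packet list is made up:

    # Two received packets whose arrival order disagrees with TCP sequence order:
    rcvd = [{'observed': 103, 'tcpseq': 2000},   # later data, captured first
            {'observed': 105, 'tcpseq': 1000}]   # earlier data, captured second

    sort_key = lambda d: (d['observed'], d['tcpseq'])  # time-major (new sort_key)
    alt_key = lambda d: (d['tcpseq'], d['observed'])   # sequence-major (new alt_key)

    rcvd_sorted = sorted(rcvd, key=sort_key)
    rcvd_alt = sorted(rcvd, key=alt_key)

    suspect = ''
    r_off = len(rcvd_sorted) - 1
    if rcvd_sorted[r_off] != rcvd_alt[r_off]:
        suspect += 'R'   # reordered received packets, as flagged above
    print(suspect)       # prints 'R'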
trunk/lib/nanownlib/parallel.py (r9 → r11)

@@ -1 +1 @@
 #
 
+import sys
 import threading
 import queue
@@ -33 +34 @@
                 self.resultq.put((job_id, self.target(*args)))
             except Exception as e:
-                sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n" ,
+                sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n" %
                                  (str(job_id),str(e)))
             self.workq.task_done()
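Reviewer's note: the one-character change above fixes a real crash. file.write() takes a single string, so the stray comma passed the tuple as a second positional argument and raised a TypeError whenever a job failed; the new `import sys` is what makes the handler's sys.stderr reference resolve at all. A quick illustration with made-up job values:

    import sys

    job_id, err = "job-42", ValueError("bad input")   # made-up values
    # With ',' this was write(str, tuple) -> TypeError; with '%' the tuple
    # is formatted into the string before write() is called:
    sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n" %
                     (str(job_id), str(err)))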
trunk/lib/nanownlib/stats.py (r10 → r11)

@@ -144 +144 @@
 def ubersummary(values, distance=25):
     left2 = 50-distance
+    left3 = 50-(distance/2.0)
     left1 = left2/2.0
-    left3 = (left2+50)/2.0
     right2 = 50+distance
-    right3 = (right2+50)/2.0
+    right3 = 50+(distance/2.0)
     right1 = (right2+100)/2.0
     l1,l2,l3,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,right3,right2,right1))
-    #print(left1,left2,left3,50,right3,right2,right1)
     #print(l1,l2,l3,m,r3,r2,r1)
     return (l1+l2*4+l3+r3+r2*4+r1)/12.0
     #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
 
-def quadsummary(values, distance=25):
-    left2 = 50-distance
-    left1 = left2/2.0
-    right2 = 50+distance
-    right1 = (right2+100)/2.0
-    l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1))
-    #print(left1,left2,left3,50,right3,right2,right1)
-    #print(l1,l2,l3,m,r3,r2,r1)
-    return (l1+l2+r2+r1)/4.0
-    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
-
 
 def quadsummary(values, distance=25):
     left1 = 50-distance
@@ -177 +166 @@
     #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
 
 
+def tsvalwmean(subseries):
+    weights = [(s['unusual_packet']+s['other_packet'])**2 for s in subseries]
+    normalizer = sum(weights)/len(weights)
+    return numpy.mean([weights[i]*(subseries[i]['unusual_tsval']-subseries[i]['other_tsval'])/normalizer
+                       for i in range(len(weights))])
+
+#def tsvalwmean(subseries):
+#    return numpy.mean([(s['unusual_tsval']-s['other_tsval']) for s in subseries])
+
+
 def weightedMean(derived, weights):
     normalizer = sum(weights.values())/len(weights)
@@ -186 +185 @@
     return statistics.mean([w*(derived[k]['long_tsval']-derived[k]['short_tsval'])/normalizer for k,w in weights.items()])
 
+
 
 
 def estimateMean(trustFunc, weightFunc, alpha, derived):
@@ -199 +200 @@
 
 
-#def estimateMedian(trustFunc, weightFunc, alpha, derived):
-#    trust = trustValues(derived, trustFunc)
-#    weights = weightFunc(derived, trust, alpha)
-
-#    return statistics.median([(derived[k]['long']-derived[k]['short']) for k,w in weights.items() if w > 0.0])
-
-def estimateMedian(derived):
-    return statistics.median([(d['long']-d['short']) for d in derived.values()])
-
-
-def estimateMidsummary(derived):
-    return midsummary([(d['long']-d['short']) for d in derived.values()])
-
-
-def estimateTrimean(derived):
-    return trimean([(d['long']-d['short']) for d in derived.values()])
-
-
-def tTest(expected_mean, derived):
-    diffs = [(d['long']-d['short']) for d in derived.values()]
-    null_tval, null_pval = scipy.stats.ttest_1samp(diffs, 0.0)
-    tval, pval = scipy.stats.ttest_1samp(diffs, expected_mean)
-
-    if pval < null_pval:
-        return 1
-    else:
-        return 0
-
-
-def diffMedian(derived):
-    l = [tc['long']-tc['short'] for s,tc in derived.items()]
-    return statistics.median(l)
-
-
-def subsample_ids(db, probe_type, subsample_size=None):
-    cursor = db.conn.cursor()
-    cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))
-    population_size = cursor.fetchone()[0]
-    #print("population_size:", population_size)
-    if subsample_size == None or subsample_size > population_size:
-        subsample_size = population_size
-
-    start = numpy.random.random_integers(0,population_size-1)
-    cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ? OFFSET ?", (probe_type,subsample_size,start))
-    for row in cursor:
-        subsample_size -= 1
-        yield row['sample']
-
-    if subsample_size > 0:
-        cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ?", (probe_type,subsample_size))
-        for row in cursor:
-            yield row['sample']
-
-
-def subsample(db, probe_type, subsample_size=None):
-    cursor = db.conn.cursor()
-    cursor.execute("SELECT count(test_case) FROM (SELECT test_case FROM probes GROUP BY test_case)")
-    num_test_cases = cursor.fetchone()[0]
-
-    for sid in subsample_ids(db, probe_type, subsample_size):
-        cursor.execute("SELECT test_case,tc_order,time_of_day,reported,userspace_rtt,suspect,packet_rtt,tsval_rtt FROM probes p,analysis a WHERE p.sample=? and a.probe_id=p.id", (sid,))
-        probes = cursor.fetchall()
-        if len(probes) != num_test_cases:
-            sys.stderr.write("WARN: sample %d had %d probes, but %d expected! Discarding...\n" % (sid, len(probes), num_test_cases))
-            continue
-        yield (sid,[dict(r) for r in probes])
-
-
-def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
-    population_size = db.populationSize(probe_type)
-
-    if size == None or size > population_size:
-        size = population_size
-    if offset == None or offset >= population_size or offset < 0:
-        offset = numpy.random.random_integers(0,population_size-1)
-
-    query="""
-      SELECT %(field)s AS unusual_case,
-             (SELECT avg(%(field)s) FROM probes,analysis
-              WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
-      FROM (SELECT probes.sample,%(field)s FROM probes,analysis
-            WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
-      LIMIT :size OFFSET :offset
-    """ % {"field":field}
-
-    params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}
-    cursor = db.conn.cursor()
-    cursor.execute(query, params)
-    ret_val = [dict(row) for row in cursor.fetchall()]
-    #for row in cursor:
-    #    size -= 1
-    #    yield dict(row)
-
-    size -= len(ret_val)
-    if size > 0:
-        params['offset'] = 0
-        params['size'] = size
-        cursor.execute(query, params)
-        ret_val += [dict(row) for row in cursor.fetchall()]
-        #for row in cursor:
-        #    yield dict(row)
-
-    return ret_val
-
-
-# if test_cases=None, include all of them.  Otherwise, include only the specified test cases.
-def samples2Distributions(samples, field, test_cases=None):
-    ret_val = {}
-
-    for sid,probes in samples:
-        for p in probes:
-            if p['test_case'] in ret_val:
-                ret_val[p['test_case']].append(p[field])
-            elif test_cases == None or p['test_case'] in test_cases:
-                ret_val[p['test_case']] = [p[field]]
-
-    return ret_val
-
-
-def samples2MeanDiffs(samples, field, unusual_case):
-    ret_val = {}
-
-    for sid,probes in samples:
-        unusual_value = None
-        for p in probes:
-            if p['test_case'] == unusual_case:
-                unusual_value = p[field]
-                break
-        yield statistics.mean([unusual_value-p[field] for p in probes if p['test_case'] != unusual_case])
-
-
-def bootstrap(estimator, db, probe_type, test_cases, subsample_size, num_trials):
-    ret_val = []
-    for t in range(num_trials):
-        ret_val.append(estimator(test_cases, subsample(db, probe_type, subsample_size)))
-
-    return ret_val
-
-
-def bootstrap2(estimator, db, probe_type, subsample_size, num_trials):
-    ret_val = []
-    for t in range(num_trials):
-        ret_val.append(estimator(subsample(db, probe_type, subsample_size)))
-
-    return ret_val
-
-
 def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
     ret_val = []
@@ -355 +208 @@
 
 
-# Returns the test case name that clearly has higher RTT; otherwise, returns None
-def boxTest(params, test_cases, samples):
-    if len(test_cases) != 2:
-        # XXX: somehow generalize the box test to handle more than 2 cases
-        raise Exception()
-    dists = samples2Distributions(samples,'packet_rtt', test_cases) #XXX: field from params
-
-    tmp1,tmp2 = dists.items()
-    test_case1,dist1 = tmp1
-    test_case2,dist2 = tmp2
-
-    d1_high = numpy.percentile(dist1, params['high'])
-    d2_low = numpy.percentile(dist2, params['low'])
-    if d1_high < d2_low:
-        return test_case2
-
-    d1_low = numpy.percentile(dist1, params['low'])
-    d2_high = numpy.percentile(dist2, params['high'])
-
-    if d2_high < d1_low:
-        return test_case1
-
-    return None
-
-
 # Returns 1 if unusual_case is unusual in the expected direction
 #         0 if it isn't unusual
 #        -1 if it is unusual in the wrong direction
 def multiBoxTest(params, greater, samples):
-    uc = [s['unusual_case'] for s in samples]
-    rest = [s['other_cases'] for s in samples]
+    uc = [s['unusual_packet'] for s in samples]
+    rest = [s['other_packet'] for s in samples]
 
     uc_high,uc_low = numpy.percentile(uc, (params['high'],params['low']))
@@ -407 +235 @@
 #         0 otherwise
 def summaryTest(f, params, greater, samples):
-    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
+    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
 
     mh = f(diffs, params['distance'])
@@ -420 +248 @@
     else:
         return 0
+
 
 midsummaryTest = functools.partial(summaryTest, midsummary)
@@ -451 +280 @@
 
 def kfilter(params, observations):
-    x = numpy.array(observations) 
+    x = numpy.array(observations)
     movement = 0
-    est = [] 
+    est = []
     var = []
     kf = KalmanFilter1D(x0 = quadsummary(x), # initial state
-                        #P = 10000,
-                        P = 10, # initial variance
+                        #P = 10000, # initial variance
+                        P = 10,     # initial variance
                         R = numpy.std(x), # msensor noise
                         Q = 0) # movement noise
@@ -471 +300 @@
 
 def kalmanTest(params, greater, samples):
-    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
+    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
 
     m = kfilter(params, diffs)['est'][-1]
@@ -486 +315 @@
 
 
-def kalmanTest2(params, greater, samples):
-    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
-
-    estimates = []
-    size = 500
-    for i in range(100):
-        off = random.randrange(0,len(diffs))
-        sub = diffs[off:size]
-        if len(sub) < size:
-            sub += diffs[0:size-len(sub)]
-        estimates.append(kfilter(params, sub)['est'][-1])
-
-    m = quadsummary(estimates)
+def tsvalwmeanTest(params, greater, samples):
+    m = tsvalwmean(samples)
     if greater:
         if m > params['threshold']:
@@ -509 +327 @@
     else:
         return 0
-
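Reviewer's note: the ubersummary() edit looks behavioral but appears to be purely cosmetic. For any distance, the old and new expressions for left3 and right3 select the same percentiles; only a stale debug print goes away. A quick check:

    for distance in (10, 25, 40):
        left2, right2 = 50 - distance, 50 + distance
        # old formula == new formula, for both the left and right points
        assert (left2 + 50) / 2.0 == 50 - (distance / 2.0)
        assert (right2 + 50) / 2.0 == 50 + (distance / 2.0)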
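The new tsvalwmean() is, algebraically, a weighted mean of the per-sample TCP timestamp RTT differences, sum(w_i*d_i)/sum(w_i), with weights w_i = (unusual_packet + other_packet)**2 taken from the rows that storage.subseries() now returns (see the storage.py hunk below). A worked example with made-up rows; equal weights reduce it to the plain mean:

    import numpy

    def tsvalwmean(subseries):   # copied verbatim from the r11 hunk above
        weights = [(s['unusual_packet']+s['other_packet'])**2 for s in subseries]
        normalizer = sum(weights)/len(weights)
        return numpy.mean([weights[i]*(subseries[i]['unusual_tsval']-subseries[i]['other_tsval'])/normalizer
                           for i in range(len(weights))])

    samples = [{'unusual_packet': 2.0, 'other_packet': 2.0,
                'unusual_tsval': 110.0, 'other_tsval': 100.0},
               {'unusual_packet': 2.0, 'other_packet': 2.0,
                'unusual_tsval': 104.0, 'other_tsval': 100.0}]
    # weights = [16.0, 16.0], normalizer = 16.0, so this is mean([10.0, 4.0])
    print(tsvalwmean(samples))   # 7.0

tsvalwmeanTest() then simply compares this value against params['threshold'], replacing the removed kalmanTest2().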
trunk/lib/nanownlib/storage.py (r10 → r11)

@@ -115 +115 @@
 
 
-    def subseries(self, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
-        cache_key = (probe_type,unusual_case,field)
-
+    def subseries(self, probe_type, unusual_case, size=None, offset=None):
+        cache_key = (probe_type,unusual_case)
         if cache_key not in self._population_cache:
             query="""
-              SELECT %(field)s AS unusual_case,
-                     (SELECT avg(%(field)s) FROM probes,analysis
-                      WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
-              FROM (SELECT probes.sample,%(field)s FROM probes,analysis
+              SELECT packet_rtt AS unusual_packet,
+                     (SELECT avg(packet_rtt) FROM probes,analysis
+                      WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_packet,
+
+                     tsval_rtt AS unusual_tsval,
+                     (SELECT avg(tsval_rtt) FROM probes,analysis
+                      WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_tsval,
+
+                     reported AS unusual_reported,
+                     (SELECT avg(reported) FROM probes,analysis
+                      WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_reported
+
+              FROM (SELECT probes.sample,packet_rtt,tsval_rtt,reported FROM probes,analysis
                     WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
-            """ % {"field":field}
+            """
 
             params = {"probe_type":probe_type, "unusual_case":unusual_case}
@@ -207 +215 @@
         return [self._insert('trim_analysis', row) for row in analyses]
 
-    def addClassifierResults(self, results):
+    def addClassifierResult(self, results):
         ret_val = self._insert('classifier_results', results)
         self.conn.commit()
         return ret_val
 
-    def fetchClassifierResult(self, classifier, trial_type, num_observations):
+    def fetchClassifierResult(self, classifier, trial_type, num_observations, params=None):
         query = """
           SELECT * FROM classifier_results
-          WHERE classifier=? AND trial_type=? AND num_observations=?
-          ORDER BY false_positives+false_negatives
-          LIMIT 1;
+          WHERE classifier=:classifier
+                AND trial_type=:trial_type
+                AND num_observations=:num_observations"""
+        if params != None:
+            query += """
+                AND params=:params"""
+        query += """
+          ORDER BY false_positives+false_negatives
+          LIMIT 1
         """
+
+        qparams = {'classifier':classifier, 'trial_type':trial_type,
+                   'num_observations':num_observations,'params':params}
         cursor = self.conn.cursor()
-        cursor.execute(query, (classifier, trial_type, num_observations))
+        cursor.execute(query, qparams)
         ret_val = cursor.fetchone()
-
         if ret_val != None:
             ret_val = dict(ret_val)
         return ret_val
 
     def deleteClassifierResults(self, classifier, trial_type, num_observations=None):
         params = {"classifier":classifier,"trial_type":trial_type,"num_observations":num_observations}
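Reviewer's note: a hedged sketch of how the extended lookup might be called. The classifier name, trial type, and params string below are invented for illustration; only the column names visible in the query above are assumed:

    # Best result for a classifier regardless of parameters (old behavior):
    best = db.fetchClassifierResult('midsummary', 'train', 1000)
    # New in r11: optionally restrict the search to one parameter set,
    # matched against the stored params column:
    best = db.fetchClassifierResult('midsummary', 'train', 1000,
                                    params='{"distance": 25}')
    if best != None:
        print(best['false_positives'], best['false_negatives'])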