Changeset 11
- Timestamp: 07/16/15 20:40:01
- Location: trunk
- Files: 1 added, 6 edited
trunk/bin/graph
r10 → r11

-def differences(db, unusual_case, column='packet_rtt'):
-    cursor = db.conn.cursor()
-    query="""
-      SELECT %(column)s-(SELECT avg(%(column)s) FROM probes,analysis
-                         WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type in ('train','test') AND sample=u.sample)
-      FROM (SELECT probes.sample,%(column)s FROM probes,analysis
-            WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type in ('train','test')) u
-    """ % {"column":column}
-    params = {"unusual_case":unusual_case}
-    cursor.execute(query, params)
-    for row in cursor:
-        yield row[0]
+def differences(db, unusual_case, rtt_type='packet'):
+    ret_val = [s['unusual_'+rtt_type]-s['other_'+rtt_type] for s in db.subseries('train', unusual_case)]
+    ret_val += [s['unusual_'+rtt_type]-s['other_'+rtt_type] for s in db.subseries('test', unusual_case)]
+    return ret_val
+
+def null_differences(db, unusual_case, rtt_type='packet'):
+    ret_val = [s['unusual_'+rtt_type]-s['other_'+rtt_type] for s in db.subseries('train_null', unusual_case)]
+    return ret_val

…
 print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
 print('packet_rtt diff MAD: %f' % mad(diffs))
-print('reported diff trimean: %f' % trimean(reported_diffs))
-print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
-print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
-print('reported diff MAD: %f' % mad(reported_diffs))
-
-import cProfile
-kresults = kfilter({},diffs)
-#print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
-print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
-kresults = kfilter({},reported_diffs)
-#print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
-print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
+try:
+    print('reported diff trimean: %f' % trimean(reported_diffs))
+    print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
+    print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
+    print('reported diff MAD: %f' % mad(reported_diffs))
+
+    import cProfile
+    start = time.time()
+    kresults = kfilter({},diffs)
+    #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
+    print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
+    kresults = kfilter({},reported_diffs)
+    #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
+    print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
+    print("kfilter time: %f" % (time.time()-start))
+except:
+    pass
+
+print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
+print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
+print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
+print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))

…
 #cut_off_high = all_data[int(len(all_data)*0.997)]

+
+def plotSingleProbe(probe_id=None):
+    if probe_id == None:
+        cursor = db.conn.cursor()
+        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
+        cursor.execute(query)
+        probe_id = cursor.fetchone()[0]
+
+    cursor = db.conn.cursor()
+    query="""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=1"""
+    cursor.execute(query, (probe_id,))
+    pkts = cursor.fetchall()
+    sent_payload = [row[0] for row in pkts if row[1] != 0]
+    sent_other = [row[0] for row in pkts if row[1] == 0]
+
+    query="""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=0"""
+    cursor.execute(query, (probe_id,))
+    pkts = cursor.fetchall()
+    rcvd_payload = [row[0] for row in pkts if row[1] != 0]
+    rcvd_other = [row[0] for row in pkts if row[1] == 0]
+
+    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
+    #cursor.execute(query, (probe_id,))
+    #reported,tod = cursor.fetchone()
+    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]
+
+    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
+    plt.clf()
+    plt.title("Single HTTP Request - Packet Times")
+    sp = plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
+    so = plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
+    rp = plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
+    ro = plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
+    #plt.legend((s,r), ('sent','received'))
+    #plt.savefig('../img/http-packet-times.svg')
+    plt.show()
+
+#plotSingleProbe()
+
+
+def graphTestResults():
+    plt.clf()
+    plt.title("Test Results")
+    plt.xlabel('sample size')
+    plt.ylabel('error rate')
+    legend = []
+    colors = ['red','blue','green','purple','orange','black','brown']
+    color_id = 0
+
+    cursor = db.conn.cursor()
+    query = """
+    SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
+    """
+    cursor.execute(query)
+    classifiers = []
+    for c in cursor:
+        classifiers.append(c[0])
+
+    for classifier in classifiers:
+        query="""
+        SELECT params FROM classifier_results
+        WHERE trial_type='test'
+              AND classifier=:classifier
+              AND (false_positives+false_negatives)/2.0 < 5.0
+        ORDER BY num_observations,(false_positives+false_negatives)
+        LIMIT 1
+        """
+        cursor.execute(query, {'classifier':classifier})
+        row = cursor.fetchone()
+        if row == None:
+            query="""
+            SELECT params FROM classifier_results
+            WHERE trial_type='test' and classifier=:classifier
+            ORDER BY (false_positives+false_negatives),num_observations
+            LIMIT 1
+            """
+            cursor.execute(query, {'classifier':classifier})
+            row = cursor.fetchone()
+            if row == None:
+                sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
+                continue
+
+        best_params = row[0]
+        query="""
+        SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
+        WHERE trial_type='test'
+              AND classifier=:classifier
+              AND params=:params
+        ORDER BY num_observations
+        """
+        cursor.execute(query, {'classifier':classifier,'params':best_params})
+
+        num_obs = []
+        performance = []
+        for row in cursor:
+            num_obs.append(row[0])
+            performance.append(row[1])
+        #print(num_obs,performance)
+        path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
+        plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
+        legend.append((classifier,path))
+        color_id = (color_id+1) % len(colors)
+
+    plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='xx-small')
+    plt.show()
+
+graphTestResults()
+
+sys.exit(0)

 plt.clf()
…
 plt.show()

+
+
+plt.clf()
+plt.title("Simple HTTP Request")
+plt.xlabel('Time of Day')
+plt.ylabel('')
+s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
+r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
+plt.legend((s,r), ('sent','received'), scatterpoints=1)
+plt.show()
+
+sys.exit(0)
 short_overtime,long_overtime,diff_overtime = None,None,None
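The rewritten differences() and null_differences() helpers replace the old inline SQL with list comprehensions over db.subseries(). The standalone sketch below is illustrative only: FakeDB is a hypothetical stand-in for nanownlib.storage.db and the timings are synthetic. It shows how the two helpers can be compared, with the real differences centering away from zero while the null differences stay near zero.

# Hypothetical illustration of the differences()/null_differences() pattern from r11;
# FakeDB is a stand-in for nanownlib.storage.db and is not part of the changeset.
import random
import statistics

class FakeDB:
    def subseries(self, probe_type, unusual_case):
        # 'train'/'test' samples carry a small constant bias; 'train_null' samples do not.
        bias = 0 if probe_type == 'train_null' else 30
        return [{'unusual_packet': random.gauss(1000 + bias, 50),
                 'other_packet':   random.gauss(1000, 50)} for _ in range(500)]

def differences(db, unusual_case, rtt_type='packet'):
    ret_val = [s['unusual_' + rtt_type] - s['other_' + rtt_type]
               for s in db.subseries('train', unusual_case)]
    ret_val += [s['unusual_' + rtt_type] - s['other_' + rtt_type]
                for s in db.subseries('test', unusual_case)]
    return ret_val

def null_differences(db, unusual_case, rtt_type='packet'):
    return [s['unusual_' + rtt_type] - s['other_' + rtt_type]
            for s in db.subseries('train_null', unusual_case)]

db = FakeDB()
print("diff median:      %f" % statistics.median(differences(db, 'long')))       # roughly 30
print("null diff median: %f" % statistics.median(null_differences(db, 'long')))  # roughly 0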
trunk/bin/train
r10 r11 25 25 sys.path.append("%s/../lib" % script_dir) 26 26 27 27 28 from nanownlib import * 28 29 from nanownlib.stats import * 30 from nanownlib.train import * 29 31 from nanownlib.parallel import WorkerThreads 30 32 import nanownlib.storage … … 36 38 #parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":1010000}', 37 39 # help='JSON representation of echo timing cases. Default: {"short":10000,"long":1010000}') 38 parser.add_argument('--retrain', action='append', default=[], help='Force a classifier to be retrained. May be specified multiple times.') 40 parser.add_argument('--unusual-case', action='store', default=None, help='Specify the unusual case and whether it is greater than the other cases. Format: {case name},{1 or 0}') 41 parser.add_argument('--retrain', action='append', default=[], help='Force a classifier to be retrained (and retested). May be specified multiple times.') 39 42 parser.add_argument('--retest', action='append', default=[], help='Force a classifier to be retested. May be specified multiple times.') 40 43 parser.add_argument('session_data', default=None, 41 44 help='Database file storing session information') 42 45 options = parser.parse_args() 46 db = nanownlib.storage.db(options.session_data) 43 47 44 48 45 def trainBoxTest(db, unusual_case, greater, num_observations):46 db.resetOffsets()47 48 def trainAux(low,high,num_trials):49 estimator = functools.partial(multiBoxTest, {'low':low, 'high':high}, greater)50 estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials)51 null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)52 53 bad_estimates = len([e for e in estimates if e != 1])54 bad_null_estimates = len([e for e in null_estimates if e != 0])55 56 false_negatives = 100.0*bad_estimates/num_trials57 false_positives = 100.0*bad_null_estimates/num_trials58 return false_positives,false_negatives59 60 #start = time.time()61 wt = WorkerThreads(2, trainAux)62 63 num_trials = 20064 width = 1.065 performance = []66 for low in range(0,50):67 wt.addJob(low, (low,low+width,num_trials))68 wt.wait()69 while not wt.resultq.empty():70 job_id,errors = wt.resultq.get()71 fp,fn = errors72 performance.append(((fp+fn)/2.0, job_id, fn, fp))73 performance.sort()74 #pprint.pprint(performance)75 #print(time.time()-start)76 77 num_trials = 20078 lows = [p[1] for p in performance[0:5]]79 widths = [w/10.0 for w in range(5,65,5)]80 performance = []81 for width in widths:82 false_positives = []83 false_negatives = []84 for low in lows:85 wt.addJob(low,(low,low+width,num_trials))86 wt.wait()87 while not wt.resultq.empty():88 job_id,errors = wt.resultq.get()89 fp,fn = errors90 false_negatives.append(fn)91 false_positives.append(fp)92 93 #print(width, false_negatives)94 #print(width, false_positives)95 #performance.append(((statistics.mean(false_positives)+statistics.mean(false_negatives))/2.0,96 # width, statistics.mean(false_negatives), statistics.mean(false_positives)))97 performance.append((abs(statistics.mean(false_positives)-statistics.mean(false_negatives)),98 width, statistics.mean(false_negatives), statistics.mean(false_positives)))99 performance.sort()100 #pprint.pprint(performance)101 good_width = performance[0][1]102 #print("good_width:",good_width)103 104 105 num_trials = 500106 performance = []107 for low in lows:108 wt.addJob(low, (low,low+good_width,num_trials))109 wt.wait()110 while not wt.resultq.empty():111 job_id,errors = wt.resultq.get()112 fp,fn = errors113 
performance.append(((fp+fn)/2.0, job_id, fn, fp))114 performance.sort()115 #pprint.pprint(performance)116 best_low = performance[0][1]117 #print("best_low:", best_low)118 119 120 num_trials = 500121 widths = [good_width+(x/100.0) for x in range(-70,75,5) if good_width+(x/100.0) > 0.0]122 performance = []123 for width in widths:124 wt.addJob(width, (best_low,best_low+width,num_trials))125 wt.wait()126 while not wt.resultq.empty():127 job_id,errors = wt.resultq.get()128 fp,fn = errors129 #performance.append(((fp+fn)/2.0, job_id, fn, fp))130 performance.append((abs(fp-fn), job_id, fn, fp))131 performance.sort()132 #pprint.pprint(performance)133 best_width=performance[0][1]134 #print("best_width:",best_width)135 #print("final_performance:", performance[0][0])136 137 wt.stop()138 params = json.dumps({"low":best_low,"high":best_low+best_width})139 return {'trial_type':"train",140 'num_observations':num_observations,141 'num_trials':num_trials,142 'params':params,143 'false_positives':performance[0][3],144 'false_negatives':performance[0][2]}145 146 147 def trainSummary(summaryFunc, db, unusual_case, greater, num_observations):148 db.resetOffsets()149 stest = functools.partial(summaryTest, summaryFunc)150 151 def trainAux(distance, threshold, num_trials):152 estimator = functools.partial(stest, {'distance':distance,'threshold':threshold}, greater)153 estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials)154 null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)155 156 bad_estimates = len([e for e in estimates if e != 1])157 bad_null_estimates = len([e for e in null_estimates if e != 0])158 159 false_negatives = 100.0*bad_estimates/num_trials160 false_positives = 100.0*bad_null_estimates/num_trials161 return false_positives,false_negatives162 163 #determine expected delta based on differences164 mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]165 threshold = summaryFunc(mean_diffs)/2.0166 #print("init_threshold:", threshold)167 168 wt = WorkerThreads(2, trainAux)169 170 num_trials = 500171 performance = []172 for distance in range(1,50):173 wt.addJob(distance, (distance,threshold,num_trials))174 wt.wait()175 while not wt.resultq.empty():176 job_id,errors = wt.resultq.get()177 fp,fn = errors178 performance.append(((fp+fn)/2.0, job_id, fn, fp))179 180 performance.sort()181 #pprint.pprint(performance)182 good_distance = performance[0][1]183 #print("good_distance:",good_distance)184 185 186 num_trials = 500187 performance = []188 for t in range(80,122,2):189 wt.addJob(threshold*(t/100.0), (good_distance,threshold*(t/100.0),num_trials))190 wt.wait()191 while not wt.resultq.empty():192 job_id,errors = wt.resultq.get()193 fp,fn = errors194 #performance.append(((fp+fn)/2.0, job_id, fn, fp))195 performance.append((abs(fp-fn), job_id, fn, fp))196 performance.sort()197 #pprint.pprint(performance)198 good_threshold = performance[0][1]199 #print("good_threshold:", good_threshold)200 201 202 num_trials = 500203 performance = []204 for d in [good_distance+s for s in range(-4,5) if good_distance+s > -1]:205 wt.addJob(d, (d,good_threshold,num_trials))206 wt.wait()207 while not wt.resultq.empty():208 job_id,errors = wt.resultq.get()209 fp,fn = errors210 performance.append(((fp+fn)/2.0, job_id, fn, fp))211 performance.sort()212 #pprint.pprint(performance)213 best_distance = performance[0][1]214 #print("best_distance:",best_distance)215 216 217 num_trials = 500218 performance = []219 for t 
in range(90,111):220 wt.addJob(good_threshold*(t/100.0), (best_distance,good_threshold*(t/100.0),num_trials))221 wt.wait()222 while not wt.resultq.empty():223 job_id,errors = wt.resultq.get()224 fp,fn = errors225 #performance.append(((fp+fn)/2.0, job_id, fn, fp))226 performance.append((abs(fp-fn), job_id, fn, fp))227 performance.sort()228 #pprint.pprint(performance)229 best_threshold = performance[0][1]230 #print("best_threshold:", best_threshold)231 232 wt.stop()233 params = json.dumps({'distance':best_distance,'threshold':best_threshold})234 return {'trial_type':"train",235 'num_observations':num_observations,236 'num_trials':num_trials,237 'params':params,238 'false_positives':performance[0][3],239 'false_negatives':performance[0][2]}240 241 242 def trainKalman(db, unusual_case, greater, num_observations):243 db.resetOffsets()244 245 def trainAux(params, num_trials):246 estimator = functools.partial(kalmanTest, params, greater)247 estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials)248 null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)249 250 bad_estimates = len([e for e in estimates if e != 1])251 bad_null_estimates = len([e for e in null_estimates if e != 0])252 253 false_negatives = 100.0*bad_estimates/num_trials254 false_positives = 100.0*bad_null_estimates/num_trials255 return false_positives,false_negatives256 257 mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]258 good_threshold = kfilter({},mean_diffs)['est'][-1]/2.0259 260 wt = WorkerThreads(2, trainAux)261 num_trials = 200262 performance = []263 for t in range(90,111):264 params = {'threshold':good_threshold*(t/100.0)}265 wt.addJob(good_threshold*(t/100.0), (params,num_trials))266 wt.wait()267 while not wt.resultq.empty():268 job_id,errors = wt.resultq.get()269 fp,fn = errors270 #performance.append(((fp+fn)/2.0, job_id, fn, fp))271 performance.append((abs(fp-fn), job_id, fn, fp))272 performance.sort()273 #pprint.pprint(performance)274 best_threshold = performance[0][1]275 #print("best_threshold:", best_threshold)276 params = {'threshold':best_threshold}277 278 wt.stop()279 280 return {'trial_type':"train",281 'num_observations':num_observations,282 'num_trials':num_trials,283 'params':json.dumps(params),284 'false_positives':performance[0][3],285 'false_negatives':performance[0][2]}286 287 288 #determine expected delta based on differences289 classifiers = {'boxtest':{'train':trainBoxTest, 'test':multiBoxTest, 'train_results':[]},290 'midsummary':{'train':functools.partial(trainSummary, midsummary), 'test':midsummaryTest, 'train_results':[]},291 #'ubersummary':{'train':functools.partial(trainSummary, ubersummary), 'test':ubersummaryTest, 'train_results':[]},292 'quadsummary':{'train':functools.partial(trainSummary, quadsummary), 'test':quadsummaryTest, 'train_results':[]},293 'kalman':{'train':trainKalman, 'test':kalmanTest, 'train_results':[]},294 #'_trimean':{'train':None, 'test':trimeanTest, 'train_results':[]},295 }296 297 298 db = nanownlib.storage.db(options.session_data)299 300 import cProfile301 49 302 50 def trainClassifier(db, unusual_case, greater, classifier, retrain=False): … … 324 72 print("number of observations: %d | error: %f | false_positives: %f | false_negatives: %f | train time: %s | params: %s" 325 73 % (num_obs, error, result['false_positives'],result['false_negatives'], train_time, result['params'])) 326 db.addClassifierResult s(result)74 db.addClassifierResult(result) 
327 75 classifiers[classifier]['train_results'].append(result) 328 76 … … 355 103 356 104 105 def getResult(classifier, params, num_obs, num_trials): 106 jparams = json.dumps(params, sort_keys=True) 107 result = db.fetchClassifierResult(classifier, 'test', num_obs, jparams) 108 if result: 109 fp = result['false_positives'] 110 fn = result['false_negatives'] 111 else: 112 fp,fn = testAux(params, num_trials, num_obs) 113 result = {'classifier':classifier, 114 'trial_type':"test", 115 'num_observations':num_obs, 116 'num_trials':num_trials, 117 'params':jparams, 118 'false_positives':fp, 119 'false_negatives':fn} 120 db.addClassifierResult(result) 121 return ((fp+fn)/2.0,result) 122 357 123 if retest: 358 124 print("Dropping stored test results...") … … 368 134 369 135 print("initial test") 370 fp,fn = testAux(params, num_trials, num_obs) 371 error = (fp+fn)/2.0 136 error,result = getResult(classifier,params,num_obs,num_trials) 372 137 print("walking up") 373 138 while (error > target_error) and (num_obs < max_obs): … … 375 140 #print("increase_factor:", increase_factor) 376 141 num_obs = min(int(increase_factor*num_obs), max_obs) 377 fp,fn = testAux(params, num_trials, num_obs) 378 error = (fp+fn)/2.0 142 error,result = getResult(classifier,params,num_obs,num_trials) 379 143 380 144 print("walking down") 381 145 while (num_obs > 0): 382 current_best = ( num_obs,error,params,fp,fn)146 current_best = (error,result) 383 147 num_obs = int(0.95*num_obs) 384 fp,fn = testAux(params, num_trials, num_obs) 385 error = (fp+fn)/2.0 148 error,result = getResult(classifier,params,num_obs,num_trials) 386 149 if error > target_error: 387 150 break 388 151 389 test_results.append(current_best) 390 391 test_results.sort() 392 best_obs,error,best_params,fp,fn = test_results[0] 393 394 return {'classifier':classifier, 395 'trial_type':"test", 396 'num_observations':best_obs, 397 'num_trials':num_trials, 398 'params':best_params, 399 'false_positives':fp, 400 'false_negatives':fn} 152 return current_best 401 153 402 154 403 start = time.time() 404 unusual_case,unusual_diff = findUnusualTestCase(db) 405 greater = (unusual_diff > 0) 406 print("unusual_case:", unusual_case) 407 print("unusual_diff:", unusual_diff) 408 end = time.time() 409 print(":", end-start) 155 if options.unusual_case != None: 156 unusual_case,greater = options.unusual_case.split(',') 157 greater = bool(int(greater)) 158 else: 159 start = time.time() 160 unusual_case,unusual_diff = findUnusualTestCase(db) 161 greater = (unusual_diff > 0) 162 print("unusual_case:", unusual_case) 163 print("unusual_diff:", unusual_diff) 164 end = time.time() 165 print(":", end-start) 166 410 167 411 168 for c in sorted(classifiers.keys()): … … 424 181 start = time.time() 425 182 print("Testing %s..." % c) 426 result = testClassifier(db, unusual_case, greater, c, c in options.retest)183 error,result = testClassifier(db, unusual_case, greater, c, c in (options.retest+options.retrain)) 427 184 print("%s result:" % c) 428 185 pprint.pprint(result) 429 classifiers[c]['test_error'] = (result['false_positives']+result['false_negatives'])/2.0186 classifiers[c]['test_error'] = error 430 187 print("completed in:", time.time()-start) -
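bin/train now accepts an --unusual-case override so the unusual test case does not have to be re-detected on every run. A minimal sketch of how the '{case name},{1 or 0}' format is parsed, with find_unusual_test_case() standing in for the real findUnusualTestCase() fallback:

# Minimal sketch of the new --unusual-case handling in r11; find_unusual_test_case()
# is a hypothetical stub for the fallback in nanownlib.
import argparse

def find_unusual_test_case(db):
    # Stub: the real function estimates which test case differs and by how much.
    return 'long', 25.0

parser = argparse.ArgumentParser()
parser.add_argument('--unusual-case', action='store', default=None,
                    help='Format: {case name},{1 or 0}')
options = parser.parse_args(['--unusual-case', 'long,1'])

if options.unusual_case is not None:
    unusual_case, greater = options.unusual_case.split(',')
    greater = bool(int(greater))      # 1: unusual case is greater (slower) than the others
else:
    unusual_case, unusual_diff = find_unusual_test_case(db=None)
    greater = (unusual_diff > 0)

print(unusual_case, greater)          # long True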
trunk/lib/nanownlib/__init__.py
r10 → r11

         if p['sent']==1 and (seen[key]['observed'] > p['observed']): #earliest sent
             seen[key] = p
-            suspect += 's'
+            suspect += 's' # duplicated sent packets
             continue
         if p['sent']==0 and (seen[key]['observed'] > p['observed']): #earliest rcvd
             seen[key] = p
-            suspect += 'r'
+            suspect += 'r' # duplicated received packets
             continue

…
     suspect,packets = removeDuplicatePackets(packets)

-    sort_key = lambda d: (d['tcpseq'],d['observed'])
+    sort_key = lambda d: (d['observed'],d['tcpseq'])
+    alt_key = lambda d: (d['tcpseq'],d['observed'])
     sent = sorted((p for p in packets if p['sent']==1 and p['payload_len']>0), key=sort_key)
     rcvd = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=sort_key)
-
-    alt_key = lambda d: (d['observed'],d['tcpseq'])
     rcvd_alt = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=alt_key)

     s_off = trim_sent
     if s_off >= len(sent):
+        suspect += 'd' # dropped packet?
         s_off = -1
     last_sent = sent[s_off]

     r_off = len(rcvd) - trim_rcvd - 1
-    if r_off <= 0:
+    if r_off < 0:
+        suspect += 'd' # dropped packet?
         r_off = 0
     last_rcvd = rcvd[r_off]
     if last_rcvd != rcvd_alt[r_off]:
-        suspect += 'R'
+        suspect += 'R' # reordered received packets

     packet_rtt = last_rcvd['observed'] - last_sent['observed']
…
         last_sent_ack = min(((p['observed'],p) for p in packets
                              if p['sent']==0 and p['payload_len']+last_sent['tcpseq']==p['tcpack']))[1]
+
     except Exception as e:
         sys.stderr.write("WARN: Could not find last_sent_ack.\n")
…
 def analyzeProbes(db):
     db.conn.execute("CREATE INDEX IF NOT EXISTS packets_probe ON packets (probe_id)")
+    db.conn.commit()
+
     pcursor = db.conn.cursor()
-    db.conn.commit()
-
     pcursor.execute("SELECT tcpts_mean FROM meta")
     try:
…
             db.addTrimAnalyses([analysis])
         except Exception as e:
-            traceback.print_exc()
+            #traceback.print_exc()
             sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
…
             analysis['probe_id'] = probe_id
         except Exception as e:
-            print(e)
-            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % pid)
+            #traceback.print_exc()
+            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)

         db.addTrimAnalyses([analysis])
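The hunk above swaps the received-packet sort so arrival time is the primary key, keeps a sequence-ordered alternate list, and flags disagreement between the two with the 'R' suspect code (with 'd' now marking likely drops). A toy example, not taken from the changeset, of that reorder check:

# Toy demonstration of the 'R' (reordered) check: compare the packet selected under
# arrival-time ordering with the one selected under TCP-sequence ordering.
rcvd = [
    {'tcpseq': 1, 'observed': 100},
    {'tcpseq': 3, 'observed': 105},   # arrives before seq 2, i.e. reordered
    {'tcpseq': 2, 'observed': 110},
]

sort_key = lambda d: (d['observed'], d['tcpseq'])   # arrival order (new primary key)
alt_key  = lambda d: (d['tcpseq'], d['observed'])   # sequence order (alternate key)

rcvd_sorted = sorted(rcvd, key=sort_key)
rcvd_alt    = sorted(rcvd, key=alt_key)

suspect = ''
r_off = len(rcvd_sorted) - 1          # trim_rcvd of 0 in this toy case
if rcvd_sorted[r_off] != rcvd_alt[r_off]:
    suspect += 'R'                    # reordered received packets

print(suspect or 'no reordering')     # prints: R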
trunk/lib/nanownlib/parallel.py
r9 → r11

 #

+import sys
 import threading
 import queue
…
                 self.resultq.put((job_id, self.target(*args)))
             except Exception as e:
-                sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n" ,
+                sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n" %
                                  (str(job_id),str(e)))
             self.workq.task_done()
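The one-line fix above replaces a stray comma with the % operator: file.write() takes a single string, so the old form raised a TypeError whenever the worker's error path was exercised. A small demonstration:

# The old form passed two arguments to write(); the corrected form formats first.
import sys

job_id, e = 42, ValueError("boom")

try:
    sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n", (str(job_id), str(e)))
except TypeError as err:
    print("old call fails:", err)

sys.stderr.write("ERROR: Job '%s' failed with '%s'. Dropping...\n" % (str(job_id), str(e)))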
trunk/lib/nanownlib/stats.py
r10 r11 144 144 def ubersummary(values, distance=25): 145 145 left2 = 50-distance 146 left3 = 50-(distance/2.0) 146 147 left1 = left2/2.0 147 left3 = (left2+50)/2.0148 148 right2 = 50+distance 149 right3 = (right2+50)/2.0149 right3 = 50+(distance/2.0) 150 150 right1 = (right2+100)/2.0 151 151 l1,l2,l3,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,right3,right2,right1)) 152 #print(left1,left2,left3,50,right3,right2,right1)153 152 #print(l1,l2,l3,m,r3,r2,r1) 154 153 return (l1+l2*4+l3+r3+r2*4+r1)/12.0 155 154 #return statistics.mean((l1,l2,l3,m,r3,r2,r1)) 156 155 157 def quadsummary(values, distance=25): 158 left2 = 50-distance 159 left1 = left2/2.0 160 right2 = 50+distance 161 right1 = (right2+100)/2.0 162 l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1)) 163 #print(left1,left2,left3,50,right3,right2,right1) 164 #print(l1,l2,l3,m,r3,r2,r1) 165 return (l1+l2+r2+r1)/4.0 166 #return statistics.mean((l1,l2,l3,m,r3,r2,r1)) 167 156 168 157 def quadsummary(values, distance=25): 169 158 left1 = 50-distance … … 177 166 #return statistics.mean((l1,l2,l3,m,r3,r2,r1)) 178 167 179 168 169 def tsvalwmean(subseries): 170 weights = [(s['unusual_packet']+s['other_packet'])**2 for s in subseries] 171 normalizer = sum(weights)/len(weights) 172 return numpy.mean([weights[i]*(subseries[i]['unusual_tsval']-subseries[i]['other_tsval'])/normalizer 173 for i in range(len(weights))]) 174 175 #def tsvalwmean(subseries): 176 # return numpy.mean([(s['unusual_tsval']-s['other_tsval']) for s in subseries]) 177 178 180 179 def weightedMean(derived, weights): 181 180 normalizer = sum(weights.values())/len(weights) … … 186 185 return statistics.mean([w*(derived[k]['long_tsval']-derived[k]['short_tsval'])/normalizer for k,w in weights.items()]) 187 186 187 188 188 189 189 190 def estimateMean(trustFunc, weightFunc, alpha, derived): … … 199 200 200 201 201 #def estimateMedian(trustFunc, weightFunc, alpha, derived):202 # trust = trustValues(derived, trustFunc)203 # weights = weightFunc(derived, trust, alpha)204 205 # return statistics.median([(derived[k]['long']-derived[k]['short']) for k,w in weights.items() if w > 0.0])206 207 def estimateMedian(derived):208 return statistics.median([(d['long']-d['short']) for d in derived.values()])209 210 211 def estimateMidsummary(derived):212 return midsummary([(d['long']-d['short']) for d in derived.values()])213 214 215 def estimateTrimean(derived):216 return trimean([(d['long']-d['short']) for d in derived.values()])217 218 219 def tTest(expected_mean, derived):220 diffs = [(d['long']-d['short']) for d in derived.values()]221 null_tval, null_pval = scipy.stats.ttest_1samp(diffs, 0.0)222 tval, pval = scipy.stats.ttest_1samp(diffs, expected_mean)223 224 if pval < null_pval:225 return 1226 else:227 return 0228 229 230 def diffMedian(derived):231 l = [tc['long']-tc['short'] for s,tc in derived.items()]232 return statistics.median(l)233 234 235 def subsample_ids(db, probe_type, subsample_size=None):236 cursor = db.conn.cursor()237 cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))238 population_size = cursor.fetchone()[0]239 #print("population_size:", population_size)240 if subsample_size == None or subsample_size > population_size:241 subsample_size = population_size242 243 start = numpy.random.random_integers(0,population_size-1)244 cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ? 
OFFSET ?", (probe_type,subsample_size,start))245 for row in cursor:246 subsample_size -= 1247 yield row['sample']248 249 if subsample_size > 0:250 cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ?", (probe_type,subsample_size))251 for row in cursor:252 yield row['sample']253 254 255 def subsample(db, probe_type, subsample_size=None):256 cursor = db.conn.cursor()257 cursor.execute("SELECT count(test_case) FROM (SELECT test_case FROM probes GROUP BY test_case)")258 num_test_cases = cursor.fetchone()[0]259 260 for sid in subsample_ids(db, probe_type, subsample_size):261 cursor.execute("SELECT test_case,tc_order,time_of_day,reported,userspace_rtt,suspect,packet_rtt,tsval_rtt FROM probes p,analysis a WHERE p.sample=? and a.probe_id=p.id", (sid,))262 probes = cursor.fetchall()263 if len(probes) != num_test_cases:264 sys.stderr.write("WARN: sample %d had %d probes, but %d expected! Discarding...\n" % (sid, len(probes), num_test_cases))265 continue266 yield (sid,[dict(r) for r in probes])267 268 269 def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):270 population_size = db.populationSize(probe_type)271 272 if size == None or size > population_size:273 size = population_size274 if offset == None or offset >= population_size or offset < 0:275 offset = numpy.random.random_integers(0,population_size-1)276 277 query="""278 SELECT %(field)s AS unusual_case,279 (SELECT avg(%(field)s) FROM probes,analysis280 WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases281 FROM (SELECT probes.sample,%(field)s FROM probes,analysis282 WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u283 LIMIT :size OFFSET :offset284 """ % {"field":field}285 286 params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}287 cursor = db.conn.cursor()288 cursor.execute(query, params)289 ret_val = [dict(row) for row in cursor.fetchall()]290 #for row in cursor:291 # size -= 1292 # yield dict(row)293 294 size -= len(ret_val)295 if size > 0:296 params['offset'] = 0297 params['size'] = size298 cursor.execute(query, params)299 ret_val += [dict(row) for row in cursor.fetchall()]300 #for row in cursor:301 # yield dict(row)302 303 return ret_val304 305 306 # if test_cases=None, include all of them. 
Otherwise, include only the specified test cases.307 def samples2Distributions(samples, field, test_cases=None):308 ret_val = {}309 310 for sid,probes in samples:311 for p in probes:312 if p['test_case'] in ret_val:313 ret_val[p['test_case']].append(p[field])314 elif test_cases == None or p['test_case'] in test_cases:315 ret_val[p['test_case']] = [p[field]]316 317 318 return ret_val319 320 321 def samples2MeanDiffs(samples, field, unusual_case):322 ret_val = {}323 324 for sid,probes in samples:325 unusual_value = None326 for p in probes:327 if p['test_case'] == unusual_case:328 unusual_value = p[field]329 break330 yield statistics.mean([unusual_value-p[field] for p in probes if p['test_case'] != unusual_case])331 332 333 def bootstrap(estimator, db, probe_type, test_cases, subsample_size, num_trials):334 ret_val = []335 for t in range(num_trials):336 ret_val.append(estimator(test_cases, subsample(db, probe_type, subsample_size)))337 338 return ret_val339 340 341 def bootstrap2(estimator, db, probe_type, subsample_size, num_trials):342 ret_val = []343 for t in range(num_trials):344 ret_val.append(estimator(subsample(db, probe_type, subsample_size)))345 346 return ret_val347 348 349 202 def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials): 350 203 ret_val = [] … … 355 208 356 209 357 # Returns the test case name that clearly has higher RTT; otherwise, returns None358 def boxTest(params, test_cases, samples):359 if len(test_cases) != 2:360 # XXX: somehow generalize the box test to handle more than 2 cases361 raise Exception()362 dists = samples2Distributions(samples,'packet_rtt', test_cases) #XXX: field from params363 364 tmp1,tmp2 = dists.items()365 test_case1,dist1 = tmp1366 test_case2,dist2 = tmp2367 368 d1_high = numpy.percentile(dist1, params['high'])369 d2_low = numpy.percentile(dist2, params['low'])370 if d1_high < d2_low:371 return test_case2372 373 d1_low = numpy.percentile(dist1, params['low'])374 d2_high = numpy.percentile(dist2, params['high'])375 376 if d2_high < d1_low:377 return test_case1378 379 return None380 381 382 210 # Returns 1 if unusual_case is unusual in the expected direction 383 211 # 0 if it isn't unusual 384 212 # -1 if it is unusual in the wrong direction 385 213 def multiBoxTest(params, greater, samples): 386 uc = [s['unusual_ case'] for s in samples]387 rest = [s['other_ cases'] for s in samples]214 uc = [s['unusual_packet'] for s in samples] 215 rest = [s['other_packet'] for s in samples] 388 216 389 217 uc_high,uc_low = numpy.percentile(uc, (params['high'],params['low'])) … … 407 235 # 0 otherwise 408 236 def summaryTest(f, params, greater, samples): 409 diffs = [s['unusual_ case']-s['other_cases'] for s in samples]237 diffs = [s['unusual_packet']-s['other_packet'] for s in samples] 410 238 411 239 mh = f(diffs, params['distance']) … … 420 248 else: 421 249 return 0 250 422 251 423 252 midsummaryTest = functools.partial(summaryTest, midsummary) … … 451 280 452 281 def kfilter(params, observations): 453 x = numpy.array(observations) 282 x = numpy.array(observations) 454 283 movement = 0 455 est = [] 284 est = [] 456 285 var = [] 457 286 kf = KalmanFilter1D(x0 = quadsummary(x), # initial state 458 #P = 10000, 459 P = 10, # initial variance287 #P = 10000, # initial variance 288 P = 10, # initial variance 460 289 R = numpy.std(x), # msensor noise 461 290 Q = 0) # movement noise … … 471 300 472 301 def kalmanTest(params, greater, samples): 473 diffs = [s['unusual_ case']-s['other_cases'] for s in samples]302 diffs = 
[s['unusual_packet']-s['other_packet'] for s in samples] 474 303 475 304 m = kfilter(params, diffs)['est'][-1] … … 486 315 487 316 488 def kalmanTest2(params, greater, samples): 489 diffs = [s['unusual_case']-s['other_cases'] for s in samples] 490 491 estimates = [] 492 size = 500 493 for i in range(100): 494 off = random.randrange(0,len(diffs)) 495 sub = diffs[off:size] 496 if len(sub) < size: 497 sub += diffs[0:size-len(sub)] 498 estimates.append(kfilter(params, sub)['est'][-1]) 499 500 m = quadsummary(estimates) 317 def tsvalwmeanTest(params, greater, samples): 318 m = tsvalwmean(samples) 501 319 if greater: 502 320 if m > params['threshold']: … … 509 327 else: 510 328 return 0 511 -
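Among the stats.py changes above, the new tsvalwmean() computes a weighted mean of the TCP timestamp (tsval) RTT differences, scaling each sample by the squared sum of its packet RTTs and normalizing by the average weight. Restated below as a self-contained snippet; the two sample dicts only mimic the shape of storage.db.subseries() rows, and their values are invented for illustration.

# Self-contained restatement of tsvalwmean() from r11 with made-up subseries values.
import numpy

def tsvalwmean(subseries):
    # Weight each sample's tsval difference by the squared sum of its packet RTTs.
    weights = [(s['unusual_packet'] + s['other_packet'])**2 for s in subseries]
    normalizer = sum(weights) / len(weights)
    return numpy.mean([weights[i] * (subseries[i]['unusual_tsval'] - subseries[i]['other_tsval']) / normalizer
                       for i in range(len(weights))])

subseries = [{'unusual_packet': 1000.0, 'other_packet': 990.0,
              'unusual_tsval': 5000.0, 'other_tsval': 4970.0},
             {'unusual_packet': 1200.0, 'other_packet': 1150.0,
              'unusual_tsval': 5100.0, 'other_tsval': 5080.0}]

print(tsvalwmean(subseries))   # a weighted compromise between the raw diffs of 30 and 20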
trunk/lib/nanownlib/storage.py
r10 → r11

-    def subseries(self, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
-        cache_key = (probe_type,unusual_case,field)
-
+    def subseries(self, probe_type, unusual_case, size=None, offset=None):
+        cache_key = (probe_type,unusual_case)
         if cache_key not in self._population_cache:
             query="""
-            SELECT %(field)s AS unusual_case,
-                   (SELECT avg(%(field)s) FROM probes,analysis
-                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
-            FROM (SELECT probes.sample,%(field)s FROM probes,analysis
+            SELECT packet_rtt AS unusual_packet,
+                   (SELECT avg(packet_rtt) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_packet,
+
+                   tsval_rtt AS unusual_tsval,
+                   (SELECT avg(tsval_rtt) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_tsval,
+
+                   reported AS unusual_reported,
+                   (SELECT avg(reported) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_reported
+
+            FROM (SELECT probes.sample,packet_rtt,tsval_rtt,reported FROM probes,analysis
                   WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
-            """ % {"field":field}
+            """

             params = {"probe_type":probe_type, "unusual_case":unusual_case}
…
         return [self._insert('trim_analysis', row) for row in analyses]

-    def addClassifierResults(self, results):
+    def addClassifierResult(self, results):
         ret_val = self._insert('classifier_results', results)
         self.conn.commit()
         return ret_val

-    def fetchClassifierResult(self, classifier, trial_type, num_observations):
+    def fetchClassifierResult(self, classifier, trial_type, num_observations, params=None):
         query = """
           SELECT * FROM classifier_results
-          WHERE classifier=? AND trial_type=? AND num_observations=?
-          ORDER BY false_positives+false_negatives
-          LIMIT 1;
+          WHERE classifier=:classifier
+                AND trial_type=:trial_type
+                AND num_observations=:num_observations"""
+        if params != None:
+            query += """
+                AND params=:params"""
+        query += """
+          ORDER BY false_positives+false_negatives
+          LIMIT 1
         """
+
+        qparams = {'classifier':classifier, 'trial_type':trial_type,
+                   'num_observations':num_observations,'params':params}
         cursor = self.conn.cursor()
-        cursor.execute(query, (classifier, trial_type, num_observations))
+        cursor.execute(query, qparams)
         ret_val = cursor.fetchone()
-
         if ret_val != None:
             ret_val = dict(ret_val)
         return ret_val

     def deleteClassifierResults(self, classifier, trial_type, num_observations=None):
         params = {"classifier":classifier,"trial_type":trial_type,"num_observations":num_observations}
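fetchClassifierResult() now takes an optional params filter, which is what lets the new getResult() in bin/train reuse a stored test result for a given (classifier, num_observations, params) combination instead of re-running the trial. Below is a hypothetical standalone sketch of that cache-then-compute pattern; the table layout is simplified and is not the project's actual schema.

# Hypothetical sketch of the result-caching pattern enabled by the params filter.
import json
import sqlite3

conn = sqlite3.connect(':memory:')
conn.row_factory = sqlite3.Row
conn.execute("""CREATE TABLE classifier_results
                (classifier TEXT, trial_type TEXT, num_observations INTEGER,
                 num_trials INTEGER, params TEXT,
                 false_positives REAL, false_negatives REAL)""")

def fetch_classifier_result(classifier, trial_type, num_observations, params=None):
    query = """SELECT * FROM classifier_results
               WHERE classifier=:classifier AND trial_type=:trial_type
                     AND num_observations=:num_observations"""
    if params is not None:
        query += " AND params=:params"
    query += " ORDER BY false_positives+false_negatives LIMIT 1"
    row = conn.execute(query, {'classifier': classifier, 'trial_type': trial_type,
                               'num_observations': num_observations,
                               'params': params}).fetchone()
    return dict(row) if row is not None else None

def get_result(classifier, params, num_obs, num_trials=100):
    jparams = json.dumps(params, sort_keys=True)
    result = fetch_classifier_result(classifier, 'test', num_obs, jparams)
    if result is None:
        fp, fn = 4.0, 6.0   # stand-in for an expensive bootstrap run
        result = {'classifier': classifier, 'trial_type': 'test',
                  'num_observations': num_obs, 'num_trials': num_trials,
                  'params': jparams, 'false_positives': fp, 'false_negatives': fn}
        conn.execute("INSERT INTO classifier_results VALUES "
                     "(:classifier,:trial_type,:num_observations,:num_trials,"
                     ":params,:false_positives,:false_negatives)", result)
    return (result['false_positives'] + result['false_negatives']) / 2.0, result

print(get_result('boxtest', {'low': 4, 'high': 6}, 1000)[0])   # computed and stored
print(get_result('boxtest', {'low': 4, 'high': 6}, 1000)[0])   # served from the table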