Legend:
  (no prefix)  Unmodified
  +            Added in r11
  -            Removed from r10
trunk/bin/graph
r10 → r11
 
 
-def differences(db, unusual_case, column='packet_rtt'):
-    cursor = db.conn.cursor()
-    query="""
-      SELECT %(column)s-(SELECT avg(%(column)s) FROM probes,analysis
-                         WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type in ('train','test') AND sample=u.sample)
-      FROM (SELECT probes.sample,%(column)s FROM probes,analysis
-            WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type in ('train','test')) u
-      """ % {"column":column}
-    params = {"unusual_case":unusual_case}
-    cursor.execute(query, params)
-    for row in cursor:
-        yield row[0]
+def differences(db, unusual_case, rtt_type='packet'):
+    ret_val = [s['unusual_'+rtt_type]-s['other_'+rtt_type] for s in db.subseries('train', unusual_case)]
+    ret_val += [s['unusual_'+rtt_type]-s['other_'+rtt_type] for s in db.subseries('test', unusual_case)]
+    return ret_val
+
+def null_differences(db, unusual_case, rtt_type='packet'):
+    ret_val = [s['unusual_'+rtt_type]-s['other_'+rtt_type] for s in db.subseries('train_null', unusual_case)]
+    return ret_val
 
 
…
 print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
 print('packet_rtt diff MAD: %f' % mad(diffs))
-print('reported diff trimean: %f' % trimean(reported_diffs))
-print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
-print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
-print('reported diff MAD: %f' % mad(reported_diffs))
-
-import cProfile
-kresults = kfilter({},diffs)
-#print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
-print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
-kresults = kfilter({},reported_diffs)
-#print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
-print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
+try:
+    print('reported diff trimean: %f' % trimean(reported_diffs))
+    print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
+    print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
+    print('reported diff MAD: %f' % mad(reported_diffs))
+
+    import cProfile
+    start = time.time()
+    kresults = kfilter({},diffs)
+    #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
+    print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
+    kresults = kfilter({},reported_diffs)
+    #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
+    print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
+    print("kfilter time: %f" % (time.time()-start))
+except:
+    pass
+
+print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
+print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
+print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
+print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))
 
 
…
 #cut_off_high = all_data[int(len(all_data)*0.997)]
 
+
+def plotSingleProbe(probe_id=None):
+    if probe_id == None:
+        cursor = db.conn.cursor()
+        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
+        cursor.execute(query)
+        probe_id = cursor.fetchone()[0]
+
+    cursor = db.conn.cursor()
+    query="""SELECT observed,payload_len FROM packets
+             WHERE probe_id=? AND sent=1"""
+    cursor.execute(query, (probe_id,))
+    pkts = cursor.fetchall()
+    sent_payload = [row[0] for row in pkts if row[1] != 0]
+    sent_other = [row[0] for row in pkts if row[1] == 0]
+
+    query="""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=0"""
+    cursor.execute(query, (probe_id,))
+    pkts = cursor.fetchall()
+    rcvd_payload = [row[0] for row in pkts if row[1] != 0]
+    rcvd_other = [row[0] for row in pkts if row[1] == 0]
+
+    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
+    #cursor.execute(query, (probe_id,))
+    #reported,tod = cursor.fetchone()
+    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]
+
+    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
+    plt.clf()
+    plt.title("Single HTTP Request - Packet Times")
+    sp = plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
+    so = plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
+    rp = plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
+    ro = plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
+    #plt.legend((s,r), ('sent','received'))
+    #plt.savefig('../img/http-packet-times.svg')
+    plt.show()
+
+#plotSingleProbe()
+
+
+def graphTestResults():
+    plt.clf()
+    plt.title("Test Results")
+    plt.xlabel('sample size')
+    plt.ylabel('error rate')
+    legend = []
+    colors = ['red','blue','green','purple','orange','black','brown']
+    color_id = 0
+
+    cursor = db.conn.cursor()
+    query = """
+      SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
+    """
+    cursor.execute(query)
+    classifiers = []
+    for c in cursor:
+        classifiers.append(c[0])
+
+    for classifier in classifiers:
+        query="""
+          SELECT params FROM classifier_results
+          WHERE trial_type='test'
+                AND classifier=:classifier
+                AND (false_positives+false_negatives)/2.0 < 5.0
+          ORDER BY num_observations,(false_positives+false_negatives)
+          LIMIT 1
+        """
+        cursor.execute(query, {'classifier':classifier})
+        row = cursor.fetchone()
+        if row == None:
+            query="""
+              SELECT params FROM classifier_results
+              WHERE trial_type='test' and classifier=:classifier
+              ORDER BY (false_positives+false_negatives),num_observations
+              LIMIT 1
+            """
+            cursor.execute(query, {'classifier':classifier})
+            row = cursor.fetchone()
+            if row == None:
+                sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
+                continue
+
+        best_params = row[0]
+        query="""
+          SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
+          WHERE trial_type='test'
+                AND classifier=:classifier
+                AND params=:params
+          ORDER BY num_observations
+        """
+        cursor.execute(query, {'classifier':classifier,'params':best_params})
+
+        num_obs = []
+        performance = []
+        for row in cursor:
+            num_obs.append(row[0])
+            performance.append(row[1])
+        #print(num_obs,performance)
+        path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
+        plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
+        legend.append((classifier,path))
+        color_id = (color_id+1) % len(colors)
+
+    plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1,
+               fontsize='xx-small')
+    plt.show()
+
+graphTestResults()
+
+sys.exit(0)
 
 plt.clf()
…
 plt.show()
 
+
+
+plt.clf()
+plt.title("Simple HTTP Request")
+plt.xlabel('Time of Day')
+plt.ylabel('')
+s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
+r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
+plt.legend((s,r), ('sent','received'), scatterpoints=1)
+plt.show()
+
+sys.exit(0)
 short_overtime,long_overtime,diff_overtime = None,None,None
 
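
As a brief illustration (not part of the changeset): the reworked helpers above could be exercised roughly as follows, assuming the new differences()/null_differences() definitions are in scope within bin/graph, that a previously collected session database file exists, and that 'long' is one of the recorded test cases. The file name and case name are assumptions for the example only.

    # Illustrative sketch only: exercises the subseries-based helpers introduced in r11.
    import numpy
    import nanownlib.storage

    db = nanownlib.storage.db('session.db')                       # assumed path to a collected session database
    diffs = differences(db, 'long', rtt_type='packet')            # unusual-vs-other packet RTT deltas (train+test)
    null_diffs = null_differences(db, 'long', rtt_type='packet')  # same deltas computed over the null subseries
    print('mean diff: %f' % numpy.mean(diffs))
    print('mean null diff: %f' % numpy.mean(null_diffs))
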
trunk/bin/train
r10 → r11
 sys.path.append("%s/../lib" % script_dir)
 
 
+from nanownlib import *
 from nanownlib.stats import *
+from nanownlib.train import *
 from nanownlib.parallel import WorkerThreads
 import nanownlib.storage
…
 #parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":1010000}',
 #                    help='JSON representation of echo timing cases. Default: {"short":10000,"long":1010000}')
-parser.add_argument('--retrain', action='append', default=[], help='Force a classifier to be retrained. May be specified multiple times.')
+parser.add_argument('--unusual-case', action='store', default=None, help='Specify the unusual case and whether it is greater than the other cases. Format: {case name},{1 or 0}')
+parser.add_argument('--retrain', action='append', default=[], help='Force a classifier to be retrained (and retested). May be specified multiple times.')
 parser.add_argument('--retest', action='append', default=[], help='Force a classifier to be retested. May be specified multiple times.')
 parser.add_argument('session_data', default=None,
                     help='Database file storing session information')
 options = parser.parse_args()
+db = nanownlib.storage.db(options.session_data)
 
 
-def trainBoxTest(db, unusual_case, greater, num_observations):
-    db.resetOffsets()
-
-    def trainAux(low,high,num_trials):
-        estimator = functools.partial(multiBoxTest, {'low':low, 'high':high}, greater)
-        estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials)
-        null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)
-
-        bad_estimates = len([e for e in estimates if e != 1])
-        bad_null_estimates = len([e for e in null_estimates if e != 0])
-
-        false_negatives = 100.0*bad_estimates/num_trials
-        false_positives = 100.0*bad_null_estimates/num_trials
-        return false_positives,false_negatives
-
-    #start = time.time()
-    wt = WorkerThreads(2, trainAux)
-
-    num_trials = 200
-    width = 1.0
-    performance = []
-    for low in range(0,50):
-        wt.addJob(low, (low,low+width,num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        performance.append(((fp+fn)/2.0, job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    #print(time.time()-start)
-
-    num_trials = 200
-    lows = [p[1] for p in performance[0:5]]
-    widths = [w/10.0 for w in range(5,65,5)]
-    performance = []
-    for width in widths:
-        false_positives = []
-        false_negatives = []
-        for low in lows:
-            wt.addJob(low,(low,low+width,num_trials))
-        wt.wait()
-        while not wt.resultq.empty():
-            job_id,errors = wt.resultq.get()
-            fp,fn = errors
-            false_negatives.append(fn)
-            false_positives.append(fp)
-
-        #print(width, false_negatives)
-        #print(width, false_positives)
-        #performance.append(((statistics.mean(false_positives)+statistics.mean(false_negatives))/2.0,
-        #                    width, statistics.mean(false_negatives), statistics.mean(false_positives)))
-        performance.append((abs(statistics.mean(false_positives)-statistics.mean(false_negatives)),
-                            width, statistics.mean(false_negatives), statistics.mean(false_positives)))
-    performance.sort()
-    #pprint.pprint(performance)
-    good_width = performance[0][1]
-    #print("good_width:",good_width)
-
-
-    num_trials = 500
-    performance = []
-    for low in lows:
-        wt.addJob(low, (low,low+good_width,num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        performance.append(((fp+fn)/2.0, job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    best_low = performance[0][1]
-    #print("best_low:", best_low)
-
-
-    num_trials = 500
-    widths = [good_width+(x/100.0) for x in range(-70,75,5) if good_width+(x/100.0) > 0.0]
-    performance = []
-    for width in widths:
-        wt.addJob(width, (best_low,best_low+width,num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        #performance.append(((fp+fn)/2.0, job_id, fn, fp))
-        performance.append((abs(fp-fn), job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    best_width=performance[0][1]
-    #print("best_width:",best_width)
-    #print("final_performance:", performance[0][0])
-
-    wt.stop()
-    params = json.dumps({"low":best_low,"high":best_low+best_width})
-    return {'trial_type':"train",
-            'num_observations':num_observations,
-            'num_trials':num_trials,
-            'params':params,
-            'false_positives':performance[0][3],
-            'false_negatives':performance[0][2]}
-
-
-def trainSummary(summaryFunc, db, unusual_case, greater, num_observations):
-    db.resetOffsets()
-    stest = functools.partial(summaryTest, summaryFunc)
-
-    def trainAux(distance, threshold, num_trials):
-        estimator = functools.partial(stest, {'distance':distance,'threshold':threshold}, greater)
-        estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials)
-        null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)
-
-        bad_estimates = len([e for e in estimates if e != 1])
-        bad_null_estimates = len([e for e in null_estimates if e != 0])
-
-        false_negatives = 100.0*bad_estimates/num_trials
-        false_positives = 100.0*bad_null_estimates/num_trials
-        return false_positives,false_negatives
-
-    #determine expected delta based on differences
-    mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]
-    threshold = summaryFunc(mean_diffs)/2.0
-    #print("init_threshold:", threshold)
-
-    wt = WorkerThreads(2, trainAux)
-
-    num_trials = 500
-    performance = []
-    for distance in range(1,50):
-        wt.addJob(distance, (distance,threshold,num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        performance.append(((fp+fn)/2.0, job_id, fn, fp))
-
-    performance.sort()
-    #pprint.pprint(performance)
-    good_distance = performance[0][1]
-    #print("good_distance:",good_distance)
-
-
-    num_trials = 500
-    performance = []
-    for t in range(80,122,2):
-        wt.addJob(threshold*(t/100.0), (good_distance,threshold*(t/100.0),num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        #performance.append(((fp+fn)/2.0, job_id, fn, fp))
-        performance.append((abs(fp-fn), job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    good_threshold = performance[0][1]
-    #print("good_threshold:", good_threshold)
-
-
-    num_trials = 500
-    performance = []
-    for d in [good_distance+s for s in range(-4,5) if good_distance+s > -1]:
-        wt.addJob(d, (d,good_threshold,num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        performance.append(((fp+fn)/2.0, job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    best_distance = performance[0][1]
-    #print("best_distance:",best_distance)
-
-
-    num_trials = 500
-    performance = []
-    for t in range(90,111):
-        wt.addJob(good_threshold*(t/100.0), (best_distance,good_threshold*(t/100.0),num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        #performance.append(((fp+fn)/2.0, job_id, fn, fp))
-        performance.append((abs(fp-fn), job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    best_threshold = performance[0][1]
-    #print("best_threshold:", best_threshold)
-
-    wt.stop()
-    params = json.dumps({'distance':best_distance,'threshold':best_threshold})
-    return {'trial_type':"train",
-            'num_observations':num_observations,
-            'num_trials':num_trials,
-            'params':params,
-            'false_positives':performance[0][3],
-            'false_negatives':performance[0][2]}
-
-
-def trainKalman(db, unusual_case, greater, num_observations):
-    db.resetOffsets()
-
-    def trainAux(params, num_trials):
-        estimator = functools.partial(kalmanTest, params, greater)
-        estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials)
-        null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)
-
-        bad_estimates = len([e for e in estimates if e != 1])
-        bad_null_estimates = len([e for e in null_estimates if e != 0])
-
-        false_negatives = 100.0*bad_estimates/num_trials
-        false_positives = 100.0*bad_null_estimates/num_trials
-        return false_positives,false_negatives
-
-    mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]
-    good_threshold = kfilter({},mean_diffs)['est'][-1]/2.0
-
-    wt = WorkerThreads(2, trainAux)
-    num_trials = 200
-    performance = []
-    for t in range(90,111):
-        params = {'threshold':good_threshold*(t/100.0)}
-        wt.addJob(good_threshold*(t/100.0), (params,num_trials))
-    wt.wait()
-    while not wt.resultq.empty():
-        job_id,errors = wt.resultq.get()
-        fp,fn = errors
-        #performance.append(((fp+fn)/2.0, job_id, fn, fp))
-        performance.append((abs(fp-fn), job_id, fn, fp))
-    performance.sort()
-    #pprint.pprint(performance)
-    best_threshold = performance[0][1]
-    #print("best_threshold:", best_threshold)
-    params = {'threshold':best_threshold}
-
-    wt.stop()
-
-    return {'trial_type':"train",
-            'num_observations':num_observations,
-            'num_trials':num_trials,
-            'params':json.dumps(params),
-            'false_positives':performance[0][3],
-            'false_negatives':performance[0][2]}
-
-
-#determine expected delta based on differences
-classifiers = {'boxtest':{'train':trainBoxTest, 'test':multiBoxTest, 'train_results':[]},
-               'midsummary':{'train':functools.partial(trainSummary, midsummary), 'test':midsummaryTest, 'train_results':[]},
-               #'ubersummary':{'train':functools.partial(trainSummary, ubersummary), 'test':ubersummaryTest, 'train_results':[]},
-               'quadsummary':{'train':functools.partial(trainSummary, quadsummary), 'test':quadsummaryTest, 'train_results':[]},
-               'kalman':{'train':trainKalman, 'test':kalmanTest, 'train_results':[]},
-               #'_trimean':{'train':None, 'test':trimeanTest, 'train_results':[]},
-              }
-
-
-db = nanownlib.storage.db(options.session_data)
-
-import cProfile
 
 def trainClassifier(db, unusual_case, greater, classifier, retrain=False):
…
     print("number of observations: %d | error: %f | false_positives: %f | false_negatives: %f | train time: %s | params: %s"
           % (num_obs, error, result['false_positives'],result['false_negatives'], train_time, result['params']))
-    db.addClassifierResults(result)
+    db.addClassifierResult(result)
     classifiers[classifier]['train_results'].append(result)
 
…
 
 
+    def getResult(classifier, params, num_obs, num_trials):
+        jparams = json.dumps(params, sort_keys=True)
+        result = db.fetchClassifierResult(classifier, 'test', num_obs, jparams)
+        if result:
+            fp = result['false_positives']
+            fn = result['false_negatives']
+        else:
+            fp,fn = testAux(params, num_trials, num_obs)
+            result = {'classifier':classifier,
+                      'trial_type':"test",
+                      'num_observations':num_obs,
+                      'num_trials':num_trials,
+                      'params':jparams,
+                      'false_positives':fp,
+                      'false_negatives':fn}
+            db.addClassifierResult(result)
+        return ((fp+fn)/2.0,result)
+
     if retest:
         print("Dropping stored test results...")
…
 
 
     print("initial test")
-    fp,fn = testAux(params, num_trials, num_obs)
-    error = (fp+fn)/2.0
+    error,result = getResult(classifier,params,num_obs,num_trials)
     print("walking up")
     while (error > target_error) and (num_obs < max_obs):
…
         #print("increase_factor:", increase_factor)
         num_obs = min(int(increase_factor*num_obs), max_obs)
-        fp,fn = testAux(params, num_trials, num_obs)
-        error = (fp+fn)/2.0
+        error,result = getResult(classifier,params,num_obs,num_trials)
 
     print("walking down")
     while (num_obs > 0):
-        current_best = (num_obs,error,params,fp,fn)
+        current_best = (error,result)
         num_obs = int(0.95*num_obs)
-        fp,fn = testAux(params, num_trials, num_obs)
-        error = (fp+fn)/2.0
+        error,result = getResult(classifier,params,num_obs,num_trials)
         if error > target_error:
             break
 
-    test_results.append(current_best)
-
-    test_results.sort()
-    best_obs,error,best_params,fp,fn = test_results[0]
-
-    return {'classifier':classifier,
-            'trial_type':"test",
-            'num_observations':best_obs,
-            'num_trials':num_trials,
-            'params':best_params,
-            'false_positives':fp,
-            'false_negatives':fn}
+    return current_best
 
 
-start = time.time()
-unusual_case,unusual_diff = findUnusualTestCase(db)
-greater = (unusual_diff > 0)
-print("unusual_case:", unusual_case)
-print("unusual_diff:", unusual_diff)
-end = time.time()
-print(":", end-start)
+if options.unusual_case != None:
+    unusual_case,greater = options.unusual_case.split(',')
+    greater = bool(int(greater))
+else:
+    start = time.time()
+    unusual_case,unusual_diff = findUnusualTestCase(db)
+    greater = (unusual_diff > 0)
+    print("unusual_case:", unusual_case)
+    print("unusual_diff:", unusual_diff)
+    end = time.time()
+    print(":", end-start)
+
 
 
 for c in sorted(classifiers.keys()):
…
     start = time.time()
     print("Testing %s..." % c)
-    result = testClassifier(db, unusual_case, greater, c, c in options.retest)
+    error,result = testClassifier(db, unusual_case, greater, c, c in (options.retest+options.retrain))
     print("%s result:" % c)
     pprint.pprint(result)
-    classifiers[c]['test_error'] = (result['false_positives']+result['false_negatives'])/2.0
+    classifiers[c]['test_error'] = error
     print("completed in:", time.time()-start)
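
As a brief illustration (not part of the changeset): the new --unusual-case option is expected in the form '{case name},{1 or 0}', and the script splits it exactly as sketched below. The case name 'long' is only an example value, and parse_unusual_case is a hypothetical helper, not a function in the repository.

    # Illustrative sketch only: mirrors the option parsing added in r11
    # (options.unusual_case.split(',') followed by bool(int(...))).
    def parse_unusual_case(value):
        case, greater_flag = value.split(',')     # e.g. "long,1"
        return case, bool(int(greater_flag))      # (case name, True if its RTTs are the larger ones)

    unusual_case, greater = parse_unusual_case('long,1')
    print(unusual_case, greater)                  # -> long True
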