trunk/bin/sampler
--- r5
+++ r9
@@ -37,4 +37,5 @@
                     help='JSON representation of echo timing cases.')
 parser.add_argument('--no-tcpts', action='store_true', help='Disbale TCP timestamp profiling')
+parser.add_argument('--no-control', action='store_true', help='Do not collect separate control data. Instead, synthesize it from test and train data.')
 parser.add_argument('session_name', default=None,
                     help='Name for the sampler session (used in output filenames)')
@@ -133,9 +134,17 @@
 time.sleep(0.5) # ensure sniffer is fully ready and our process is migrated
 
+if options.no_control:
+    num_control = 0
+else:
+    num_control = int(num_samples*2/5)
+
+num_train = int((num_samples-num_control)/3)
+num_test = num_samples-num_train-num_control
+
+sample_types = [('train',num_train),
+                ('train_null',num_control),
+                ('test',num_test)]
+
 sid = 0
-sample_types = [('train',int(num_samples*1/5)),
-                ('train_null',int(num_samples*2/5)),
-                ('test',int(num_samples*2/5)),
-                ('test_null',0)]
 report_interval = 20
 start = time.time()
@@ -183,4 +192,7 @@
 print("associate time:", end-start)
 
+if options.no_control:
+    print("TODO: implement control synthesizing!")
+
 start = time.time()
 num_probes = analyzeProbes(db)
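For reference, the revised sample split above works out as follows. This is a minimal standalone sketch of the arithmetic only; the split_counts helper and its no_control parameter are illustrative names, not part of the sampler script.

# Sketch of the r9 split: two fifths of the samples become control
# ('train_null') unless --no-control is given, a third of the remainder
# goes to training, and everything left over is used for testing.
def split_counts(num_samples, no_control=False):
    num_control = 0 if no_control else int(num_samples*2/5)
    num_train = int((num_samples-num_control)/3)
    num_test = num_samples-num_train-num_control
    return [('train', num_train), ('train_null', num_control), ('test', num_test)]

# e.g. 1000 samples -> 400 control, 200 train, 400 test
print(split_counts(1000))
# with --no-control -> 0 control, 333 train, 667 test
print(split_counts(1000, no_control=True))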
trunk/bin/train
--- r8
+++ r9
@@ -9,6 +9,4 @@
 import functools
 import argparse
-import threading
-import queue
 import pprint
 import json
@@ -30,4 +28,6 @@
 import nanownlib.storage
 from nanownlib.stats import boxTest,multiBoxTest,subsample,bootstrap,bootstrap2,trimean,midhinge,midhingeTest,samples2Distributions,samples2MeanDiffs
+from nanownlib.parallel import WorkerThreads
+
 
 parser = argparse.ArgumentParser(
@@ -39,46 +39,4 @@
 options = parser.parse_args()
 
-
-
-class WorkerThreads(object):
-    workq = None
-    resultq = None
-    target = None
-
-    def __init__(self, num_workers, target):
-        self.workq = queue.Queue()
-        self.resultq = queue.Queue()
-        self.target = target
-
-        self.workers = []
-        for i in range(num_workers):
-            t = threading.Thread(target=self._worker)
-            t.daemon = True
-            t.start()
-            self.workers.append(t)
-
-    def _worker(self):
-        while True:
-            item = self.workq.get()
-            if item == None:
-                self.workq.task_done()
-                break
-
-            job_id,args = item
-            self.resultq.put((job_id, self.target(*args)))
-            self.workq.task_done()
-
-    def addJob(self, job_id, args):
-        self.workq.put((job_id, args))
-
-    def wait(self):
-        self.workq.join()
-
-    def stop(self):
-        for i in range(0,len(workers)):
-            self.workq.put(None)
-        for w in self.workers:
-            w.join()
-
-
 
 
@@ -97,4 +55,4 @@
         return false_positives,false_negatives
 
-    start = time.time()
+    #start = time.time()
     wt = WorkerThreads(2, trainAux)
@@ -111,6 +69,6 @@
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
-    pprint.pprint(performance)
-    print(time.time()-start)
+    #pprint.pprint(performance)
+    #print(time.time()-start)
 
     num_trials = 200
@@ -137,7 +95,7 @@
               width, statistics.mean(false_negatives), statistics.mean(false_positives)))
     performance.sort()
-    pprint.pprint(performance)
+    #pprint.pprint(performance)
     good_width = performance[0][1]
-    print("good_width:",good_width)
+    #print("good_width:",good_width)
 
 
@@ -152,11 +110,11 @@
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
-    pprint.pprint(performance)
+    #pprint.pprint(performance)
     best_low = performance[0][1]
-    print("best_low:", best_low)
-
-
-    num_trials = 500
-    widths = [good_width+(x/10.0) for x in range(-6,7) if good_width+(x/10.0) > 0.0]
+    #print("best_low:", best_low)
+
+
+    num_trials = 500
+    widths = [good_width+(x/100.0) for x in range(-60,75,5) if good_width+(x/100.0) > 0.0]
     performance = []
     for width in widths:
@@ -168,9 +126,10 @@
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
-    pprint.pprint(performance)
+    #pprint.pprint(performance)
     best_width=performance[0][1]
-    print("best_width:",best_width)
-    print("final_performance:", performance[0][0])
-
+    #print("best_width:",best_width)
+    #print("final_performance:", performance[0][0])
+
+    wt.stop()
     params = json.dumps({"low":best_low,"high":best_low+best_width})
     return {'algorithm':"boxtest",
@@ -200,5 +159,5 @@
     mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]
     threshold = trimean(mean_diffs)/2.0
-    print("init_threshold:", threshold)
+    #print("init_threshold:", threshold)
 
     wt = WorkerThreads(2, trainAux)
@@ -217,5 +176,5 @@
     #pprint.pprint(performance)
     good_distance = performance[0][1]
-    print("good_distance:",good_distance)
+    #print("good_distance:",good_distance)
 
 
@@ -232,5 +191,5 @@
     #pprint.pprint(performance)
     good_threshold = performance[0][1]
-    print("good_threshold:", good_threshold)
+    #print("good_threshold:", good_threshold)
 
 
@@ -247,10 +206,10 @@
     #pprint.pprint(performance)
     best_distance = performance[0][1]
-    print("best_distance:",best_distance)
-
-
-    num_trials = 500
-    performance = []
-    for t in range(95,106):
+    #print("best_distance:",best_distance)
+
+
+    num_trials = 500
+    performance = []
+    for t in range(90,111):
         wt.addJob(good_threshold*(t/100.0), (best_distance,good_threshold*(t/100.0),num_trials))
     wt.wait()
@@ -262,6 +221,7 @@
     #pprint.pprint(performance)
     best_threshold = performance[0][1]
-    print("best_threshold:", best_threshold)
-
+    #print("best_threshold:", best_threshold)
+
+    wt.stop()
     params = json.dumps({'distance':best_distance,'threshold':best_threshold})
     return {'algorithm':"midhinge",
@@ -274,15 +234,29 @@
 
 
-#classifiers = {'boxtest':{'train':trainBoxTest2, 'test':multiBoxTest},
-#'midhinge':{'train':trainMidhinge, 'test':midhinge}}
+classifiers = {'boxtest':{'train':trainBoxTest, 'test':multiBoxTest},
+               'midhinge':{'train':trainMidhinge, 'test':midhinge}}
 
 
 db = nanownlib.storage.db(options.session_data)
-#cursor = db.cursor()
-#cursor.execute("SELECT min(sample) min, max(sample) max FROM probes")
-#train_start,test_end = cursor.fetchone()
-#train_end = int(test_end-train_start)
-#test_start = train_end+1
-#subsample_size = min(10000,(train_end-train_start+1)/4)
+
+import cProfile
+
+def trainClassifier(db, unusual_case, greater, trainer):
+    threshold = 5.0 # in percent
+    size = 4000
+    result = None
+    while size < db.populationSize('train')/5:
+        size = min(size*2, int(db.populationSize('train')/5))
+        result = trainer(db,unusual_case,greater,size)
+        error = statistics.mean([result['false_positives'],result['false_negatives']])
+        print("subseries size: %d | error: %f | false_positives: %f | false_negatives: %f"
+              % (size,error,result['false_positives'],result['false_negatives']))
+        if error < threshold:
+            break
+    if result != None:
+        db.addClassifierResults(result)
+
+    return result
+
 
 start = time.time()
@@ -294,17 +268,12 @@
 print(":", end-start)
 
-import cProfile
-
-
-
-
-for size in (500,1000,2000,4000,5000,6000):
+
+for c,funcs in classifiers.items():
     start = time.time()
-    #cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
-    results = trainMidhinge(db, unusual_case, greater, size)
-    #db.addClassifierResults(results)
-    print("midhinge result:")
-    pprint.pprint(results)
-    print(":", time.time()-start)
+    print("Training %s..." % c)
+    result = trainClassifier(db, unusual_case, greater, funcs['train'])
+    print("%s result:" % c)
+    pprint.pprint(result)
+    print("completed in:", time.time()-start)
 
 sys.exit(0)