Changeset 9


Ignore:
Timestamp:
07/10/15 14:03:04 (9 years ago)
Author:
tim
Message:

.

Location:
trunk
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/bin/sampler

    r5 r9  
    3737                    help='JSON representation of echo timing cases.')
    3838parser.add_argument('--no-tcpts', action='store_true', help='Disbale TCP timestamp profiling')
     39parser.add_argument('--no-control', action='store_true', help='Do not collect separate control data.  Instead, synthesize it from test and train data.')
    3940parser.add_argument('session_name', default=None,
    4041                    help='Name for the sampler session (used in output filenames)')
     
    133134time.sleep(0.5) # ensure sniffer is fully ready and our process is migrated
    134135
     136if options.no_control:
     137    num_control = 0
     138else:
     139    num_control = int(num_samples*2/5)
     140
     141num_train = int((num_samples-num_control)/3)
     142num_test = num_samples-num_train-num_control
     143
     144sample_types = [('train',num_train),
     145                ('train_null',num_control),
     146                ('test',num_test)]
     147
    135148sid = 0
    136 sample_types = [('train',int(num_samples*1/5)),
    137                 ('train_null',int(num_samples*2/5)),
    138                 ('test',int(num_samples*2/5)),
    139                 ('test_null',0)]
    140149report_interval = 20
    141150start = time.time()
     
    183192print("associate time:", end-start)
    184193
     194if options.no_control:
     195    print("TODO: implement control synthesizing!")
     196
    185197start = time.time()
    186198num_probes = analyzeProbes(db)
  • trunk/bin/train

    r8 r9  
    99import functools
    1010import argparse
    11 import threading
    12 import queue
    1311import pprint
    1412import json
     
    3028import nanownlib.storage
    3129from nanownlib.stats import boxTest,multiBoxTest,subsample,bootstrap,bootstrap2,trimean,midhinge,midhingeTest,samples2Distributions,samples2MeanDiffs
     30from nanownlib.parallel import WorkerThreads
     31
    3232
    3333parser = argparse.ArgumentParser(
     
    3939options = parser.parse_args()
    4040
    41 
    42 
    43 class WorkerThreads(object):
    44     workq = None
    45     resultq = None
    46     target = None
    47    
    48     def __init__(self, num_workers, target):
    49         self.workq = queue.Queue()
    50         self.resultq = queue.Queue()
    51         self.target = target
    52        
    53         self.workers = []
    54         for i in range(num_workers):
    55             t = threading.Thread(target=self._worker)
    56             t.daemon = True
    57             t.start()
    58             self.workers.append(t)
    59 
    60     def _worker(self):
    61         while True:
    62             item = self.workq.get()
    63             if item == None:
    64                 self.workq.task_done()
    65                 break
    66 
    67             job_id,args = item
    68             self.resultq.put((job_id, self.target(*args)))
    69             self.workq.task_done()
    70 
    71     def addJob(self, job_id, args):
    72         self.workq.put((job_id, args))
    73            
    74     def wait(self):
    75         self.workq.join()
    76 
    77     def stop(self):
    78         for i in range(0,len(workers)):
    79             self.workq.put(None)
    80         for w in self.workers:
    81             w.join()
    82 
    8341           
    8442
     
    9755        return false_positives,false_negatives
    9856
    99     start = time.time()
     57    #start = time.time()
    10058    wt = WorkerThreads(2, trainAux)
    10159   
     
    11169        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    11270    performance.sort()
    113     pprint.pprint(performance)
    114     print(time.time()-start)
     71    #pprint.pprint(performance)
     72    #print(time.time()-start)
    11573   
    11674    num_trials = 200
     
    13795                            width, statistics.mean(false_negatives), statistics.mean(false_positives)))
    13896    performance.sort()
    139     pprint.pprint(performance)
     97    #pprint.pprint(performance)
    14098    good_width = performance[0][1]
    141     print("good_width:",good_width)
     99    #print("good_width:",good_width)
    142100
    143101
     
    152110        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    153111    performance.sort()
    154     pprint.pprint(performance)
     112    #pprint.pprint(performance)
    155113    best_low = performance[0][1]
    156     print("best_low:", best_low)
    157 
    158    
    159     num_trials = 500
    160     widths = [good_width+(x/10.0) for x in range(-6,7) if good_width+(x/10.0) > 0.0]
     114    #print("best_low:", best_low)
     115
     116   
     117    num_trials = 500
     118    widths = [good_width+(x/100.0) for x in range(-60,75,5) if good_width+(x/100.0) > 0.0]
    161119    performance = []
    162120    for width in widths:
     
    168126        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    169127    performance.sort()
    170     pprint.pprint(performance)
     128    #pprint.pprint(performance)
    171129    best_width=performance[0][1]
    172     print("best_width:",best_width)
    173     print("final_performance:", performance[0][0])
    174    
     130    #print("best_width:",best_width)
     131    #print("final_performance:", performance[0][0])
     132
     133    wt.stop()
    175134    params = json.dumps({"low":best_low,"high":best_low+best_width})
    176135    return {'algorithm':"boxtest",
     
    200159    mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]
    201160    threshold = trimean(mean_diffs)/2.0
    202     print("init_threshold:", threshold)
     161    #print("init_threshold:", threshold)
    203162   
    204163    wt = WorkerThreads(2, trainAux)
     
    217176    #pprint.pprint(performance)
    218177    good_distance = performance[0][1]
    219     print("good_distance:",good_distance)
     178    #print("good_distance:",good_distance)
    220179
    221180   
     
    232191    #pprint.pprint(performance)
    233192    good_threshold = performance[0][1]
    234     print("good_threshold:", good_threshold)
     193    #print("good_threshold:", good_threshold)
    235194
    236195   
     
    247206    #pprint.pprint(performance)
    248207    best_distance = performance[0][1]
    249     print("best_distance:",best_distance)
    250 
    251    
    252     num_trials = 500
    253     performance = []
    254     for t in range(95,106):
     208    #print("best_distance:",best_distance)
     209
     210   
     211    num_trials = 500
     212    performance = []
     213    for t in range(90,111):
    255214        wt.addJob(good_threshold*(t/100.0), (best_distance,good_threshold*(t/100.0),num_trials))
    256215    wt.wait()
     
    262221    #pprint.pprint(performance)
    263222    best_threshold = performance[0][1]
    264     print("best_threshold:", best_threshold)
    265 
     223    #print("best_threshold:", best_threshold)
     224
     225    wt.stop()
    266226    params = json.dumps({'distance':best_distance,'threshold':best_threshold})
    267227    return {'algorithm':"midhinge",
     
    274234
    275235
    276 #classifiers = {'boxtest':{'train':trainBoxTest2, 'test':multiBoxTest},
    277 #               'midhinge':{'train':trainMidhinge, 'test':midhinge}}
     236classifiers = {'boxtest':{'train':trainBoxTest, 'test':multiBoxTest},
     237               'midhinge':{'train':trainMidhinge, 'test':midhinge}}
    278238
    279239
    280240db = nanownlib.storage.db(options.session_data)
    281 #cursor = db.cursor()
    282 #cursor.execute("SELECT min(sample) min, max(sample) max FROM probes")
    283 #train_start,test_end = cursor.fetchone()
    284 #train_end = int(test_end-train_start)
    285 #test_start = train_end+1
    286 #subsample_size = min(10000,(train_end-train_start+1)/4)
     241
     242import cProfile
     243
     244def trainClassifier(db, unusual_case, greater, trainer):
     245    threshold = 5.0 # in percent
     246    size = 4000
     247    result = None
     248    while size < db.populationSize('train')/5:
     249        size = min(size*2, int(db.populationSize('train')/5))
     250        result = trainer(db,unusual_case,greater,size)
     251        error = statistics.mean([result['false_positives'],result['false_negatives']])
     252        print("subseries size: %d | error: %f | false_positives: %f | false_negatives: %f"
     253              % (size,error,result['false_positives'],result['false_negatives']))
     254        if error < threshold:
     255            break
     256    if result != None:
     257        db.addClassifierResults(result)
     258
     259    return result
     260
    287261
    288262start = time.time()
     
    294268print(":", end-start)
    295269
    296 import cProfile
    297 
    298 
    299 
    300 
    301 for size in (500,1000,2000,4000,5000,6000):
     270
     271for c,funcs in classifiers.items():
    302272    start = time.time()
    303     #cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
    304     results = trainMidhinge(db, unusual_case, greater, size)
    305     #db.addClassifierResults(results)
    306     print("midhinge result:")
    307     pprint.pprint(results)
    308     print(":", time.time()-start)
     273    print("Training %s..." % c)
     274    result = trainClassifier(db, unusual_case, greater, funcs['train'])
     275    print("%s result:" % c)
     276    pprint.pprint(result)
     277    print("completed in:", time.time()-start)
    309278
    310279sys.exit(0)
  • trunk/lib/nanownlib/storage.py

    r8 r9  
    130130            offset = numpy.random.random_integers(0,len(population)-1)
    131131
    132         ret_val = population[offset:offset+size]
     132        try:
     133            ret_val = population[offset:offset+size]
     134        except Exception as e:
     135            print(e, offset, size)
     136           
    133137        if len(ret_val) < size:
    134138            ret_val += population[0:size-len(ret_val)]
Note: See TracChangeset for help on using the changeset viewer.