Context Navigation

← Previous Changeset
Next Changeset →

Changeset 9

Timestamp:

07/10/15 14:03:04 (10 years ago)

Author:

tim

Message:

.

Location:

Files:

: 1 added
: 3 edited

bin/sampler (modified) (3 diffs)
bin/train (modified) (15 diffs)
lib/nanownlib/parallel.py (added)
lib/nanownlib/storage.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

trunk/bin/sampler

-                      r5
+                      r9
                     help='JSON representation of echo timing cases.')
 parser.add_argument('--no-tcpts', action='store_true', help='Disbale TCP timestamp profiling')
+parser.add_argument('--no-control', action='store_true', help='Do not collect separate control data.  Instead, synthesize it from test and train data.')
 parser.add_argument('session_name', default=None,
                     help='Name for the sampler session (used in output filenames)')
 …
 time.sleep(0.5) # ensure sniffer is fully ready and our process is migrated
+if options.no_control:
+    num_control = 0
+else:
+    num_control = int(num_samples*2/5)
+num_train = int((num_samples-num_control)/3)
+num_test = num_samples-num_train-num_control
+sample_types = [('train',num_train),
+                ('train_null',num_control),
+                ('test',num_test)]
 sid = 0
-sample_types = [('train',int(num_samples*1/5)),
-                ('train_null',int(num_samples*2/5)),
-                ('test',int(num_samples*2/5)),
-                ('test_null',0)]
 report_interval = 20
 start = time.time()
 …
 print("associate time:", end-start)
+if options.no_control:
+    print("TODO: implement control synthesizing!")
 start = time.time()
 num_probes = analyzeProbes(db)

trunk/bin/train

-                      r8
+                      r9
 import functools
 import argparse
-import threading
-import queue
 import pprint
 import json
 …
 import nanownlib.storage
 from nanownlib.stats import boxTest,multiBoxTest,subsample,bootstrap,bootstrap2,trimean,midhinge,midhingeTest,samples2Distributions,samples2MeanDiffs
+from nanownlib.parallel import WorkerThreads
 parser = argparse.ArgumentParser(
 …
 options = parser.parse_args()
class WorkerThreads(object):
    """Fixed-size pool of daemon threads that run ``target`` on queued jobs.

    Jobs are submitted with :meth:`addJob` as ``(job_id, args)`` pairs.  Each
    worker calls ``target(*args)`` and puts ``(job_id, return_value)`` on
    ``resultq``; with more than one worker, results appear in the order the
    jobs finish rather than the order they were submitted.

    Typical use::

        wt = WorkerThreads(2, func)
        wt.addJob(job_id, (arg1, arg2))
        wt.wait()                      # block until every queued job is done
        job_id, value = wt.resultq.get()
        wt.stop()                      # shut the worker threads down
    """

    # Class-level placeholders; every instance replaces them in __init__.
    workq = None
    resultq = None
    target = None

    def __init__(self, num_workers, target):
        """Start ``num_workers`` daemon threads that will call ``target``.

        Args:
            num_workers: Number of worker threads to start.
            target: Callable invoked as ``target(*args)`` for each job.
        """
        self.workq = queue.Queue()
        self.resultq = queue.Queue()
        self.target = target

        self.workers = []
        for _ in range(num_workers):
            t = threading.Thread(target=self._worker)
            # Daemon threads do not keep the interpreter alive at exit.
            t.daemon = True
            t.start()
            self.workers.append(t)

    def _worker(self):
        """Worker loop: process jobs from ``workq`` until a None sentinel arrives."""
        while True:
            item = self.workq.get()
            if item is None:
                # Sentinel queued by stop(): acknowledge it and exit the thread.
                self.workq.task_done()
                break

            job_id, args = item
            try:
                self.resultq.put((job_id, self.target(*args)))
            finally:
                # Always acknowledge the job, even when target raises;
                # otherwise wait() (Queue.join) would block forever.
                self.workq.task_done()

    def addJob(self, job_id, args):
        """Queue one job.

        Args:
            job_id: Identifier returned alongside the job's result.
            args: Sequence of positional arguments for ``target``.
        """
        self.workq.put((job_id, args))

    def wait(self):
        """Block until every job queued so far has been processed."""
        self.workq.join()

    def stop(self):
        """Ask every worker to exit and wait for the threads to finish."""
        # One sentinel per worker; each worker consumes exactly one and exits.
        # (The original iterated over the undefined name ``workers``.)
        for _ in self.workers:
            self.workq.put(None)
        for w in self.workers:
            w.join()
 …
         return false_positives,false_negatives
     start = time.time()
+    #start = time.time()
     wt = WorkerThreads(2, trainAux)
 …
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
     pprint.pprint(performance)
     print(time.time()-start)
+    #pprint.pprint(performance)
+    #print(time.time()-start)
     num_trials = 200
 …
                             width, statistics.mean(false_negatives), statistics.mean(false_positives)))
     performance.sort()
     pprint.pprint(performance)
+    #pprint.pprint(performance)
     good_width = performance[0][1]
     print("good_width:",good_width)
+    #print("good_width:",good_width)
 …
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
     pprint.pprint(performance)
+    #pprint.pprint(performance)
     best_low = performance[0][1]
     print("best_low:", best_low)
     num_trials = 500
     widths = [good_width+(x/10.0) for x in range(-6,7) if good_width+(x/10.0) > 0.0]
+    #print("best_low:", best_low)
+    num_trials = 500
+    widths = [good_width+(x/100.0) for x in range(-60,75,5) if good_width+(x/100.0) > 0.0]
     performance = []
     for width in widths:
 …
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
     pprint.pprint(performance)
+    #pprint.pprint(performance)
     best_width=performance[0][1]
+    print("best_width:",best_width)
+    print("final_performance:", performance[0][0])
+    #print("best_width:",best_width)
+    #print("final_performance:", performance[0][0])
+    wt.stop()
     params = json.dumps({"low":best_low,"high":best_low+best_width})
     return {'algorithm':"boxtest",
 …
     mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]
     threshold = trimean(mean_diffs)/2.0
     print("init_threshold:", threshold)
+    #print("init_threshold:", threshold)
     wt = WorkerThreads(2, trainAux)
 …
     #pprint.pprint(performance)
     good_distance = performance[0][1]
     print("good_distance:",good_distance)
+    #print("good_distance:",good_distance)
 …
     #pprint.pprint(performance)
     good_threshold = performance[0][1]
     print("good_threshold:", good_threshold)
+    #print("good_threshold:", good_threshold)
 …
     #pprint.pprint(performance)
     best_distance = performance[0][1]
     print("best_distance:",best_distance)
     num_trials = 500
     performance = []
     for t in range(95,106):
+    #print("best_distance:",best_distance)
+    num_trials = 500
+    performance = []
+    for t in range(90,111):
         wt.addJob(good_threshold*(t/100.0), (best_distance,good_threshold*(t/100.0),num_trials))
     wt.wait()
 …
     #pprint.pprint(performance)
     best_threshold = performance[0][1]
+    print("best_threshold:", best_threshold)
+    #print("best_threshold:", best_threshold)
+    wt.stop()
     params = json.dumps({'distance':best_distance,'threshold':best_threshold})
     return {'algorithm':"midhinge",
 …
 #classifiers = {'boxtest':{'train':trainBoxTest2, 'test':multiBoxTest},
 #               'midhinge':{'train':trainMidhinge, 'test':midhinge}}
+classifiers = {'boxtest':{'train':trainBoxTest, 'test':multiBoxTest},
+               'midhinge':{'train':trainMidhinge, 'test':midhinge}}
 db = nanownlib.storage.db(options.session_data)
+#cursor = db.cursor()
+#cursor.execute("SELECT min(sample) min, max(sample) max FROM probes")
+#train_start,test_end = cursor.fetchone()
+#train_end = int(test_end-train_start)
+#test_start = train_end+1
+#subsample_size = min(10000,(train_end-train_start+1)/4)
+import cProfile
def trainClassifier(db, unusual_case, greater, trainer):
    """Train a classifier on growing subseries until its error is acceptable.

    Starting from a subseries size of 4000, the size is doubled (capped at
    one fifth of the 'train' population, rounded down) and ``trainer`` is
    re-run.  Training stops as soon as the mean of the reported
    false-positive and false-negative values drops below the threshold, or
    once the size cap has been tried.  The last result, if any, is stored
    through ``db.addClassifierResults``.

    Args:
        db: Object providing ``populationSize(name)`` and
            ``addClassifierResults(result)``.
        unusual_case: Passed through unchanged to ``trainer``.
        greater: Passed through unchanged to ``trainer``.
        trainer: Callable invoked as
            ``trainer(db, unusual_case, greater, size)``; its return value
            must be indexable by 'false_positives' and 'false_negatives'.

    Returns:
        The result of the last training run, or None when no run took place
        (the size cap does not exceed the initial size of 4000).
    """
    threshold = 5.0  # in percent
    size = 4000
    result = None

    # Use the same integer cap for the loop test and for the size clamp.
    # Comparing against the float populationSize/5 while clamping to its
    # truncated value left ``size`` permanently below the float whenever the
    # population was not a multiple of 5, so a classifier that never reached
    # the threshold was retrained at the same size forever.
    max_size = int(db.populationSize('train') / 5)

    while size < max_size:
        size = min(size * 2, max_size)
        result = trainer(db, unusual_case, greater, size)
        error = statistics.mean([result['false_positives'], result['false_negatives']])
        print("subseries size: %d | error: %f | false_positives: %f | false_negatives: %f"
              % (size, error, result['false_positives'], result['false_negatives']))
        if error < threshold:
            break

    if result is not None:
        db.addClassifierResults(result)

    return result
 start = time.time()
 …
 print(":", end-start)
+import cProfile
+for size in (500,1000,2000,4000,5000,6000):
+for c,funcs in classifiers.items():
     start = time.time()
+    #cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
+    results = trainMidhinge(db, unusual_case, greater, size)
+    #db.addClassifierResults(results)
+    print("midhinge result:")
+    pprint.pprint(results)
+    print(":", time.time()-start)
+    print("Training %s..." % c)
+    result = trainClassifier(db, unusual_case, greater, funcs['train'])
+    print("%s result:" % c)
+    pprint.pprint(result)
+    print("completed in:", time.time()-start)
 sys.exit(0)

trunk/lib/nanownlib/storage.py

-                      r8
+                      r9
             offset = numpy.random.random_integers(0,len(population)-1)
+        ret_val = population[offset:offset+size]
+        try:
+            ret_val = population[offset:offset+size]
+        except Exception as e:
+            print(e, offset, size)
         if len(ret_val) < size:
             ret_val += population[0:size-len(ret_val)]

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: