Context Navigation

← Previous Changeset
Next Changeset →

Changeset 6

Timestamp:

07/09/15 12:27:04 (10 years ago)

Author:

tim

Message:

Location:

trunk

Files:

1 added
3 edited

bin/graph (added)
bin/train (modified) (10 diffs)
lib/nanownlib/__init__.py (modified) (3 diffs)
lib/nanownlib/stats.py (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/bin/train

-                      r4
+                      r6
 def trainMidhinge(db, unusual_case, greater, subsample_size):
+def trainMidhinge(db, unusual_case, greater, subseries_size):
     def trainAux(distance, threshold, num_trials):
         estimator = functools.partial(midhingeTest, {'distance':distance,'threshold':threshold}, unusual_case, greater)
         estimates = bootstrap2(estimator, db, 'train', subsample_size, num_trials)
         null_estimates = bootstrap2(estimator, db, 'train_null', subsample_size, num_trials)
+        estimator = functools.partial(midhingeTest, {'distance':distance,'threshold':threshold}, greater)
+        estimates = bootstrap3(estimator, db, 'train', unusual_case, subseries_size, num_trials)
+        null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, subseries_size, num_trials)
         bad_estimates = len([e for e in estimates if e != 1])
 …
     #determine expected delta based on differences
     start = time.time()
     mean_diffs = list(samples2MeanDiffs(subsample(db, 'train'), 'packet_rtt', unusual_case))
+    mean_diffs = [s['unusual_case']-s['other_cases'] for s in subseries(db, 'train', unusual_case)]
     threshold = trimean(mean_diffs)/2.0
     print("initial threshold:", threshold)
 …
     print("trimean threshold:", trimean(mean_diffs)/2.0)
     mean_diffs = list(samples2MeanDiffs(subsample(db, 'train_null'), 'packet_rtt', unusual_case))
+    mean_diffs = [s['unusual_case']-s['other_cases'] for s in subseries(db, 'train_null', unusual_case)]
     print(len(mean_diffs))
     print("null mean:", statistics.mean(mean_diffs))
 …
     start = time.time()
     wt = WorkerThreads(1, trainAux)
     num_trials = 200
+    wt = WorkerThreads(2, trainAux)
+    num_trials = 20
     performance = []
     #for distance in range(1,46,4):
 …
         fp,fn = errors
         performance.append(((fp+fn)/2.0, job_id, fn, fp))
+    #for distance in range(25,46,4):
+    #    job_id = distance
+    #    fp,fn = trainAux(distance, threshold, num_trials)
+    #    performance.append(((fp+fn)/2.0, job_id, fn, fp))
     performance.sort()
     pprint.pprint(performance)
 …
     num_trials = 200
+    num_trials = 20
     start = time.time()
     performance = []
 …
     num_trials = 200
+    num_trials = 20
     start = time.time()
     performance = []
 …
     best_distance = performance[0][1]
     print("best_distance:",best_distance)
     num_trials = 200
+    num_trials = 20
     start = time.time()
     performance = []
 …
     return {'algorithm':"midhinge",
             'params':params,
             'sample_size':subsample_size,
+            'sample_size':subseries_size,
             'num_trials':num_trials,
             'trial_type':"train",
 …
 print(":", end-start)
+import cProfile
 start = time.time()
+results = trainMidhinge(db, unusual_case, greater, 6000)
+db.addClassifierResults(results)
+#cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
+results = trainMidhinge(db, unusual_case, greater, 100)
+#db.addClassifierResults(results)
 print("midhinge result:", results)
 end = time.time()
 print(":", end-start)
+sys.exit(0)
 start = time.time()

trunk/lib/nanownlib/__init__.py

-                      r5
+                      r6
     for p in packets:
         key = (p['sent'],p['tcpseq'],p['tcpack'],p['payload_len'])
         #if (key not in seen)\
         #   or p['sent']==1 and (seen[key]['observed'] < p['observed'])\
         #   or p['sent']==0 and (seen[key]['observed'] > p['observed']):
         if (key not in seen) or (seen[key]['observed'] > p['observed']):
+        if (key not in seen)\
+           or p['sent']==1 and (seen[key]['observed'] < p['observed'])\
+           or p['sent']==0 and (seen[key]['observed'] > p['observed']):
+            #if (key not in seen) or (seen[key]['observed'] > p['observed']):
             seen[key] = p
 …
     pprint.pprint(evaluations)
     delta_margin = 0.1
+    delta_margin = 0.15
     best_strim = 0
     best_rtrim = 0
 …
     remaining_tm = trimean([row['packet_rtt'] for row in cursor])
+    return (tc, tm_map[tc]-remaining_tm)
+    ret_val = (tc, tm_map[tc]-remaining_tm)
+    print("unusual_case: %s, delta: %f" % ret_val)
+    return ret_val

trunk/lib/nanownlib/stats.py

-                      r4
+                      r6
         yield (sid,[dict(r) for r in probes])
+def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
+    cursor = db.conn.cursor()
+    cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))
+    population_size = cursor.fetchone()[0]
+    if size == None or size > population_size:
+        size = population_size
+    if offset == None or offset >= population_size or offset < 0:
+        offset = numpy.random.random_integers(0,population_size-1)
+    query="""
+      SELECT %(field)s AS unusual_case,
+             (SELECT avg(%(field)s) FROM probes,analysis
+              WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
+      FROM   (SELECT probes.sample,%(field)s FROM probes,analysis
+              WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
+      LIMIT :size OFFSET :offset
+    """ % {"field":field}
+    params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}
+    cursor.execute(query, params)
+    for row in cursor:
+        size -= 1
+        yield dict(row)
+    if size > 0:
+        params['offset'] = 0
+        params['size'] = size
+        cursor.execute(query, params)
+        for row in cursor:
+            yield dict(row)
 # if test_cases=None, include all of them.  Otherwise, include only the specified test cases.
 def samples2Distributions(samples, field, test_cases=None):
 …
+def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
+    ret_val = []
+    for t in range(num_trials):
+        ret_val.append(estimator(subseries(db, probe_type, unusual_case, subseries_size)))
+    return ret_val
 # Returns the test case name that clearly has higher RTT; otherwise, returns None
 def boxTest(params, test_cases, samples):
 …
 # Returns 1 if unusual_case is unusual in the expected direction
 #         0 otherwise
 def midhingeTest(params, unusual_case, greater, samples):
     diffs = list(samples2MeanDiffs(samples, 'packet_rtt', unusual_case))
+def midhingeTest(params, greater, samples):
+    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
     mh = midhinge(diffs, params['distance'])

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 6

Legend:

trunk/bin/train

trunk/lib/nanownlib/__init__.py

trunk/lib/nanownlib/stats.py

Download in other formats:

trunk/lib/nanownlib/__init__.py