Context Navigation

← Previous Change
Next Change →

Changeset 11 for trunk/lib

Timestamp:

07/16/15 20:40:01 (10 years ago)

Author:

tim

Message:

Location:

trunk/lib/nanownlib

Files:

: 1 added
: 4 edited

__init__.py (modified) (6 diffs)
parallel.py (modified) (2 diffs)
stats.py (modified) (11 diffs)
storage.py (modified) (2 diffs)
train.py (added)

Legend:

: Unmodified
: Added
: Removed

trunk/lib/nanownlib/init.py

-                      r10
+                      r11
         if p['sent']==1 and (seen[key]['observed'] > p['observed']): #earliest sent
             seen[key] = p
             suspect += 's'
+            suspect += 's' # duplicated sent packets
             continue
         if p['sent']==0 and (seen[key]['observed'] > p['observed']): #earliest rcvd
             seen[key] = p
             suspect += 'r'
+            suspect += 'r' # duplicated received packets
             continue
 …
     suspect,packets = removeDuplicatePackets(packets)
+    sort_key = lambda d: (d['tcpseq'],d['observed'])
+    sort_key = lambda d: (d['observed'],d['tcpseq'])
+    alt_key = lambda d: (d['tcpseq'],d['observed'])
     sent = sorted((p for p in packets if p['sent']==1 and p['payload_len']>0), key=sort_key)
     rcvd = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=sort_key)
-    alt_key = lambda d: (d['observed'],d['tcpseq'])
     rcvd_alt = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=alt_key)
     s_off = trim_sent
     if s_off >= len(sent):
+        suspect += 'd' # dropped packet?
         s_off = -1
     last_sent = sent[s_off]
     r_off = len(rcvd) - trim_rcvd - 1
+    if r_off <= 0:
+    if r_off < 0:
+        suspect += 'd' # dropped packet?
         r_off = 0
     last_rcvd = rcvd[r_off]
     if last_rcvd != rcvd_alt[r_off]:
         suspect += 'R'
+        suspect += 'R' # reordered received packets
     packet_rtt = last_rcvd['observed'] - last_sent['observed']
 …
         last_sent_ack = min(((p['observed'],p) for p in packets
                              if p['sent']==0 and p['payload_len']+last_sent['tcpseq']==p['tcpack']))[1]
     except Exception as e:
         sys.stderr.write("WARN: Could not find last_sent_ack.\n")
 …
 def analyzeProbes(db):
     db.conn.execute("CREATE INDEX IF NOT EXISTS packets_probe ON packets (probe_id)")
+    db.conn.commit()
     pcursor = db.conn.cursor()
-    db.conn.commit()
     pcursor.execute("SELECT tcpts_mean FROM meta")
     try:
 …
             db.addTrimAnalyses([analysis])
         except Exception as e:
             traceback.print_exc()
+            #traceback.print_exc()
             sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
 …
                     analysis['probe_id'] = probe_id
                 except Exception as e:
                     print(e)
                     sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % pid)
+                    #traceback.print_exc()
+                    sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
                 db.addTrimAnalyses([analysis])

trunk/lib/nanownlib/parallel.py

-                      r9
+                      r11
+#
+import sys
 import threading
 import queue
 …
                 self.resultq.put((job_id, self.target(*args)))
             except Exception as e:
                 sys.stderr.write("ERROR: Job '%s' failed with '%s'.  Dropping...\n",
+                sys.stderr.write("ERROR: Job '%s' failed with '%s'.  Dropping...\n" %
                                  (str(job_id),str(e)))
             self.workq.task_done()

trunk/lib/nanownlib/stats.py

-                      r10
+                      r11
 def ubersummary(values, distance=25):
     left2 = 50-distance
+    left3 = 50-(distance/2.0)
     left1 = left2/2.0
-    left3 = (left2+50)/2.0
     right2 = 50+distance
     right3 = (right2+50)/2.0
+    right3 = 50+(distance/2.0)
     right1 = (right2+100)/2.0
     l1,l2,l3,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,right3,right2,right1))
-    #print(left1,left2,left3,50,right3,right2,right1)
     #print(l1,l2,l3,m,r3,r2,r1)
     return (l1+l2*4+l3+r3+r2*4+r1)/12.0
     #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
+def quadsummary(values, distance=25):
+    left2 = 50-distance
+    left1 = left2/2.0
+    right2 = 50+distance
+    right1 = (right2+100)/2.0
+    l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1))
+    #print(left1,left2,left3,50,right3,right2,right1)
+    #print(l1,l2,l3,m,r3,r2,r1)
+    return (l1+l2+r2+r1)/4.0
+    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
 def quadsummary(values, distance=25):
     left1 = 50-distance
 …
     #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
+def tsvalwmean(subseries):
+    weights = [(s['unusual_packet']+s['other_packet'])**2 for s in subseries]
+    normalizer = sum(weights)/len(weights)
+    return numpy.mean([weights[i]*(subseries[i]['unusual_tsval']-subseries[i]['other_tsval'])/normalizer
+                       for i in range(len(weights))])
+#def tsvalwmean(subseries):
+#    return numpy.mean([(s['unusual_tsval']-s['other_tsval']) for s in subseries])
 def weightedMean(derived, weights):
     normalizer = sum(weights.values())/len(weights)
 …
     return statistics.mean([w*(derived[k]['long_tsval']-derived[k]['short_tsval'])/normalizer for k,w in weights.items()])
 def estimateMean(trustFunc, weightFunc, alpha, derived):
 …
-#def estimateMedian(trustFunc, weightFunc, alpha, derived):
-#    trust = trustValues(derived, trustFunc)
-#    weights = weightFunc(derived, trust, alpha)
-#    return statistics.median([(derived[k]['long']-derived[k]['short']) for k,w in weights.items() if w > 0.0])
-def estimateMedian(derived):
-    return statistics.median([(d['long']-d['short']) for d in derived.values()])
-def estimateMidsummary(derived):
-    return midsummary([(d['long']-d['short']) for d in derived.values()])
-def estimateTrimean(derived):
-    return trimean([(d['long']-d['short']) for d in derived.values()])
-def tTest(expected_mean, derived):
-    diffs = [(d['long']-d['short']) for d in derived.values()]
-    null_tval, null_pval = scipy.stats.ttest_1samp(diffs, 0.0)
-    tval, pval = scipy.stats.ttest_1samp(diffs, expected_mean)
-    if pval < null_pval:
-        return 1
-    else:
-        return 0
-def diffMedian(derived):
-    l = [tc['long']-tc['short'] for s,tc in derived.items()]
-    return statistics.median(l)
-def subsample_ids(db, probe_type, subsample_size=None):
-    cursor = db.conn.cursor()
-    cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))
-    population_size = cursor.fetchone()[0]
-    #print("population_size:", population_size)
-    if subsample_size == None or subsample_size > population_size:
-        subsample_size = population_size
-    start = numpy.random.random_integers(0,population_size-1)
-    cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ? OFFSET ?", (probe_type,subsample_size,start))
-    for row in cursor:
-        subsample_size -= 1
-        yield row['sample']
-    if subsample_size > 0:
-        cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ?", (probe_type,subsample_size))
-        for row in cursor:
-            yield row['sample']
-def subsample(db, probe_type, subsample_size=None):
-    cursor = db.conn.cursor()
-    cursor.execute("SELECT count(test_case) FROM (SELECT test_case FROM probes GROUP BY test_case)")
-    num_test_cases = cursor.fetchone()[0]
-    for sid in subsample_ids(db, probe_type, subsample_size):
-        cursor.execute("SELECT test_case,tc_order,time_of_day,reported,userspace_rtt,suspect,packet_rtt,tsval_rtt FROM probes p,analysis a WHERE p.sample=? and a.probe_id=p.id", (sid,))
-        probes = cursor.fetchall()
-        if len(probes) != num_test_cases:
-            sys.stderr.write("WARN: sample %d had %d probes, but %d expected!  Discarding...\n" % (sid, len(probes), num_test_cases))
-            continue
-        yield (sid,[dict(r) for r in probes])
-def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
-    population_size = db.populationSize(probe_type)
-    if size == None or size > population_size:
-        size = population_size
-    if offset == None or offset >= population_size or offset < 0:
-        offset = numpy.random.random_integers(0,population_size-1)
-    query="""
-      SELECT %(field)s AS unusual_case,
-             (SELECT avg(%(field)s) FROM probes,analysis
-              WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
-      FROM   (SELECT probes.sample,%(field)s FROM probes,analysis
-              WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
-      LIMIT :size OFFSET :offset
-    """ % {"field":field}
-    params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}
-    cursor = db.conn.cursor()
-    cursor.execute(query, params)
-    ret_val = [dict(row) for row in cursor.fetchall()]
-    #for row in cursor:
-    #    size -= 1
-    #    yield dict(row)
-    size -= len(ret_val)
-    if size > 0:
-        params['offset'] = 0
-        params['size'] = size
-        cursor.execute(query, params)
-        ret_val += [dict(row) for row in cursor.fetchall()]
-        #for row in cursor:
-        #    yield dict(row)
-    return ret_val
-# if test_cases=None, include all of them.  Otherwise, include only the specified test cases.
-def samples2Distributions(samples, field, test_cases=None):
-    ret_val = {}
-    for sid,probes in samples:
-        for p in probes:
-            if p['test_case'] in ret_val:
-                ret_val[p['test_case']].append(p[field])
-            elif test_cases == None or p['test_case'] in test_cases:
-                ret_val[p['test_case']] = [p[field]]
-    return ret_val
-def samples2MeanDiffs(samples, field, unusual_case):
-    ret_val = {}
-    for sid,probes in samples:
-        unusual_value = None
-        for p in probes:
-            if p['test_case'] == unusual_case:
-                unusual_value = p[field]
-                break
-        yield statistics.mean([unusual_value-p[field] for p in probes if p['test_case'] != unusual_case])
-def bootstrap(estimator, db, probe_type, test_cases, subsample_size, num_trials):
-    ret_val = []
-    for t in range(num_trials):
-        ret_val.append(estimator(test_cases, subsample(db, probe_type, subsample_size)))
-    return ret_val
-def bootstrap2(estimator, db, probe_type, subsample_size, num_trials):
-    ret_val = []
-    for t in range(num_trials):
-        ret_val.append(estimator(subsample(db, probe_type, subsample_size)))
-    return ret_val
 def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
     ret_val = []
 …
-# Returns the test case name that clearly has higher RTT; otherwise, returns None
-def boxTest(params, test_cases, samples):
-    if len(test_cases) != 2:
-        # XXX: somehow generalize the box test to handle more than 2 cases
-        raise Exception()
-    dists = samples2Distributions(samples,'packet_rtt', test_cases) #XXX: field from params
-    tmp1,tmp2 = dists.items()
-    test_case1,dist1 = tmp1
-    test_case2,dist2 = tmp2
-    d1_high = numpy.percentile(dist1, params['high'])
-    d2_low = numpy.percentile(dist2, params['low'])
-    if d1_high < d2_low:
-        return test_case2
-    d1_low = numpy.percentile(dist1, params['low'])
-    d2_high = numpy.percentile(dist2, params['high'])
-    if d2_high < d1_low:
-        return test_case1
-    return None
 # Returns 1 if unusual_case is unusual in the expected direction
 #         0 if it isn't unusual
 #        -1 if it is unusual in the wrong direction
 def multiBoxTest(params, greater, samples):
     uc = [s['unusual_case'] for s in samples]
     rest = [s['other_cases'] for s in samples]
+    uc = [s['unusual_packet'] for s in samples]
+    rest = [s['other_packet'] for s in samples]
     uc_high,uc_low = numpy.percentile(uc, (params['high'],params['low']))
 …
 #         0 otherwise
 def summaryTest(f, params, greater, samples):
     diffs = [s['unusual_case']-s['other_cases'] for s in samples]
+    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
     mh = f(diffs, params['distance'])
 …
         else:
             return 0
 midsummaryTest = functools.partial(summaryTest, midsummary)
 …
 def kfilter(params, observations):
     x = numpy.array(observations)
+    x = numpy.array(observations)
     movement = 0
     est = []
+    est = []
     var = []
     kf = KalmanFilter1D(x0 = quadsummary(x), # initial state
                         #P  = 10000,          # initial variance
                         P  = 10,          # initial variance
+                        #P  = 10000,         # initial variance
+                        P  = 10,            # initial variance
                         R  = numpy.std(x),   # msensor noise
                         Q  = 0)              # movement noise
 …
 def kalmanTest(params, greater, samples):
     diffs = [s['unusual_case']-s['other_cases'] for s in samples]
+    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
     m = kfilter(params, diffs)['est'][-1]
 …
+def kalmanTest2(params, greater, samples):
+    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
+    estimates = []
+    size = 500
+    for i in range(100):
+        off = random.randrange(0,len(diffs))
+        sub = diffs[off:size]
+        if len(sub) < size:
+            sub += diffs[0:size-len(sub)]
+        estimates.append(kfilter(params, sub)['est'][-1])
+    m = quadsummary(estimates)
+def tsvalwmeanTest(params, greater, samples):
+    m = tsvalwmean(samples)
     if greater:
         if m > params['threshold']:
 …
         else:
             return 0

trunk/lib/nanownlib/storage.py

-                      r10
+                      r11
+    def subseries(self, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
+        cache_key = (probe_type,unusual_case,field)
+    def subseries(self, probe_type, unusual_case, size=None, offset=None):
+        cache_key = (probe_type,unusual_case)
         if cache_key not in self._population_cache:
             query="""
+            SELECT %(field)s AS unusual_case,
+                   (SELECT avg(%(field)s) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
+            FROM   (SELECT probes.sample,%(field)s FROM probes,analysis
+            SELECT packet_rtt AS unusual_packet,
+                   (SELECT avg(packet_rtt) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_packet,
+                   tsval_rtt AS unusual_tsval,
+                   (SELECT avg(tsval_rtt) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_tsval,
+                   reported AS unusual_reported,
+                   (SELECT avg(reported) FROM probes,analysis
+                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_reported
+            FROM   (SELECT probes.sample,packet_rtt,tsval_rtt,reported FROM probes,analysis
                     WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
             """ % {"field":field}
+            """
             params = {"probe_type":probe_type, "unusual_case":unusual_case}
 …
         return [self._insert('trim_analysis', row) for row in analyses]
     def addClassifierResults(self, results):
+    def addClassifierResult(self, results):
         ret_val = self._insert('classifier_results', results)
         self.conn.commit()
         return ret_val
     def fetchClassifierResult(self, classifier, trial_type, num_observations):
+    def fetchClassifierResult(self, classifier, trial_type, num_observations, params=None):
         query = """
           SELECT * FROM classifier_results
+          WHERE classifier=? AND trial_type=? AND num_observations=?
+          ORDER BY false_positives+false_negatives
+          LIMIT 1;
+            WHERE classifier=:classifier
+                  AND trial_type=:trial_type
+                  AND num_observations=:num_observations"""
+        if params != None:
+            query += """
+                  AND params=:params"""
+        query += """
+            ORDER BY false_positives+false_negatives
+            LIMIT 1
         """
+        qparams = {'classifier':classifier, 'trial_type':trial_type,
+                   'num_observations':num_observations,'params':params}
         cursor = self.conn.cursor()
         cursor.execute(query, (classifier, trial_type, num_observations))
+        cursor.execute(query, qparams)
         ret_val = cursor.fetchone()
         if ret_val != None:
             ret_val = dict(ret_val)
         return ret_val
     def deleteClassifierResults(self, classifier, trial_type, num_observations=None):
         params = {"classifier":classifier,"trial_type":trial_type,"num_observations":num_observations}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11 for trunk/lib

Legend:

trunk/lib/nanownlib/__init__.py

trunk/lib/nanownlib/parallel.py

trunk/lib/nanownlib/stats.py

trunk/lib/nanownlib/storage.py

Download in other formats:

trunk/lib/nanownlib/init.py