Changeset 11 for trunk/lib/nanownlib


Ignore:
Timestamp:
07/16/15 20:40:01 (9 years ago)
Author:
tim
Message:

.

Location:
trunk/lib/nanownlib
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/nanownlib/__init__.py

    r10 r11  
    220220        if p['sent']==1 and (seen[key]['observed'] > p['observed']): #earliest sent
    221221            seen[key] = p
    222             suspect += 's'
     222            suspect += 's' # duplicated sent packets
    223223            continue
    224224        if p['sent']==0 and (seen[key]['observed'] > p['observed']): #earliest rcvd
    225225            seen[key] = p
    226             suspect += 'r'
     226            suspect += 'r' # duplicated received packets
    227227            continue
    228228   
     
    236236    suspect,packets = removeDuplicatePackets(packets)
    237237
    238     sort_key = lambda d: (d['tcpseq'],d['observed'])
     238    sort_key = lambda d: (d['observed'],d['tcpseq'])
     239    alt_key = lambda d: (d['tcpseq'],d['observed'])
    239240    sent = sorted((p for p in packets if p['sent']==1 and p['payload_len']>0), key=sort_key)
    240241    rcvd = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=sort_key)
    241 
    242     alt_key = lambda d: (d['observed'],d['tcpseq'])
    243242    rcvd_alt = sorted((p for p in packets if p['sent']==0 and p['payload_len']>0), key=alt_key)
    244243
    245244    s_off = trim_sent
    246245    if s_off >= len(sent):
     246        suspect += 'd' # dropped packet?
    247247        s_off = -1
    248248    last_sent = sent[s_off]
    249249
    250250    r_off = len(rcvd) - trim_rcvd - 1
    251     if r_off <= 0:
     251    if r_off < 0:
     252        suspect += 'd' # dropped packet?
    252253        r_off = 0
    253254    last_rcvd = rcvd[r_off]
    254255    if last_rcvd != rcvd_alt[r_off]:
    255         suspect += 'R'
     256        suspect += 'R' # reordered received packets
    256257   
    257258    packet_rtt = last_rcvd['observed'] - last_sent['observed']
     
    263264        last_sent_ack = min(((p['observed'],p) for p in packets
    264265                             if p['sent']==0 and p['payload_len']+last_sent['tcpseq']==p['tcpack']))[1]
     266       
    265267    except Exception as e:
    266268        sys.stderr.write("WARN: Could not find last_sent_ack.\n")
     
    301303def analyzeProbes(db):
    302304    db.conn.execute("CREATE INDEX IF NOT EXISTS packets_probe ON packets (probe_id)")
     305    db.conn.commit()
     306
    303307    pcursor = db.conn.cursor()
    304     db.conn.commit()
    305 
    306308    pcursor.execute("SELECT tcpts_mean FROM meta")
    307309    try:
     
    346348            db.addTrimAnalyses([analysis])
    347349        except Exception as e:
    348             traceback.print_exc()
     350            #traceback.print_exc()
    349351            sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
    350352       
     
    367369                    analysis['probe_id'] = probe_id
    368370                except Exception as e:
    369                     print(e)
    370                     sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % pid)
     371                    #traceback.print_exc()
     372                    sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
    371373                   
    372374                db.addTrimAnalyses([analysis])
  • trunk/lib/nanownlib/parallel.py

    r9 r11  
    11#
    22
     3import sys
    34import threading
    45import queue
     
    3334                self.resultq.put((job_id, self.target(*args)))
    3435            except Exception as e:
    35                 sys.stderr.write("ERROR: Job '%s' failed with '%s'.  Dropping...\n",
     36                sys.stderr.write("ERROR: Job '%s' failed with '%s'.  Dropping...\n" %
    3637                                 (str(job_id),str(e)))
    3738            self.workq.task_done()
  • trunk/lib/nanownlib/stats.py

    r10 r11  
    144144def ubersummary(values, distance=25):
    145145    left2 = 50-distance
     146    left3 = 50-(distance/2.0)
    146147    left1 = left2/2.0
    147     left3 = (left2+50)/2.0
    148148    right2 = 50+distance
    149     right3 = (right2+50)/2.0
     149    right3 = 50+(distance/2.0)
    150150    right1 = (right2+100)/2.0
    151151    l1,l2,l3,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,right3,right2,right1))
    152     #print(left1,left2,left3,50,right3,right2,right1)
    153152    #print(l1,l2,l3,m,r3,r2,r1)
    154153    return (l1+l2*4+l3+r3+r2*4+r1)/12.0
    155154    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
    156155
    157 def quadsummary(values, distance=25):
    158     left2 = 50-distance
    159     left1 = left2/2.0
    160     right2 = 50+distance
    161     right1 = (right2+100)/2.0
    162     l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1))
    163     #print(left1,left2,left3,50,right3,right2,right1)
    164     #print(l1,l2,l3,m,r3,r2,r1)
    165     return (l1+l2+r2+r1)/4.0
    166     #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
    167 
     156   
    168157def quadsummary(values, distance=25):
    169158    left1 = 50-distance
     
    177166    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
    178167
    179    
     168
     169def tsvalwmean(subseries):
     170    weights = [(s['unusual_packet']+s['other_packet'])**2 for s in subseries]
     171    normalizer = sum(weights)/len(weights)
     172    return numpy.mean([weights[i]*(subseries[i]['unusual_tsval']-subseries[i]['other_tsval'])/normalizer
     173                       for i in range(len(weights))])
     174
     175#def tsvalwmean(subseries):
     176#    return numpy.mean([(s['unusual_tsval']-s['other_tsval']) for s in subseries])
     177
     178
    180179def weightedMean(derived, weights):
    181180    normalizer = sum(weights.values())/len(weights)
     
    186185    return statistics.mean([w*(derived[k]['long_tsval']-derived[k]['short_tsval'])/normalizer for k,w in weights.items()])
    187186
     187
     188   
    188189
    189190def estimateMean(trustFunc, weightFunc, alpha, derived):
     
    199200
    200201
    201 #def estimateMedian(trustFunc, weightFunc, alpha, derived):
    202 #    trust = trustValues(derived, trustFunc)
    203 #    weights = weightFunc(derived, trust, alpha)
    204 
    205 #    return statistics.median([(derived[k]['long']-derived[k]['short']) for k,w in weights.items() if w > 0.0])
    206 
    207 def estimateMedian(derived):
    208     return statistics.median([(d['long']-d['short']) for d in derived.values()])
    209 
    210 
    211 def estimateMidsummary(derived):
    212     return midsummary([(d['long']-d['short']) for d in derived.values()])
    213 
    214 
    215 def estimateTrimean(derived):
    216     return trimean([(d['long']-d['short']) for d in derived.values()])
    217 
    218 
    219 def tTest(expected_mean, derived):
    220     diffs = [(d['long']-d['short']) for d in derived.values()]
    221     null_tval, null_pval = scipy.stats.ttest_1samp(diffs, 0.0)
    222     tval, pval = scipy.stats.ttest_1samp(diffs, expected_mean)
    223 
    224     if pval < null_pval:
    225         return 1
    226     else:
    227         return 0
    228 
    229    
    230 def diffMedian(derived):
    231     l = [tc['long']-tc['short'] for s,tc in derived.items()]
    232     return statistics.median(l)
    233 
    234 
    235 def subsample_ids(db, probe_type, subsample_size=None):
    236     cursor = db.conn.cursor()
    237     cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))
    238     population_size = cursor.fetchone()[0]
    239     #print("population_size:", population_size)
    240     if subsample_size == None or subsample_size > population_size:
    241         subsample_size = population_size
    242    
    243     start = numpy.random.random_integers(0,population_size-1)
    244     cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ? OFFSET ?", (probe_type,subsample_size,start))
    245     for row in cursor:
    246         subsample_size -= 1
    247         yield row['sample']
    248 
    249     if subsample_size > 0:
    250         cursor.execute("SELECT sample FROM probes WHERE type=? GROUP BY sample ORDER BY sample LIMIT ?", (probe_type,subsample_size))
    251         for row in cursor:
    252             yield row['sample']
    253    
    254 
    255 def subsample(db, probe_type, subsample_size=None):
    256     cursor = db.conn.cursor()
    257     cursor.execute("SELECT count(test_case) FROM (SELECT test_case FROM probes GROUP BY test_case)")
    258     num_test_cases = cursor.fetchone()[0]
    259    
    260     for sid in subsample_ids(db, probe_type, subsample_size):
    261         cursor.execute("SELECT test_case,tc_order,time_of_day,reported,userspace_rtt,suspect,packet_rtt,tsval_rtt FROM probes p,analysis a WHERE p.sample=? and a.probe_id=p.id", (sid,))
    262         probes = cursor.fetchall()
    263         if len(probes) != num_test_cases:
    264             sys.stderr.write("WARN: sample %d had %d probes, but %d expected!  Discarding...\n" % (sid, len(probes), num_test_cases))
    265             continue
    266         yield (sid,[dict(r) for r in probes])
    267 
    268 
    269 def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
    270     population_size = db.populationSize(probe_type)
    271 
    272     if size == None or size > population_size:
    273         size = population_size
    274     if offset == None or offset >= population_size or offset < 0:
    275         offset = numpy.random.random_integers(0,population_size-1)
    276 
    277     query="""
    278       SELECT %(field)s AS unusual_case,
    279              (SELECT avg(%(field)s) FROM probes,analysis
    280               WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
    281       FROM   (SELECT probes.sample,%(field)s FROM probes,analysis
    282               WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    283       LIMIT :size OFFSET :offset
    284     """ % {"field":field}
    285    
    286     params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}
    287     cursor = db.conn.cursor()
    288     cursor.execute(query, params)
    289     ret_val = [dict(row) for row in cursor.fetchall()]
    290     #for row in cursor:
    291     #    size -= 1
    292     #    yield dict(row)
    293 
    294     size -= len(ret_val)
    295     if size > 0:
    296         params['offset'] = 0
    297         params['size'] = size
    298         cursor.execute(query, params)
    299         ret_val += [dict(row) for row in cursor.fetchall()]
    300         #for row in cursor:
    301         #    yield dict(row)
    302    
    303     return ret_val
    304 
    305 
    306 # if test_cases=None, include all of them.  Otherwise, include only the specified test cases.
    307 def samples2Distributions(samples, field, test_cases=None):
    308     ret_val = {}
    309    
    310     for sid,probes in samples:
    311         for p in probes:
    312             if p['test_case'] in ret_val:
    313                 ret_val[p['test_case']].append(p[field])
    314             elif test_cases == None or p['test_case'] in test_cases:
    315                 ret_val[p['test_case']] = [p[field]]
    316                
    317                
    318     return ret_val
    319 
    320 
    321 def samples2MeanDiffs(samples, field, unusual_case):
    322     ret_val = {}
    323    
    324     for sid,probes in samples:
    325         unusual_value = None
    326         for p in probes:
    327             if p['test_case'] == unusual_case:
    328                 unusual_value = p[field]
    329                 break
    330         yield statistics.mean([unusual_value-p[field] for p in probes if p['test_case'] != unusual_case])
    331 
    332 
    333 def bootstrap(estimator, db, probe_type, test_cases, subsample_size, num_trials):
    334     ret_val = []
    335     for t in range(num_trials):
    336         ret_val.append(estimator(test_cases, subsample(db, probe_type, subsample_size)))
    337 
    338     return ret_val
    339 
    340 
    341 def bootstrap2(estimator, db, probe_type, subsample_size, num_trials):
    342     ret_val = []
    343     for t in range(num_trials):
    344         ret_val.append(estimator(subsample(db, probe_type, subsample_size)))
    345 
    346     return ret_val
    347 
    348 
    349202def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
    350203    ret_val = []
     
    355208
    356209
    357 # Returns the test case name that clearly has higher RTT; otherwise, returns None
    358 def boxTest(params, test_cases, samples):
    359     if len(test_cases) != 2:
    360         # XXX: somehow generalize the box test to handle more than 2 cases
    361         raise Exception()
    362     dists = samples2Distributions(samples,'packet_rtt', test_cases) #XXX: field from params
    363 
    364     tmp1,tmp2 = dists.items()
    365     test_case1,dist1 = tmp1
    366     test_case2,dist2 = tmp2
    367    
    368     d1_high = numpy.percentile(dist1, params['high'])
    369     d2_low = numpy.percentile(dist2, params['low'])
    370     if d1_high < d2_low:
    371         return test_case2
    372 
    373     d1_low = numpy.percentile(dist1, params['low'])
    374     d2_high = numpy.percentile(dist2, params['high'])
    375 
    376     if d2_high < d1_low:
    377         return test_case1
    378    
    379     return None
    380 
    381 
    382210# Returns 1 if unusual_case is unusual in the expected direction
    383211#         0 if it isn't unusual
    384212#        -1 if it is unusual in the wrong direction
    385213def multiBoxTest(params, greater, samples):
    386     uc = [s['unusual_case'] for s in samples]
    387     rest = [s['other_cases'] for s in samples]
     214    uc = [s['unusual_packet'] for s in samples]
     215    rest = [s['other_packet'] for s in samples]
    388216   
    389217    uc_high,uc_low = numpy.percentile(uc, (params['high'],params['low']))
     
    407235#         0 otherwise
    408236def summaryTest(f, params, greater, samples):
    409     diffs = [s['unusual_case']-s['other_cases'] for s in samples]
     237    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
    410238
    411239    mh = f(diffs, params['distance'])
     
    420248        else:
    421249            return 0
     250
    422251
    423252midsummaryTest = functools.partial(summaryTest, midsummary)
     
    451280
    452281def kfilter(params, observations):
    453     x = numpy.array(observations)
     282    x = numpy.array(observations)   
    454283    movement = 0
    455     est = []   
     284    est = []
    456285    var = []
    457286    kf = KalmanFilter1D(x0 = quadsummary(x), # initial state
    458                         #P  = 10000,          # initial variance
    459                         P  = 10,          # initial variance
     287                        #P  = 10000,         # initial variance
     288                        P  = 10,            # initial variance
    460289                        R  = numpy.std(x),   # msensor noise
    461290                        Q  = 0)              # movement noise
     
    471300
    472301def kalmanTest(params, greater, samples):
    473     diffs = [s['unusual_case']-s['other_cases'] for s in samples]
     302    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
    474303
    475304    m = kfilter(params, diffs)['est'][-1]
     
    486315
    487316
    488 def kalmanTest2(params, greater, samples):
    489     diffs = [s['unusual_case']-s['other_cases'] for s in samples]
    490 
    491     estimates = []
    492     size = 500
    493     for i in range(100):
    494         off = random.randrange(0,len(diffs))
    495         sub = diffs[off:size]
    496         if len(sub) < size:
    497             sub += diffs[0:size-len(sub)]
    498         estimates.append(kfilter(params, sub)['est'][-1])
    499            
    500     m = quadsummary(estimates)
     317def tsvalwmeanTest(params, greater, samples):
     318    m = tsvalwmean(samples)
    501319    if greater:
    502320        if m > params['threshold']:
     
    509327        else:
    510328            return 0
    511 
  • trunk/lib/nanownlib/storage.py

    r10 r11  
    115115
    116116
    117     def subseries(self, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
    118         cache_key = (probe_type,unusual_case,field)
    119        
     117    def subseries(self, probe_type, unusual_case, size=None, offset=None):
     118        cache_key = (probe_type,unusual_case)
    120119        if cache_key not in self._population_cache:
    121120            query="""
    122             SELECT %(field)s AS unusual_case,
    123                    (SELECT avg(%(field)s) FROM probes,analysis
    124                     WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
    125             FROM   (SELECT probes.sample,%(field)s FROM probes,analysis
     121            SELECT packet_rtt AS unusual_packet,
     122                   (SELECT avg(packet_rtt) FROM probes,analysis
     123                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_packet,
     124
     125                   tsval_rtt AS unusual_tsval,
     126                   (SELECT avg(tsval_rtt) FROM probes,analysis
     127                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_tsval,
     128
     129                   reported AS unusual_reported,
     130                   (SELECT avg(reported) FROM probes,analysis
     131                    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_reported
     132
     133            FROM   (SELECT probes.sample,packet_rtt,tsval_rtt,reported FROM probes,analysis
    126134                    WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    127             """ % {"field":field}
     135            """
    128136   
    129137            params = {"probe_type":probe_type, "unusual_case":unusual_case}
     
    207215        return [self._insert('trim_analysis', row) for row in analyses]
    208216
    209     def addClassifierResults(self, results):
     217    def addClassifierResult(self, results):
    210218        ret_val = self._insert('classifier_results', results)
    211219        self.conn.commit()
    212220        return ret_val
    213221
    214     def fetchClassifierResult(self, classifier, trial_type, num_observations):
     222    def fetchClassifierResult(self, classifier, trial_type, num_observations, params=None):
    215223        query = """
    216224          SELECT * FROM classifier_results
    217           WHERE classifier=? AND trial_type=? AND num_observations=?
    218           ORDER BY false_positives+false_negatives
    219           LIMIT 1;
     225            WHERE classifier=:classifier
     226                  AND trial_type=:trial_type
     227                  AND num_observations=:num_observations"""
     228        if params != None:
     229            query += """
     230                  AND params=:params"""
     231        query += """
     232            ORDER BY false_positives+false_negatives
     233            LIMIT 1
    220234        """
     235
     236        qparams = {'classifier':classifier, 'trial_type':trial_type,
     237                   'num_observations':num_observations,'params':params}
    221238        cursor = self.conn.cursor()
    222         cursor.execute(query, (classifier, trial_type, num_observations))
     239        cursor.execute(query, qparams)
    223240        ret_val = cursor.fetchone()
    224 
    225241        if ret_val != None:
    226242            ret_val = dict(ret_val)
    227243        return ret_val
    228 
     244   
    229245    def deleteClassifierResults(self, classifier, trial_type, num_observations=None):
    230246        params = {"classifier":classifier,"trial_type":trial_type,"num_observations":num_observations}
Note: See TracChangeset for help on using the changeset viewer.