Changeset 16


Timestamp: 08/01/15 19:01:31 (9 years ago)
Author: tim
Message: .
Location: trunk
Files: 9 edited, 1 moved

  • trunk/bin/analyze_packets

    r10 r16  
    1010import socket
    1111import json
    12 try:
    13     import requests
    14 except:
    15     sys.stderr.write('ERROR: Could not import requests module.  Ensure it is installed.\n')
    16     sys.stderr.write('       Under Debian, the package name is "python3-requests"\n.')
    17     sys.exit(1)
     12
    1813
    1914VERSION = "{DEVELOPMENT}"
     
    4035
    4136
    42 
    4337db = nanownlib.storage.db(options.db_file)
    4438
     
    4640import cProfile
    4741#cProfile.run('num_probes = analyzeProbes(db)')
    48 num_probes = analyzeProbes(db)
     42num_probes = analyzeProbes(db, recompute=True)
    4943end = time.time()
    5044print("analyzed %d probes' packets in: %f" % (num_probes, end-start))
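
For context, the recompute flag added to analyzeProbes() clears the stored analysis so every probe is re-analyzed from scratch. A minimal usage sketch (not part of the changeset; the database path is illustrative):

    import time
    import nanownlib.storage
    from nanownlib import analyzeProbes

    db = nanownlib.storage.db('samples.db')       # illustrative path
    start = time.time()
    # recompute=True empties the analysis table first, so loadPackets() sees
    # every probe as unanalyzed and the whole data set is processed again
    num_probes = analyzeProbes(db, recompute=True)
    print("analyzed %d probes' packets in: %f" % (num_probes, time.time() - start))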
  • trunk/bin/graph

    r14 r16  
    3636parser.add_argument('db_file', default=None,
    3737                    help='')
     38parser.add_argument('unusual_case', nargs='?', type=str, default=None,
     39                    help='The test case that is most unusual from the others. (default: auto detect)')
    3840options = parser.parse_args()
    3941db = nanownlib.storage.db(options.db_file)
     42if options.unusual_case == None:
     43    unusual_case,delta = findUnusualTestCase(db)
    4044
    4145
     
    6973#print('(test): %f' % weightedMean(derived,weights))
    7074
    71 diffs = list(differences(db, 'long'))
    72 reported_diffs = list(differences(db, 'long', 'reported'))
     75diffs = list(differences(db, unusual_case))
     76reported_diffs = list(differences(db, unusual_case, 'reported'))
    7377#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
    7478#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']
     
    7680
    7781def basicStatistics():
    78     print('packet_rtt diff mean: %f' % statistics.mean(diffs))
    79     print('packet_rtt diff median: %f' % statistics.median(diffs))
    80     print('packet_rtt diff midhinge: %f' % midsummary(diffs))
    81     print('packet_rtt diff trimean: %f' % trimean(diffs))
    82     print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
    83     print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
    84     print('packet_rtt diff septasummary: %f' % septasummary(diffs))
    85     print('packet_rtt diff MAD: %f' % mad(diffs))
     82    print('packet_rtt diff midhinge:     %10.2f' % midsummary(diffs))
     83    print('packet_rtt diff quadsummary:  %10.2f' % quadsummary(diffs))
     84    print('packet_rtt diff septasummary: %10.2f' % septasummary(diffs))
     85    print('packet_rtt diff MAD:          %10.2f' % mad(diffs))
    8686    try:
    87         print('reported diff trimean: %f' % trimean(reported_diffs))
    88         print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
    89         print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
    90         print('reported diff septasummary: %f' % septasummary(reported_diffs))
    91         print('reported diff MAD: %f' % mad(reported_diffs))
     87        print('reported diff midhinge:       %10.2f' % midsummary(reported_diffs))
     88        print('reported diff quadsummary:    %10.2f' % quadsummary(reported_diffs))
     89        print('reported diff septasummary:   %10.2f' % septasummary(reported_diffs))
     90        print('reported diff MAD:            %10.2f' % mad(reported_diffs))
    9291
    9392        #import cProfile
     
    108107    #print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))
    109108
    110 
     109basicStatistics()
    111110
    112111def exampleBoxTestHistogram(low,high):
    113112    num_bins = 300
    114     all = db.subseries('train','long')+db.subseries('test','long')
     113    all = db.subseries('train',unusual_case)+db.subseries('test',unusual_case)
    115114    s   = [s['other_packet'] for s in all]
    116115    l   = [s['unusual_packet'] for s in all]
     
    362361    #plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
    363362
    364 #tsFilteredHistogram()
     363tsFilteredHistogram()
    365364
    366365
     
    459458
    460459    chartname = "/home/tim/blindspot/research/timing-analysis/paper/figures/results/%s.svg" % (basename)
    461     print(chartname)
     460    #print(chartname)
    462461   
    463462    plt.clf()
     
    469468    color_id = 0
    470469
    471     cursor = db.conn.cursor()
    472     query = """
    473       SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
    474     """
    475     cursor.execute(query)
    476     classifiers = []
    477     for c in cursor:
    478         classifiers.append(c[0])
    479 
    480     best_obs = []
    481     best_error = []
     470    best_obs,best_error = evaluateTestResults(db)
     471    best_obs =   sorted(best_obs,   key=lambda x: x['num_observations'])
     472    best_error = sorted(best_error, key=lambda x: x['error'])
     473    winner = None
     474    for bo in best_obs:
     475        sys.stdout.write("%(num_observations)d obs / %(classifier)s / %(params)s" % bo)
     476        if winner == None:
     477            sys.stdout.write(" (winner)")
     478            winner = bo
     479        print()
     480       
     481    for be in best_error:
     482        sys.stdout.write("%(error)f%% error / %(classifier)s / %(params)s" % be)
     483        if winner == None:
     484            sys.stdout.write(" (winner)")
     485            winner = be
     486        print()
     487
     488    all = sorted(best_obs+best_error, key=lambda x: x['classifier'])
    482489    max_obs = 0
    483     for classifier in classifiers:
    484         query="""
    485         SELECT params,num_observations FROM classifier_results
    486         WHERE trial_type='test'
    487          AND classifier=:classifier
    488          AND (false_positives+false_negatives)/2.0 < 5.0
    489         ORDER BY num_observations,(false_positives+false_negatives)
    490         LIMIT 1
    491         """
    492         cursor.execute(query, {'classifier':classifier})
    493         row = cursor.fetchone()
    494         if row == None:
    495             query="""
    496             SELECT params,(false_positives+false_negatives)/2 FROM classifier_results
    497             WHERE trial_type='test' and classifier=:classifier
    498             ORDER BY (false_positives+false_negatives),num_observations
    499             LIMIT 1
    500             """
    501             cursor.execute(query, {'classifier':classifier})
    502             row = cursor.fetchone()
    503             if row == None:
    504                 sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
    505                 continue
    506 
    507             best_error.append((row[1], classifier))
    508         else:
    509             best_obs.append((row[1], classifier))
    510 
    511         best_params = row[0]
     490    for result in all:
    512491        query="""
    513492        SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
     
    517496        ORDER BY num_observations
    518497        """
    519         cursor.execute(query, {'classifier':classifier,'params':best_params})
     498        cursor = db.conn.cursor()
     499        cursor.execute(query, {'classifier':result['classifier'],'params':result['params']})
    520500
    521501        num_obs = []
     
    528508        path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
    529509        plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
    530         legend.append((classifier,path))
     510        legend.append((result['classifier'],path))
    531511        color_id = (color_id+1) % len(colors)
    532512
    533     best_obs.sort()
    534     best_error.sort()
    535     winner = None
    536     for bo in best_obs:
    537         sys.stdout.write("%d obs / %s" % bo)
    538         if winner == None:
    539             sys.stdout.write(" (winner)")
    540             winner = bo
    541         print()
    542        
    543     for be in best_error:
    544         sys.stdout.write("%f%% error / %s" % be)
    545         if winner == None:
    546             sys.stdout.write(" (winner)")
    547             winner = be
    548         print()
    549    
    550513    plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='x-small')
    551514    plt.plot([0, max_obs], [5.0, 5.0], "k--")
    552515    plt.xlabel('Number of Observations')
    553516    plt.ylabel('Error Rate')
    554     plt.savefig(chartname)
    555     #plt.show()
     517    #plt.savefig(chartname)
     518    plt.show()
    556519   
    557520graphTestResults()
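
bin/graph now takes an optional unusual_case argument and auto-detects it when omitted, instead of hard-coding the 'long' test case. A sketch of the detection step, assuming findUnusualTestCase is imported from nanownlib as elsewhere in this changeset (database path illustrative):

    import nanownlib.storage
    from nanownlib import findUnusualTestCase

    db = nanownlib.storage.db('samples.db')       # illustrative path
    # Pick the test case whose summary packet_rtt deviates most from the rest
    unusual_case, delta = findUnusualTestCase(db)
    print("unusual_case: %s, delta: %f" % (unusual_case, delta))
    # The script then computes timing differences relative to this case, e.g.
    # diffs = list(differences(db, unusual_case))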
  • trunk/bin/train

    r13 r16  
    6767            result = trainer(db,unusual_case,greater,num_obs)
    6868            result['classifier'] = classifier
    69             train_time = "%f" % (time.time()-start)
     69            train_time = "%8.2f" % (time.time()-start)
    7070           
    7171        error = statistics.mean([result['false_positives'],result['false_negatives']])
    72         print("number of observations: %d | error: %f | false_positives: %f | false_negatives: %f | train time: %s | params: %s"
     72        print("num. observations: %5d | error: %6.2f | fp: %6.2f | fn: %6.2f | train time: %s | params: %s"
    7373              % (num_obs, error, result['false_positives'],result['false_negatives'], train_time, result['params']))
    7474        db.addClassifierResult(result)
     
    9999        false_negatives = 100.0*bad_estimates/num_trials
    100100        false_positives = 100.0*bad_null_estimates/num_trials
    101         print("testAux:", num_observations, false_positives, false_negatives, params)
    102101        return false_positives,false_negatives
    103102
     
    107106        result = db.fetchClassifierResult(classifier, 'test', num_obs, jparams)
    108107        if result:
     108            test_time = '(stored)'
    109109            fp = result['false_positives']
    110110            fn = result['false_negatives']
    111111        else:
     112            start = time.time()
    112113            fp,fn = testAux(params, num_trials, num_obs)
    113114            result = {'classifier':classifier,
     
    119120                      'false_negatives':fn}
    120121            db.addClassifierResult(result)
     122            test_time = '%8.2f' % (time.time()-start)
     123           
     124        print("num. observations: %5d | error: %6.2f | fp: %6.2f | fn: %6.2f | test time: %s"
     125              % (num_obs,(fp+fn)/2.0,fp,fn,test_time))
    121126        return ((fp+fn)/2.0,result)
    122127   
     
    126131
    127132
    128     test_results = []
    129133    lte = math.log(target_error/100.0)
    130134    for tr in classifiers[classifier]['train_results']:
     
    133137        num_obs = tr['num_observations']
    134138   
    135         print("initial test")
     139        print("parameters:", params)
    136140        error,result = getResult(classifier,params,num_obs,num_trials)
    137         print("walking up")
     141        #print("walking up")
    138142        while (error > target_error) and (num_obs < max_obs):
    139143            increase_factor = 1.5 * lte/math.log(error/100.0) # don't ask how I came up with this
     
    142146            error,result = getResult(classifier,params,num_obs,num_trials)
    143147
    144         print("walking down")
     148        #print("walking down")
    145149        while (num_obs > 0):
    146             current_best = (error,result)
    147150            num_obs = int(0.95*num_obs)
    148151            error,result = getResult(classifier,params,num_obs,num_trials)
    149152            if error > target_error:
    150153                break
    151        
    152     return current_best
    153 
     154   
    154155
    155156if options.unusual_case != None:
    156157    unusual_case,greater = options.unusual_case.split(',')
    157158    greater = bool(int(greater))
     159    db.setUnusualCase(unusual_case,greater)
    158160else:
    159     start = time.time()
    160     unusual_case,unusual_diff = findUnusualTestCase(db)
    161     greater = (unusual_diff > 0)
    162     print("unusual_case:", unusual_case)
    163     print("unusual_diff:", unusual_diff)
    164     end = time.time()
    165     print(":", end-start)
     161    ucg = db.getUnusualCase()
     162    if ucg != None:
     163        unusual_case,greater = ucg
     164        print("Using cached unusual_case:", unusual_case)
     165    else:
     166        unusual_case,delta = findUnusualTestCase(db)
     167        greater = (delta > 0)
     168        print("Auto-detected unusual_case '%s' with delta: %d" %  (unusual_case,delta))
     169        db.setUnusualCase(unusual_case,greater)
    166170
    167171
     
    172176    print("Training %s..." % c)
    173177    result = trainClassifier(db, unusual_case, greater, c, c in options.retrain)
    174     print("%s result:" % c)
    175     pprint.pprint(result)
    176     print("completed in:", time.time()-start)
     178    #print("%s result:" % c)
     179    #pprint.pprint(result)
     180    print("completed in: %8.2f\n"% (time.time()-start))
    177181
    178182db.clearCache()
     
    181185    start = time.time()
    182186    print("Testing %s..." % c)
    183     error,result = testClassifier(db, unusual_case, greater, c, c in (options.retest+options.retrain))
    184     print("%s result:" % c)
    185     pprint.pprint(result)
    186     classifiers[c]['test_error'] = error
    187     print("completed in:", time.time()-start)
     187    testClassifier(db, unusual_case, greater, c, c in (options.retest+options.retrain))
     188    print("completed in: %8.2f\n"% (time.time()-start))
     189
     190
     191best_obs,best_error = evaluateTestResults(db)
     192best_obs =   sorted(best_obs,   key=lambda x: x['num_observations'])
     193best_error = sorted(best_error, key=lambda x: x['error'])
     194winner = None
     195for bo in best_obs:
     196    sys.stdout.write("%(num_observations)5d obs   | %(classifier)12s | %(params)s" % bo)
     197    if winner == None:
     198        sys.stdout.write(" (winner)")
     199        winner = bo
     200    print()
     201       
     202for be in best_error:
     203    sys.stdout.write("%(error)3.2f%% error | %(classifier)12s | %(params)s" % be)
     204    if winner == None:
     205        sys.stdout.write(" (winner)")
     206        winner = be
     207    print()
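
One step worth calling out in the bin/train search loop is the "walking up" step size, increase_factor = 1.5 * ln(target_error/100) / ln(error/100) (the source comment says "don't ask how I came up with this"). A worked example with made-up numbers; the final scaling of num_obs happens outside the shown hunk, so that line is an assumption:

    import math

    target_error = 5.0    # percent (illustrative)
    error = 20.0          # current measured error rate, percent (illustrative)
    num_obs = 1000        # observations used by the last trial (illustrative)

    lte = math.log(target_error / 100.0)
    increase_factor = 1.5 * lte / math.log(error / 100.0)
    print(round(increase_factor, 2))           # ~2.79: the farther from target, the bigger the jump
    num_obs = int(num_obs * increase_factor)   # assumed growth step; ~2792 observations next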
  • trunk/lib/nanownlib/__init__.py

    r13 r16  
    1818import gzip
    1919import statistics
    20 import numpy
    21 import netifaces
     20try:
     21    import numpy
     22except:
     23    sys.stderr.write('ERROR: Could not import numpy module.  Ensure it is installed.\n')
     24    sys.stderr.write('       Under Debian, the package name is "python3-numpy"\n.')
     25    sys.exit(1)
     26
    2227try:
    2328    import requests
     
    3944
    4045def getIfaceForIP(ip):
     46    try:
     47        import netifaces
     48    except:
     49        sys.stderr.write('ERROR: Could not import netifaces module.  Ensure it is installed.\n')
     50        sys.stderr.write('       Try: pip3 install netifaces\n.')
     51        sys.exit(1)
     52   
    4153    for iface in netifaces.interfaces():
    4254        addrs = netifaces.ifaddresses(iface).get(netifaces.AF_INET, None)
     
    176188    my_ip = getLocalIP(target_ip, target_port)
    177189    my_iface = getIfaceForIP(my_ip)
    178     return subprocess.Popen(['chrt', '-r', '99', 'nanown-csamp', my_iface, my_ip,
     190    return subprocess.Popen(['chrt', '-r', '99', 'nanown-listen', my_iface, my_ip,
    179191                             target_ip, "%d" % target_port, output_file, '0'])
    180192
     
    303315
    304316
    305 def analyzeProbes(db):
     317def analyzeProbes(db, trim=None, recompute=False):
    306318    db.conn.execute("CREATE INDEX IF NOT EXISTS packets_probe ON packets (probe_id)")
    307319    db.conn.commit()
     
    316328    pcursor.execute("DELETE FROM trim_analysis")
    317329    db.conn.commit()
     330    if recompute:
     331        pcursor.execute("DELETE FROM analysis")
     332        db.conn.commit()
    318333
    319334    def loadPackets(db):
    320335        cursor = db.conn.cursor()
    321         cursor.execute("SELECT * FROM packets ORDER BY probe_id")
     336        #cursor.execute("SELECT * FROM packets ORDER BY probe_id")
     337        cursor.execute("SELECT * FROM packets WHERE probe_id NOT IN (SELECT probe_id FROM analysis) ORDER BY probe_id")
    322338
    323339        probe_id = None
     
    334350        ret_val.append((probe_id,entry))
    335351        return ret_val
    336    
    337     start = time.time()
     352
     353    def processPackets(packet_cache, strim, rtrim):
     354        sent_tally = []
     355        rcvd_tally = []
     356        analyses = []
     357        for probe_id,packets in packet_cache:
     358            try:
     359                analysis,s,r = analyzePackets(packets, timestamp_precision)
     360                analysis['probe_id'] = probe_id
     361                analyses.append(analysis)
     362                sent_tally.append(s)
     363                rcvd_tally.append(r)
     364            except Exception as e:
     365                #traceback.print_exc()
     366                sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
     367        db.addTrimAnalyses(analyses)
     368        db.conn.commit()
     369        return statistics.mode(sent_tally),statistics.mode(rcvd_tally)
     370   
     371    #start = time.time()
    338372    packet_cache = loadPackets(db)
    339     print("packets loaded in: %f" % (time.time()-start))
    340    
    341     count = 0
    342     sent_tally = []
    343     rcvd_tally = []
    344     for probe_id,packets in packet_cache:
    345         try:
    346             analysis,s,r = analyzePackets(packets, timestamp_precision)
    347             analysis['probe_id'] = probe_id
    348             sent_tally.append(s)
    349             rcvd_tally.append(r)
    350             db.addTrimAnalyses([analysis])
    351         except Exception as e:
    352             #traceback.print_exc()
    353             sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
    354        
    355         #print(pid,analysis)
    356         count += 1
     373    #print("packets loaded in: %f" % (time.time()-start))
     374
     375    if trim != None:
     376        best_strim,best_rtrim = trim
     377        processPackets(packet_cache, best_strim, best_rtrim)
     378    else:
     379        num_sent,num_rcvd = processPackets(packet_cache, 0, 0)
     380        print("num_sent: %d, num_rcvd: %d" % (num_sent,num_rcvd))
     381   
     382        for strim in range(0,num_sent):
     383            for rtrim in range(0,num_rcvd):
     384                #print(strim,rtrim)
     385                if strim == 0 and rtrim == 0:
     386                    continue # no point in doing 0,0 again
     387                processPackets(packet_cache, strim, rtrim)
     388
     389   
     390        unusual_case,delta = findUnusualTestCase(db, (0,0))
     391        evaluations = {}
     392        for strim in range(0,num_sent):
     393            for rtrim in range(0,num_rcvd):
     394                evaluations[(strim,rtrim)] = evaluateTrim(db, unusual_case, strim, rtrim)
     395
     396        import pprint
     397        pprint.pprint(evaluations)
     398
     399        delta_margin = 0.15
     400        best_strim = 0
     401        best_rtrim = 0
     402        good_delta,good_mad = evaluations[(0,0)]
     403   
     404        for strim in range(1,num_sent):
     405            delta,mad = evaluations[(strim,0)]
     406            if delta*good_delta > 0.0 and (abs(good_delta) - abs(delta)) < abs(delta_margin*good_delta) and mad < good_mad:
     407                best_strim = strim
     408            else:
     409                break
     410
     411        good_delta,good_mad = evaluations[(best_strim,0)]
     412        for rtrim in range(1,num_rcvd):
     413            delta,mad = evaluations[(best_strim,rtrim)]
     414            if delta*good_delta > 0.0 and (abs(good_delta) - abs(delta)) < abs(delta_margin*good_delta) and mad < good_mad:
     415                best_rtrim = rtrim
     416            else:
     417                break
     418
     419        print("selected trim parameters:",(best_strim,best_rtrim))
     420   
     421    pcursor.execute("""INSERT OR IGNORE INTO analysis
     422                         SELECT id,probe_id,suspect,packet_rtt,tsval_rtt
     423                           FROM trim_analysis
     424                           WHERE sent_trimmed=? AND rcvd_trimmed=?""",
     425                    (best_strim,best_rtrim))
    357426    db.conn.commit()
    358     num_sent = statistics.mode(sent_tally)
    359     num_rcvd = statistics.mode(rcvd_tally)
    360     sent_tally = None
    361     rcvd_tally = None
    362     print("num_sent: %d, num_rcvd: %d" % (num_sent,num_rcvd))
    363    
    364     for strim in range(0,num_sent):
    365         for rtrim in range(0,num_rcvd):
    366             #print(strim,rtrim)
    367             if strim == 0 and rtrim == 0:
    368                 continue # no point in doing 0,0 again
    369             for probe_id,packets in packet_cache:
    370                 try:
    371                     analysis,s,r = analyzePackets(packets, timestamp_precision, strim, rtrim)
    372                     analysis['probe_id'] = probe_id
    373                 except Exception as e:
    374                     #traceback.print_exc()
    375                     sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
    376                    
    377                 db.addTrimAnalyses([analysis])
    378     db.conn.commit()
    379 
    380     # Populate analysis table so findUnusualTestCase can give us a starting point
    381     pcursor.execute("DELETE FROM analysis")
    382     db.conn.commit()
    383     pcursor.execute("INSERT INTO analysis SELECT id,probe_id,suspect,packet_rtt,tsval_rtt FROM trim_analysis WHERE sent_trimmed=0 AND rcvd_trimmed=0")
    384    
    385     unusual_case,delta = findUnusualTestCase(db)
    386     evaluations = {}
    387     for strim in range(0,num_sent):
    388         for rtrim in range(0,num_rcvd):
    389             evaluations[(strim,rtrim)] = evaluateTrim(db, unusual_case, strim, rtrim)
    390 
    391     import pprint
    392     pprint.pprint(evaluations)
    393 
    394     delta_margin = 0.15
    395     best_strim = 0
    396     best_rtrim = 0
    397     good_delta,good_mad = evaluations[(0,0)]
    398    
    399     for strim in range(1,num_sent):
    400         delta,mad = evaluations[(strim,0)]
    401         if delta*good_delta > 0.0 and (abs(good_delta) - abs(delta)) < abs(delta_margin*good_delta) and mad < good_mad:
    402             best_strim = strim
    403         else:
    404             break
    405 
    406     good_delta,good_mad = evaluations[(best_strim,0)]
    407     for rtrim in range(1,num_rcvd):
    408         delta,mad = evaluations[(best_strim,rtrim)]
    409         if delta*good_delta > 0.0 and (abs(good_delta) - abs(delta)) < abs(delta_margin*good_delta) and mad < good_mad:
    410             best_rtrim = rtrim
    411         else:
    412             break
    413 
    414     print("selected trim parameters:",(best_strim,best_rtrim))
    415    
    416     if best_strim != 0 or best_rtrim !=0:
    417         pcursor.execute("DELETE FROM analysis")
    418         db.conn.commit()
    419         pcursor.execute("INSERT INTO analysis SELECT id,probe_id,suspect,packet_rtt,tsval_rtt FROM trim_analysis WHERE sent_trimmed=? AND rcvd_trimmed=?",
    420                         (best_strim,best_rtrim))
    421 
    422     #pcursor.execute("DELETE FROM trim_analysis")
    423     db.conn.commit()
    424    
    425     return count
     427   
     428    return len(packet_cache)
    426429
    427430
     
    442445    ptimes = cursor.fetchone()
    443446    window_size = 100*int((ptimes['end']-ptimes['start'])/ptimes['count'])
    444     print("associate window_size:", window_size)
     447    #print("associate window_size:", window_size)
    445448
    446449    db.addPackets(parseJSONLines(sniffer_fp), window_size)
     
    460463
    461464
    462 def findUnusualTestCase(db):
     465def findUnusualTestCase(db, trim=None):
    463466    test_cases = enumStoredTestCases(db)
    464 
     467    if trim != None:
     468        params = {'strim':trim[0], 'rtrim':trim[1]}
     469        qsuffix = " AND sent_trimmed=:strim AND rcvd_trimmed=:rtrim"
     470        table = "trim_analysis"
     471    else:
     472        params = {}
     473        qsuffix = ""
     474        table = "analysis"
     475   
    465476    cursor = db.conn.cursor()
    466     cursor.execute("SELECT packet_rtt FROM probes,analysis WHERE probes.id=analysis.probe_id AND probes.type in ('train','test')")
     477    cursor.execute("SELECT packet_rtt FROM probes,"+table+" a WHERE probes.id=a.probe_id AND probes.type in ('train','test')"+qsuffix, params)
    467478    global_tm = quadsummary([row['packet_rtt'] for row in cursor])
    468479
    469480    tm_abs = []
    470481    tm_map = {}
     482
    471483    # XXX: if more speed needed, percentile extension to sqlite might be handy...
    472484    for tc in test_cases:
    473         cursor.execute("SELECT packet_rtt FROM probes,analysis WHERE probes.id=analysis.probe_id AND probes.type in ('train','test') AND probes.test_case=?", (tc,))
     485        params['test_case']=tc
     486        query = """SELECT packet_rtt FROM probes,"""+table+""" a
     487                   WHERE probes.id=a.probe_id AND probes.type in ('train','test')
     488                   AND probes.test_case=:test_case""" + qsuffix
     489        cursor.execute(query, params)
    474490        tm_map[tc] = quadsummary([row['packet_rtt'] for row in cursor])
    475491        tm_abs.append((abs(tm_map[tc]-global_tm), tc))
    476492
    477493    magnitude,tc = max(tm_abs)
    478     cursor.execute("SELECT packet_rtt FROM probes,analysis WHERE probes.id=analysis.probe_id AND probes.type in ('train','test') AND probes.test_case<>?", (tc,))
     494    params['test_case']=tc
     495    query = """SELECT packet_rtt FROM probes,"""+table+""" a
     496               WHERE probes.id=a.probe_id AND probes.type in ('train','test')
     497               AND probes.test_case<>:test_case""" + qsuffix
     498    cursor.execute(query,params)
    479499    remaining_tm = quadsummary([row['packet_rtt'] for row in cursor])
    480500
    481     ret_val = (tc, tm_map[tc]-remaining_tm)
    482     print("unusual_case: %s, delta: %f" % ret_val)
    483     return ret_val
     501    delta = tm_map[tc]-remaining_tm
     502    # Hack to make the chosen unusual_case more intuitive to the user
     503    if len(test_cases) == 2 and delta < 0.0:
     504        tc = [t for t in test_cases if t != tc][0]
     505        delta = abs(delta)
     506
     507    return tc,delta
    484508
    485509
     
    492516        cursor.execute("SELECT count(id) c FROM (SELECT id FROM probes WHERE type=? AND time_of_day>? GROUP BY sample)", (st[0],int(start_time*1000000000)))
    493517        count = cursor.fetchone()[0]
    494         output += " | %s remaining: %d" % (st[0], st[1]-count)
     518        output += " | %s remaining: %6d" % (st[0], st[1]-count)
    495519        total_completed += count
    496520        total_requested += st[1]
    497521
    498522    rate = total_completed / (time.time() - start_time)
    499     total_time = total_requested / rate       
     523    total_time = total_requested / rate
    500524    eta = datetime.datetime.fromtimestamp(start_time+total_time)
    501     print("STATUS:",output[3:],"| est. total_time: %s | est. ETA: %s" % (str(datetime.timedelta(seconds=total_time)), str(eta)))
     525    print("STATUS:",output[3:],"| est. total_time: %s | ETA: %s" % (str(datetime.timedelta(seconds=total_time)), eta.strftime("%Y-%m-%d %X")))
     526
     527
     528
     529def evaluateTestResults(db):
     530    cursor = db.conn.cursor()
     531    query = """
     532      SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
     533    """
     534    cursor.execute(query)
     535    classifiers = []
     536    for c in cursor:
     537        classifiers.append(c[0])
     538
     539    best_obs = []
     540    best_error = []
     541    max_obs = 0
     542    for classifier in classifiers:
     543        query="""
     544        SELECT classifier,params,num_observations,(false_positives+false_negatives)/2 error
     545        FROM classifier_results
     546        WHERE trial_type='test'
     547         AND classifier=:classifier
     548         AND (false_positives+false_negatives)/2.0 < 5.0
     549        ORDER BY num_observations,(false_positives+false_negatives)
     550        LIMIT 1
     551        """
     552        cursor.execute(query, {'classifier':classifier})
     553        row = cursor.fetchone()
     554        if row == None:
     555            query="""
     556            SELECT classifier,params,num_observations,(false_positives+false_negatives)/2 error
     557            FROM classifier_results
     558            WHERE trial_type='test' and classifier=:classifier
     559            ORDER BY (false_positives+false_negatives),num_observations
     560            LIMIT 1
     561            """
     562            cursor.execute(query, {'classifier':classifier})
     563            row = cursor.fetchone()
     564            if row == None:
     565                sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
     566                continue
     567            row = dict(row)
     568
     569            best_error.append(dict(row))
     570        else:
     571            best_obs.append(dict(row))
     572
     573
     574    return best_obs,best_error
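
The trim search added to analyzeProbes() decides how many sent/received packets to discard per probe. The selection rule is easy to lose in the diff, so here is a standalone sketch; selectTrims is a hypothetical name for the inline logic, and the evaluations values are made up purely for illustration:

    def selectTrims(evaluations, num_sent, num_rcvd, delta_margin=0.15):
        # evaluations maps (sent_trim, rcvd_trim) -> (delta, mad) as computed by evaluateTrim()
        best_strim = best_rtrim = 0
        good_delta, good_mad = evaluations[(0, 0)]
        # Increase the send-side trim while delta keeps its sign, stays within
        # delta_margin of the untrimmed delta, and the MAD improves
        for strim in range(1, num_sent):
            delta, mad = evaluations[(strim, 0)]
            if delta*good_delta > 0.0 and (abs(good_delta) - abs(delta)) < abs(delta_margin*good_delta) and mad < good_mad:
                best_strim = strim
            else:
                break
        # Then do the same for the receive-side trim, relative to the chosen send trim
        good_delta, good_mad = evaluations[(best_strim, 0)]
        for rtrim in range(1, num_rcvd):
            delta, mad = evaluations[(best_strim, rtrim)]
            if delta*good_delta > 0.0 and (abs(good_delta) - abs(delta)) < abs(delta_margin*good_delta) and mad < good_mad:
                best_rtrim = rtrim
            else:
                break
        return best_strim, best_rtrim

    evaluations = {(0,0): (120.0, 40.0), (1,0): (115.0, 32.0), (2,0): (60.0, 25.0), (1,1): (112.0, 30.0)}
    print(selectTrims(evaluations, num_sent=3, num_rcvd=2))   # -> (1, 1)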
  • trunk/lib/nanownlib/parallel.py

    r11 r16  
    4848   
    4949    def stop(self):
    50         for i in range(0,len(self.workers)):
     50        try:
     51            while True:
     52                self.workq.get(block=False)
     53                self.workq.task_done()
     54        except queue.Empty as e:
     55            pass
     56       
     57        for i in range(len(self.workers)):
    5158            self.workq.put(None)
    5259        for w in self.workers:
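
The change to stop() drains any queued-but-unstarted work before posting the per-worker sentinels, so shutdown is prompt even with a backlog. A self-contained sketch of the pattern with hypothetical worker code (not part of nanownlib):

    import queue
    import threading

    workq = queue.Queue()

    def worker():
        while True:
            job = workq.get()
            if job is None:          # sentinel: exit the thread
                workq.task_done()
                return
            # ... process job here ...
            workq.task_done()

    workers = [threading.Thread(target=worker) for _ in range(4)]
    for w in workers:
        w.start()
    for job in range(100):
        workq.put(job)

    def stop():
        # Discard anything still waiting in the queue
        try:
            while True:
                workq.get(block=False)
                workq.task_done()
        except queue.Empty:
            pass
        for _ in range(len(workers)):   # one sentinel per worker
            workq.put(None)
        for w in workers:
            w.join()

    stop()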
  • trunk/lib/nanownlib/stats.py

    r13 r16  
    77import gzip
    88import random
    9 import scipy
    10 import scipy.stats
    119import numpy
    1210
     
    249247
    250248    mh = f(diffs, params['distance'])
     249    #print("estimate:", mh)
    251250    if greater:
    252251        if mh > params['threshold']:
  • trunk/lib/nanownlib/storage.py

    r11 r16  
    77import threading
    88import sqlite3
    9 
    10 import numpy
     9try:
     10    import numpy
     11except:
     12    sys.stderr.write('ERROR: Could not import numpy module.  Ensure it is installed.\n')
     13    sys.stderr.write('       Under Debian, the package name is "python3-numpy"\n.')
     14    sys.exit(1)
     15
    1116# Don't trust numpy's seeding
    1217numpy.random.seed(random.SystemRandom().randint(0,2**32-1))
     
    3944                                      tcpts_mean REAL,
    4045                                      tcpts_stddev REAL,
    41                                       tcpts_slopes TEXT)
     46                                      tcpts_slopes TEXT,
     47                                      unusual_case TEXT,
     48                                      greater INTEGER)
    4249                """)
    4350
     
    196203    def addPackets(self, pkts, window_size):
    197204        query = ("INSERT INTO packets (id,probe_id,sent,observed,tsval,payload_len,tcpseq,tcpack)"
    198                  " VALUES(randomblob(16),"
     205                 " VALUES(hex(randomblob(16)),"
    199206                 "(SELECT id FROM probes WHERE local_port=:local_port AND :observed>time_of_day"
    200207                 " AND :observed<time_of_day+userspace_rtt+%d"
     
    254261        self.conn.execute(query, params)
    255262        self.conn.commit()
    256        
     263   
     264    def setUnusualCase(self, unusual_case, greater):
     265        query = """SELECT * FROM meta LIMIT 1"""
     266        cursor = self.conn.cursor()
     267        cursor.execute(query)
     268        row = cursor.fetchone()
     269        if row == None:
     270            params = {"id":_newid()}
     271        else:
     272            params = dict(row)
     273
     274        params["unusual_case"]=unusual_case
     275        params["greater"]=greater
     276       
     277        keys = params.keys()
     278        columns = ','.join(keys)
     279        placeholders = ':'+', :'.join(keys)
     280       
     281        query = """INSERT OR REPLACE INTO meta (%s) VALUES (%s)""" % (columns, placeholders)
     282        cursor.execute(query, params)
     283       
     284       
     285    def getUnusualCase(self):
     286        query = """SELECT unusual_case,greater FROM meta LIMIT 1"""
     287        cursor = self.conn.cursor()
     288        cursor.execute(query)
     289        row = cursor.fetchone()
     290        if row == None or row[0] == None or row[1] == None:
     291            return None
     292        else:
     293            return tuple(row)
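
The meta table gains unusual_case and greater columns so the detected case can be cached between runs. A usage sketch matching how bin/train consumes the new accessors (database path illustrative; findUnusualTestCase comes from nanownlib):

    import nanownlib.storage
    from nanownlib import findUnusualTestCase

    db = nanownlib.storage.db('samples.db')       # illustrative path
    cached = db.getUnusualCase()
    if cached is not None:
        unusual_case, greater = cached            # reuse the stored detection
        print("Using cached unusual_case:", unusual_case)
    else:
        unusual_case, delta = findUnusualTestCase(db)
        greater = (delta > 0)
        db.setUnusualCase(unusual_case, greater)  # cache it for later runs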
  • trunk/lib/nanownlib/train.py

    r13 r16  
    4343    num_trials = 200
    4444    lows = [p[1] for p in performance[0:5]]
    45     widths = [w/10.0 for w in range(5,65,5)]
     45    widths = [w/10.0 for w in range(5,155,10)]
    4646    performance = []
    4747    for width in widths:
     
    8585   
    8686    num_trials = 500
    87     widths = [good_width+(x/100.0) for x in range(-70,75,5) if good_width+(x/100.0) > 0.0]
     87    widths = [good_width+(x/100.0) for x in range(-120,125,5) if good_width+(x/100.0) > 0.0]
    8888    performance = []
    8989    for width in widths:
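
For reference, the two width grids above widen the search: the coarse grid grows from 0.5-6.0 (step 0.5) to 0.5-14.5 (step 1.0), and the fine grid around good_width grows from +/-0.70 to +/-1.20 in steps of 0.05. A quick check of the generated values (good_width is illustrative):

    coarse = [w/10.0 for w in range(5, 155, 10)]
    print(coarse)                # [0.5, 1.5, 2.5, ..., 13.5, 14.5]

    good_width = 2.0             # illustrative
    fine = [good_width + (x/100.0) for x in range(-120, 125, 5) if good_width + (x/100.0) > 0.0]
    print(fine[0], fine[-1])     # ~0.8 ~3.2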
  • trunk/src/compile.sh

    r4 r16  
    11#!/bin/sh
    22
    3 gcc -ggdb -Wl,-z,relro,-z,now -fstack-protector-strong -Wformat -Werror=format-security -D_FORTIFY_SOURCE=2 csamp.c -lpcap -o ../bin/csamp
     3gcc -ggdb -Wl,-z,relro,-z,now -fstack-protector-strong -Wformat -Werror=format-security -D_FORTIFY_SOURCE=2 listen.c -lpcap -o ../bin/nanown-listen
  • trunk/src/listen.c

    r4 r16  
    188188    if(besttst != -1)
    189189    {
     190#if DEBUG
    190191      fprintf(stderr, "INFO: Attempting to set the timestamp source to: %s\n",
    191192              pcap_tstamp_type_val_to_name(besttst));
     193#endif
    192194      if(pcap_set_tstamp_type(ret_val, besttst) != 0)
    193195        fprintf(stderr, "WARN: Failed to set preferred timestamp source.\n");