Changeset 6 for trunk


Ignore:
Timestamp:
07/09/15 12:27:04 (9 years ago)
Author:
tim
Message:

.

Location:
trunk
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/bin/train

    r4 r6  
    290290
    291291
    292 def trainMidhinge(db, unusual_case, greater, subsample_size):
     292def trainMidhinge(db, unusual_case, greater, subseries_size):
    293293
    294294    def trainAux(distance, threshold, num_trials):
    295         estimator = functools.partial(midhingeTest, {'distance':distance,'threshold':threshold}, unusual_case, greater)
    296         estimates = bootstrap2(estimator, db, 'train', subsample_size, num_trials)
    297         null_estimates = bootstrap2(estimator, db, 'train_null', subsample_size, num_trials)
     295        estimator = functools.partial(midhingeTest, {'distance':distance,'threshold':threshold}, greater)
     296        estimates = bootstrap3(estimator, db, 'train', unusual_case, subseries_size, num_trials)
     297        null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, subseries_size, num_trials)
    298298
    299299        bad_estimates = len([e for e in estimates if e != 1])
     
    306306    #determine expected delta based on differences
    307307    start = time.time()
    308     mean_diffs = list(samples2MeanDiffs(subsample(db, 'train'), 'packet_rtt', unusual_case))
     308    mean_diffs = [s['unusual_case']-s['other_cases'] for s in subseries(db, 'train', unusual_case)]
    309309    threshold = trimean(mean_diffs)/2.0
    310310    print("initial threshold:", threshold)
     
    313313    print("trimean threshold:", trimean(mean_diffs)/2.0)
    314314   
    315     mean_diffs = list(samples2MeanDiffs(subsample(db, 'train_null'), 'packet_rtt', unusual_case))
     315    mean_diffs = [s['unusual_case']-s['other_cases'] for s in subseries(db, 'train_null', unusual_case)]
    316316    print(len(mean_diffs))
    317317    print("null mean:", statistics.mean(mean_diffs))
     
    323323   
    324324    start = time.time()
    325     wt = WorkerThreads(1, trainAux)
    326    
    327     num_trials = 200
     325    wt = WorkerThreads(2, trainAux)
     326   
     327    num_trials = 20
    328328    performance = []
    329329    #for distance in range(1,46,4):
     
    335335        fp,fn = errors
    336336        performance.append(((fp+fn)/2.0, job_id, fn, fp))
     337    #for distance in range(25,46,4):
     338    #    job_id = distance
     339    #    fp,fn = trainAux(distance, threshold, num_trials)
     340    #    performance.append(((fp+fn)/2.0, job_id, fn, fp))
     341   
    337342    performance.sort()
    338343    pprint.pprint(performance)
     
    342347
    343348   
    344     num_trials = 200
     349    num_trials = 20
    345350    start = time.time()
    346351    performance = []
     
    359364
    360365   
    361     num_trials = 200
     366    num_trials = 20
    362367    start = time.time()
    363368    performance = []
     
    374379    best_distance = performance[0][1]
    375380    print("best_distance:",best_distance)
    376 
    377     num_trials = 200
     381   
     382    num_trials = 20
    378383    start = time.time()
    379384    performance = []
     
    394399    return {'algorithm':"midhinge",
    395400            'params':params,
    396             'sample_size':subsample_size,
     401            'sample_size':subseries_size,
    397402            'num_trials':num_trials,
    398403            'trial_type':"train",
     
    421426print(":", end-start)
    422427
     428import cProfile
     429
    423430start = time.time()
    424 results = trainMidhinge(db, unusual_case, greater, 6000)
    425 db.addClassifierResults(results)
     431#cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
     432results = trainMidhinge(db, unusual_case, greater, 100)
     433#db.addClassifierResults(results)
    426434print("midhinge result:", results)
    427435end = time.time()
    428436print(":", end-start)
     437
     438sys.exit(0)
    429439
    430440start = time.time()
  • trunk/lib/nanownlib/__init__.py

    r5 r6  
    214214    for p in packets:
    215215        key = (p['sent'],p['tcpseq'],p['tcpack'],p['payload_len'])
    216         #if (key not in seen)\
    217         #   or p['sent']==1 and (seen[key]['observed'] < p['observed'])\
    218         #   or p['sent']==0 and (seen[key]['observed'] > p['observed']):
    219         if (key not in seen) or (seen[key]['observed'] > p['observed']):
     216        if (key not in seen)\
     217           or p['sent']==1 and (seen[key]['observed'] < p['observed'])\
     218           or p['sent']==0 and (seen[key]['observed'] > p['observed']):
     219            #if (key not in seen) or (seen[key]['observed'] > p['observed']):
    220220            seen[key] = p
    221221   
     
    352352    pprint.pprint(evaluations)
    353353
    354     delta_margin = 0.1
     354    delta_margin = 0.15
    355355    best_strim = 0
    356356    best_rtrim = 0
     
    439439    remaining_tm = trimean([row['packet_rtt'] for row in cursor])
    440440
    441     return (tc, tm_map[tc]-remaining_tm)
     441    ret_val = (tc, tm_map[tc]-remaining_tm)
     442    print("unusual_case: %s, delta: %f" % ret_val)
     443    return ret_val
    442444
    443445
  • trunk/lib/nanownlib/stats.py

    r4 r6  
    227227        yield (sid,[dict(r) for r in probes])
    228228
     229
     230def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
     231    cursor = db.conn.cursor()
     232    cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))
     233    population_size = cursor.fetchone()[0]
     234
     235    if size == None or size > population_size:
     236        size = population_size
     237    if offset == None or offset >= population_size or offset < 0:
     238        offset = numpy.random.random_integers(0,population_size-1)
     239
     240    query="""
     241      SELECT %(field)s AS unusual_case,
     242             (SELECT avg(%(field)s) FROM probes,analysis
     243              WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
     244      FROM   (SELECT probes.sample,%(field)s FROM probes,analysis
     245              WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
     246      LIMIT :size OFFSET :offset
     247    """ % {"field":field}
     248   
     249    params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}
     250    cursor.execute(query, params)
     251    for row in cursor:
     252        size -= 1
     253        yield dict(row)
     254
     255    if size > 0:
     256        params['offset'] = 0
     257        params['size'] = size
     258        cursor.execute(query, params)
     259        for row in cursor:
     260            yield dict(row)
     261   
     262
    229263# if test_cases=None, include all of them.  Otherwise, include only the specified test cases.
    230264def samples2Distributions(samples, field, test_cases=None):
     
    270304
    271305
     306def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
     307    ret_val = []
     308    for t in range(num_trials):
     309        ret_val.append(estimator(subseries(db, probe_type, unusual_case, subseries_size)))
     310
     311    return ret_val
     312
     313
    272314# Returns the test case name that clearly has higher RTT; otherwise, returns None
    273315def boxTest(params, test_cases, samples):
     
    329371# Returns 1 if unusual_case is unusual in the expected direction
    330372#         0 otherwise
    331 def midhingeTest(params, unusual_case, greater, samples):
    332     diffs = list(samples2MeanDiffs(samples, 'packet_rtt', unusual_case))
     373def midhingeTest(params, greater, samples):
     374    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
    333375
    334376    mh = midhinge(diffs, params['distance'])
Note: See TracChangeset for help on using the changeset viewer.