source: trunk/bin/graph @ 14

Last change on this file since 14 was 14, checked in by tim, 9 years ago

.

  • Property svn:executable set to *
File size: 33.4 KB
Line 
1#!/usr/bin/env python3
2
3import sys
4import os
5import time
6import random
7import tempfile
8import argparse
9import socket
10import json
11
12import numpy
13import matplotlib.mlab as mlab
14import matplotlib.pyplot as plt
15
16
# Placeholder version string; while it still holds the development marker the
# script adds its sibling ../lib directory to the import path.
VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        # __file__ may be undefined (e.g. when the code is exec'd); fall back
        # to the path the interpreter was started with.  Catching Exception
        # rather than using a bare except lets SystemExit/KeyboardInterrupt
        # propagate.
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            # Best effort only: keep the '.' default.
            pass
    sys.path.append("%s/../lib" % script_dir)
28
29from nanownlib import *
30from nanownlib.stats import *
31import nanownlib.storage
32
33
# Command line: one positional argument, the path of the database file.
parser = argparse.ArgumentParser(description="")
parser.add_argument('db_file', help='', default=None)
options = parser.parse_args()

# Module-level database handle read by the functions defined below.
db = nanownlib.storage.db(options.db_file)
40
41
def differences(db, unusual_case, rtt_type='packet'):
    """Return unusual-minus-other RTT differences for 'train' then 'test'.

    For every entry yielded by db.subseries('train', unusual_case) followed
    by db.subseries('test', unusual_case), the value stored under
    'unusual_<rtt_type>' minus the value under 'other_<rtt_type>' is
    collected, in that order, into one list.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    deltas = []
    for series_name in ('train', 'test'):
        for entry in db.subseries(series_name, unusual_case):
            deltas.append(entry[unusual_key] - entry[other_key])
    return deltas
46
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return unusual-minus-other RTT differences for the 'train_null' set.

    Each entry of db.subseries('train_null', unusual_case) contributes the
    value under 'unusual_<rtt_type>' minus the value under 'other_<rtt_type>'.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    deltas = []
    for entry in db.subseries('train_null', unusual_case):
        deltas.append(entry[unusual_key] - entry[other_key])
    return deltas
50
51
def timeSeries(db, probe_type, unusual_case):
    """Yield one dict per analysed probe of `unusual_case` and `probe_type`.

    Each yielded dict holds:
      'time_of_day'  -- the probe's time_of_day column,
      <unusual_case> -- that probe's packet_rtt (the key is the argument value),
      'other_cases'  -- avg(packet_rtt) over probes of the same type and
                        sample whose test_case differs from `unusual_case`.

    Rows are read by column name, so the connection is expected to return
    rows that support string indexing.
    """
    sql = """
      SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
      FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """
    bindings = {"probe_type": probe_type, "unusual_case": unusual_case}

    rows = db.conn.cursor()
    rows.execute(sql, bindings)
    for record in rows:
        yield {
            'time_of_day': record['time_of_day'],
            unusual_case: record['uc'],
            'other_cases': record['oc'],
        }
65#samples,derived,null_derived = parse_data(input1)
66
67#trust = trustValues(derived, sum)
68#weights = linearWeights(derived, trust, 0.25)
69#print('(test): %f' % weightedMean(derived,weights))
70
# Module-level RTT difference lists for the 'long' test case: packet-level
# first, then the 'reported' variant.  Later code reads and sorts these.
diffs = [delta for delta in differences(db, 'long')]
reported_diffs = [delta for delta in differences(db, 'long', 'reported')]
73#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
74#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']
75
76
def basicStatistics():
    """Print summary statistics of the module-level RTT difference lists.

    Packet-level statistics are computed from the global `diffs`; the
    'reported' statistics come from the global `reported_diffs`.  The reported
    section is best effort: if any estimator raises, a warning naming the
    error is written to stderr and the remaining reported lines are skipped
    (previously the error was swallowed silently by a bare except).
    """
    print('packet_rtt diff mean: %f' % statistics.mean(diffs))
    print('packet_rtt diff median: %f' % statistics.median(diffs))
    print('packet_rtt diff midhinge: %f' % midsummary(diffs))
    print('packet_rtt diff trimean: %f' % trimean(diffs))
    print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
    print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
    print('packet_rtt diff septasummary: %f' % septasummary(diffs))
    print('packet_rtt diff MAD: %f' % mad(diffs))
    try:
        print('reported diff trimean: %f' % trimean(reported_diffs))
        print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
        print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
        print('reported diff septasummary: %f' % septasummary(reported_diffs))
        print('reported diff MAD: %f' % mad(reported_diffs))

        #import cProfile
        #start = time.time()
        #kresults = kfilter({},diffs)
        #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
        #print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
        #kresults = kfilter({},reported_diffs)
        #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
        #print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
        #print("kfilter time: %f" % (time.time()-start))
    except Exception as err:
        # Keep the section optional, but say why it was skipped.  Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit through.
        sys.stderr.write("WARN: skipping reported diff statistics: %s\n" % err)

    #print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
    #print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
    #print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
    #print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))
109
110
111
def exampleBoxTestHistogram(low,high):
    """Show overlaid histograms of the two packet RTT series with percentile boxes.

    Reads the 'train' and 'test' subseries for the 'long' case from the
    module-level `db`, histograms the 'other_packet' (blue) and
    'unusual_packet' (red) values, and draws one translucent box per series
    spanning its [low, high] percentile range.

    Args:
        low:  lower percentile (0-100) of each box.
        high: upper percentile (0-100) of each box.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train','long')+db.subseries('test','long')
    others = [sample['other_packet'] for sample in samples]
    unusuals = [sample['unusual_packet'] for sample in samples]

    s_low,s_high = numpy.percentile(others, (low,high))
    l_low,l_high = numpy.percentile(unusuals, (low,high))

    # Clip the plotted range to the 0.2%..99.8% span of the 'other' series.
    # The indices are derived from the list being indexed, not from the
    # unrelated module-level `diffs` the original consulted.
    others.sort()
    cut_off_low = others[int(len(others)*0.002)]
    cut_off_high = others[int(len(others)*0.998)]

    plt.clf()
    # NOTE(review): `normed` was removed from hist() in matplotlib 3.1 in
    # favour of `density`; kept here for the matplotlib this script targets.
    plt.hist((others,unusuals), num_bins, normed=1, color=('blue','red'), histtype='step', alpha=0.8,
             label=('Test Case 1','Test Case 2'), range=(cut_off_low,cut_off_high))

    currentAxis = plt.gca()
    currentAxis.add_patch(FancyBboxPatch((s_low, 0), s_high-s_low, 0.0001, boxstyle='square', facecolor="blue", alpha=0.4))
    currentAxis.add_patch(FancyBboxPatch((l_low, 0), l_high-l_low, 0.0001, boxstyle='square', facecolor="red", alpha=0.4))

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')
    #plt.title(r'Box Test Example - Overlapping Boxes')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.legend()
    plt.show()
    #plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
150
151
152#exampleBoxTestHistogram(6,8)
153
154
def testKalman4D(params=None):
    """Experiment: smooth 4-D (packet RTT + tsval) measurements with pykalman.

    Measurements are rows of (unusual_packet, other_packet, unusual_tsval,
    other_tsval) built from the 'long' subseries of the module-level `db`.
    When `params` is None the filter parameters are learned with EM and
    printed; otherwise `params` is passed straight to KalmanFilter as keyword
    arguments.  The function then prints, per window, the difference of the
    smoothed packet state means next to the plain mean difference, first for
    the train+test data and then for the 'train_null' data.

    Args:
        params: dict of KalmanFilter keyword arguments, or None to learn them.
    """
    from pykalman import KalmanFilter
    train = db.subseries('train','long', offset=0)
    test = db.subseries('test','long', offset=0)
    null = db.subseries('train_null','long', offset=0)
    measurements = numpy.asarray([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) for s in (train+test)])
    null_measurements = numpy.asarray([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) for s in null])

    if params == None:
        # NOTE(review): this seeded filter is replaced by the unseeded one
        # created two statements below, so the initial_state_mean computed
        # here is never used.
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4,
                          initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]),
                                              quadsummary([s['other_packet'] for s in train]),
                                              numpy.mean([s['unusual_tsval'] for s in train]),
                                              numpy.mean([s['other_tsval'] for s in train])])
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4)

        start=time.time()
        # NOTE(review): both operands are numpy arrays, so `+` adds them
        # element-wise (and requires compatible shapes) rather than
        # concatenating the two data sets -- confirm which was intended.
        kf = kf.em(measurements[0:len(train)]+null_measurements[0:50000], n_iter=10,
                   em_vars=('transition_matrices',
                            'observation_matrices',
                            'transition_offsets',
                            'observation_offsets',
                            'transition_covariance',
                            'observation_covariance',
                            'initial_state_mean',
                            'initial_state_covariance'))
        # Convert the learned arrays to plain lists so they can be printed and
        # reused as keyword arguments below.
        params = {'transition_matrices': kf.transition_matrices.tolist(),
                  'observation_matrices': kf.observation_matrices.tolist(),
                  'transition_offsets': kf.transition_offsets.tolist(),
                  'observation_offsets': kf.observation_offsets.tolist(),
                  'transition_covariance': kf.transition_covariance.tolist(),
                  'observation_covariance': kf.observation_covariance.tolist(),
                  'initial_state_mean': kf.initial_state_mean.tolist(),
                  'initial_state_covariance': kf.initial_state_covariance.tolist()}
        print("Learned Params:\n")
        import pprint
        pprint.pprint(params)
        print("pykalman em time: %f" % (time.time()-start))

    #kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)

    # Evaluate on windows of num_obs rows starting at offsets
    # 50000, 55000, ..., 100000 of the train+test measurements.
    num_obs=5000
    for offset in range(50000,100000+num_obs,num_obs):
        start=time.time()  # only consumed by the commented-out timing print
        m = measurements[offset:offset+num_obs]
        #params['initial_state_mean']=[quadsummary([s[0] for s in m]),
        #                              quadsummary([s[1] for s in m]),
        #                              numpy.mean([s[2] for s in m]),
        #                              numpy.mean([s[3] for s in m])]
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **params)
        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
        #print("pykalman smooth time: %f" % (time.time()-start))
        # Columns 0 and 1 of the state correspond to the unusual/other packet
        # positions of the measurement tuples built above.
        up = numpy.mean([m[0] for m in smoothed_state_means])
        op = numpy.mean([m[1] for m in smoothed_state_means])
        #print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
        print("packet_rtt pykalman mean:", up-op)
        print("packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))
        #up = numpy.mean([m[2] for m in smoothed_state_means])
        #op = numpy.mean([m[3] for m in smoothed_state_means])
        #print("tsval_rtt pykalman final:", smoothed_state_means[-1][2]-smoothed_state_means[-1][3])
        #print("tsval_rtt pykalman mean:", up-op)
        #print("tsval_rtt mean:", numpy.mean([s[2]-s[3] for s in m]))

    # Same evaluation over the null measurements.
    # NOTE(review): the range stops at len(null_measurements)+num_obs, so the
    # final offset is always >= len(null_measurements) and its slice is empty.
    for offset in range(0,len(null_measurements)+num_obs,num_obs):
        start=time.time()
        m = null_measurements[offset:offset+num_obs]
        #params['initial_state_mean']=[quadsummary([s[0] for s in m]),
        #                              quadsummary([s[1] for s in m]),
        #                              numpy.mean([s[2] for s in m]),
        #                              numpy.mean([s[3] for s in m])]
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **params)
        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
        up = numpy.mean([m[0] for m in smoothed_state_means])
        op = numpy.mean([m[1] for m in smoothed_state_means])
        #print("null packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
        print("null packet_rtt pykalman mean:", up-op)
        print("null packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))
        #up = numpy.mean([m[2] for m in smoothed_state_means])
        #op = numpy.mean([m[3] for m in smoothed_state_means])
        #print("null tsval_rtt pykalman final:", smoothed_state_means[-1][2]-smoothed_state_means[-1][3])
        #print("null tsval_rtt pykalman mean:", up-op)
        #print("null tsval_rtt mean:", numpy.mean([s[2]-s[3] for s in m]))
237
238       
239#testKalman4D(echo_vm_5k)
240
241
242
def testKalman(params=None):
    """Experiment: smooth 2-D packet RTT measurements with pykalman.

    Measurements are rows of (unusual_packet, other_packet) taken from the
    'train' and 'test' subseries for the 'long' case of the module-level `db`.
    When `params` is None, filter parameters are learned with EM from the
    first len(train) measurements and printed; otherwise `params` is used as
    the KalmanFilter keyword arguments.  For each window of 10000 rows at
    offsets 50000..100000 the function prints the smoothing time, the final
    and mean smoothed difference, and the plain mean difference.

    Args:
        params: dict of KalmanFilter keyword arguments, or None to learn them.
    """
    from pykalman import KalmanFilter
    import pprint

    train = db.subseries('train','long', offset=0)
    test = db.subseries('test','long', offset=0)
    measurements = numpy.asarray([(s['unusual_packet'],s['other_packet']) for s in (train+test)])

    if params is None:
        # The seeded filter is only needed for EM, so build it only here; the
        # evaluation loop below always constructs its own filter from params.
        kf = KalmanFilter(n_dim_obs=2, n_dim_state=2,
                          initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]),
                                              quadsummary([s['other_packet'] for s in train])])
        start=time.time()
        kf = kf.em(measurements[0:len(train)], n_iter=10,
                   em_vars=('transition_matrices',
                            'observation_matrices',
                            'transition_offsets',
                            'observation_offsets',
                            'transition_covariance',
                            'observation_covariance',
                            'initial_state_covariance'))
        # Plain lists so the parameters can be printed and reused as kwargs.
        params = {'transition_matrices': kf.transition_matrices.tolist(),
                  'observation_matrices': kf.observation_matrices.tolist(),
                  'transition_offsets': kf.transition_offsets.tolist(),
                  'observation_offsets': kf.observation_offsets.tolist(),
                  'transition_covariance': kf.transition_covariance.tolist(),
                  'observation_covariance': kf.observation_covariance.tolist(),
                  'initial_state_mean': kf.initial_state_mean.tolist(),
                  'initial_state_covariance': kf.initial_state_covariance.tolist()}
        print("Learned Params:\n")
        pprint.pprint(params)
        print("pykalman em time: %f" % (time.time()-start))

    num_obs=10000
    for offset in range(50000,100000+num_obs,num_obs):
        start=time.time()
        kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)
        window = measurements[offset:offset+num_obs]
        smoothed_state_means, _ = kf.smooth(window)
        print("pykalman smooth time: %f" % (time.time()-start))
        # State columns follow the measurement layout: 0 = unusual, 1 = other.
        up = numpy.mean([state[0] for state in smoothed_state_means])
        op = numpy.mean([state[1] for state in smoothed_state_means])
        print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
        print("packet_rtt pykalman mean:", up-op)
        print("packet_rtt mean:", numpy.mean([obs[0]-obs[1] for obs in window]))
292
293
294#testKalman(ten_iter)
295
296
def getTCPTSPrecision():
    """Return `tcpts_mean` from the first row of the `meta` table.

    Uses the module-level `db` connection.  Returns None when the table has
    no rows.
    """
    cur = db.conn.cursor()
    cur.execute("""SELECT tcpts_mean FROM meta""")
    first = cur.fetchone()
    return first[0] if first else None
305
306
def tsFilteredHistogram():
    """Show histograms of packet RTT differences grouped by TSval difference.

    Reads the 'train' and 'test' subseries for the 'long' case from the
    module-level `db` and overlays four step histograms of
    unusual_packet - other_packet: all samples, samples whose tsval
    difference is exactly 0, and samples whose tsval difference divided by
    getTCPTSPrecision() rounds to a magnitude of 1, or of 2 and more.  The
    x-range is clipped to the 0.8%..99.2% span of all differences.
    """
    tcpts_precision = getTCPTSPrecision()

    num_bins = 500
    samples = db.subseries('train','long')+db.subseries('test','long')

    def rtt_delta(sample):
        return sample['unusual_packet']-sample['other_packet']

    def ts_ticks(sample):
        # Magnitude of the tsval difference in units of the tsval precision.
        return abs(int(round((sample['unusual_tsval']-sample['other_tsval'])/tcpts_precision)))

    every_delta = [rtt_delta(x) for x in samples]
    same_tick = [rtt_delta(x) for x in samples if x['unusual_tsval']-x['other_tsval'] == 0]
    one_tick = [rtt_delta(x) for x in samples if ts_ticks(x) == 1]
    many_ticks = [rtt_delta(x) for x in samples if ts_ticks(x) >= 2]

    every_delta.sort()
    cut_off_low = every_delta[int(len(every_delta)*0.008)]
    cut_off_high = every_delta[int(len(every_delta)*0.992)]

    plt.clf()
    # One step histogram per group, all sharing bins and x-range.
    series = (
        (every_delta, 'black', 'All Packets'),
        (same_tick, 'blue', 'TSval Difference == 0'),
        (one_tick, 'orange', 'TSval Difference == 1'),
        (many_ticks, 'red', 'TSval Difference >= 2'),
    )
    for values, colour, caption in series:
        plt.hist(values, num_bins, normed=0, color=colour, histtype='step', alpha=0.8,
                 range=(cut_off_low,cut_off_high), label=caption)

    plt.xlabel('RTT Difference')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.legend()
    plt.show()
363
364#tsFilteredHistogram()
365
366
def exampleSummaryHistogram():
    """Show a histogram of packet RTT differences with percentile marker boxes.

    Reads the 'train' and 'test' subseries for the 'long' case from the
    module-level `db`, histograms unusual_packet - other_packet clipped to
    the 0.3%..99.7% span, and adds fixed-size translucent boxes at several
    percentiles around the median, at x=50, and at the mean of four of those
    percentiles.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train','long')+db.subseries('test','long')
    deltas = sorted(entry['unusual_packet']-entry['other_packet'] for entry in samples)

    cut_off_low = deltas[int(len(deltas)*0.003)]
    cut_off_high = deltas[int(len(deltas)*0.997)]

    plt.clf()
    plt.hist(deltas, num_bins, normed=0, color='black', histtype='step', alpha=0.8,
             range=(cut_off_low,cut_off_high), label='all')

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')

    w = 25
    l1,r1,l2,r2,l3,r3 = numpy.percentile(deltas, (50-w,50+w,50-w/2,50+w/2,(50-w)/2,(50+w)/2+50))

    # (left edge, fill colour) of every marker box, in drawing order.
    markers = (
        (l1, "blue"),
        (r1, "blue"),
        (l2, "green"),
        (r2, "green"),
        (l3, "green"),
        (r3, "green"),
        (50, "black"),
        (numpy.mean((l1,r1,l2,r2)), "red"),
    )
    axes = plt.gca()
    for left_edge, shade in markers:
        axes.add_patch(FancyBboxPatch((left_edge, 0), 2500, 5000, boxstyle='square',
                                      facecolor=shade, alpha=0.4, edgecolor='none'))

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.show()
406
407#exampleSummaryHistogram()
408
409
410
411#all_data = longs+shorts
412#all_data.sort()
413#cut_off_low = all_data[0]
414#cut_off_high = all_data[int(len(all_data)*0.997)]
415
416
def plotSingleProbe(probe_id=None):
    """Show an event plot of the packet times recorded for one probe.

    Packets are read from the `packets` table of the module-level `db` and
    drawn on four rows: sent with payload, sent without payload, received
    with payload, received without payload.

    Args:
        probe_id: id of the probe to plot.  When None, the probe is picked
            from `analysis` rows with an empty `suspect` column: the 11th
            highest probe_id (ORDER BY probe_id DESC, OFFSET 10).  If no such
            row exists a warning is written to stderr and nothing is plotted.
    """
    cursor = db.conn.cursor()
    if probe_id is None:
        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        row = cursor.fetchone()
        if row is None:
            # Previously this surfaced as a TypeError from indexing None.
            sys.stderr.write("WARN: couldn't find a non-suspect probe to plot.\n")
            return
        probe_id = row[0]

    def observed_times(sent_flag):
        """Return (payload_times, other_times) for packets with the given sent flag."""
        cursor.execute("""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?""",
                       (probe_id, sent_flag))
        pkts = cursor.fetchall()
        with_payload = [observed for observed, payload_len in pkts if payload_len != 0]
        without_payload = [observed for observed, payload_len in pkts if payload_len == 0]
        return with_payload, without_payload

    sent_payload, sent_other = observed_times(1)
    rcvd_payload, rcvd_other = observed_times(0)

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    #plt.savefig('../img/http-packet-times.svg')
    plt.show()
452
453#plotSingleProbe()
454
455
def graphTestResults():
    """Plot each classifier's test error rate against the number of observations.

    For every classifier in `classifier_results` (module-level `db`):
      * pick the parameter set that gets the average of false_positives and
        false_negatives below 5.0 with the fewest observations; if none does,
        fall back to the parameter set with the lowest error;
      * plot that parameter set's error for every num_observations.

    A ranking is printed to stdout (classifiers that reach the threshold
    first, ordered by observations needed; then the rest ordered by error)
    with the first entry marked as the winner.  The chart is saved as an SVG
    named after the database file in a hard-coded output directory.
    """
    basename = os.path.basename(options.db_file)
    basename,ext = os.path.splitext(basename)

    chartname = "/home/tim/blindspot/research/timing-analysis/paper/figures/results/%s.svg" % (basename)
    print(chartname)

    plt.clf()
    plt.title("Test Results")
    plt.xlabel('sample size')
    plt.ylabel('error rate')
    legend = []
    colors = ['red','blue','green','purple','orange','black','brown']
    color_id = 0

    cursor = db.conn.cursor()
    query = """
      SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
    """
    cursor.execute(query)
    classifiers = [c[0] for c in cursor]

    best_obs = []    # (num_observations, classifier) for those under the threshold
    best_error = []  # (error, classifier) for those never under the threshold
    max_obs = 0
    for classifier in classifiers:
        query="""
        SELECT params,num_observations FROM classifier_results
        WHERE trial_type='test'
         AND classifier=:classifier
         AND (false_positives+false_negatives)/2.0 < 5.0
        ORDER BY num_observations,(false_positives+false_negatives)
        LIMIT 1
        """
        cursor.execute(query, {'classifier':classifier})
        row = cursor.fetchone()
        if row is None:
            # Divide by 2.0 like the other queries so integer columns are not
            # truncated by SQLite's integer division.
            query="""
            SELECT params,(false_positives+false_negatives)/2.0 FROM classifier_results
            WHERE trial_type='test' and classifier=:classifier
            ORDER BY (false_positives+false_negatives),num_observations
            LIMIT 1
            """
            cursor.execute(query, {'classifier':classifier})
            row = cursor.fetchone()
            if row is None:
                sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
                continue

            best_error.append((row[1], classifier))
        else:
            best_obs.append((row[1], classifier))

        best_params = row[0]
        query="""
        SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
        WHERE trial_type='test'
         AND classifier=:classifier
         AND params=:params
        ORDER BY num_observations
        """
        cursor.execute(query, {'classifier':classifier,'params':best_params})

        num_obs = []
        performance = []
        for row in cursor:
            max_obs = max(max_obs, row[0])
            num_obs.append(row[0])
            performance.append(row[1])
        #print(num_obs,performance)
        path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
        plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
        legend.append((classifier,path))
        # Cycle through the palette when there are more classifiers than colors.
        color_id = (color_id+1) % len(colors)

    best_obs.sort()
    best_error.sort()
    winner = None
    for bo in best_obs:
        sys.stdout.write("%d obs / %s" % bo)
        if winner is None:
            sys.stdout.write(" (winner)")
            winner = bo
        print()

    for be in best_error:
        sys.stdout.write("%f%% error / %s" % be)
        if winner is None:
            sys.stdout.write(" (winner)")
            winner = be
        print()

    plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='x-small')
    # Dashed reference line at the 5.0 error threshold used in the query above.
    plt.plot([0, max_obs], [5.0, 5.0], "k--")
    plt.xlabel('Number of Observations')
    plt.ylabel('Error Rate')
    plt.savefig(chartname)
    #plt.show()
556   
# Script entry point: render the classifier results chart, then stop.
graphTestResults()

# Unconditional exit: every statement after this line is unreachable and is
# kept only as a record of earlier experiments.
sys.exit(0)

# --- unreachable: packet RTT over time for the 'train' probes ---
short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
short_overtime.sort()
long_overtime.sort()
diff_overtime.sort()

plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
#plt.savefig('paper/figures/comcast-powerboost1.png')
plt.show()



# --- unreachable: sent/received packet times of one request ---
# NOTE(review): sent_times and rcvd_times are not assigned anywhere in the
# visible part of this file.
plt.clf()
plt.title("Simple HTTP Request")
plt.xlabel('Time of Day')
plt.ylabel('')
s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
plt.legend((s,r), ('sent','received'), scatterpoints=1)
plt.show()

sys.exit(0)
# Unreachable: the script exits unconditionally via sys.exit(0) before this
# point.  Kept as a record of earlier experiments.
short_overtime,long_overtime,diff_overtime = None,None,None


# --- histogram of the 'reported' RTT differences ---
num_bins = 300
reported_diffs.sort()
# NOTE(review): the indices are computed from len(diffs) although the list
# being indexed is reported_diffs.
cut_off_low = reported_diffs[int(len(diffs)*0.003)]
cut_off_high = reported_diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(reported_diffs, num_bins, normed=1, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')




# --- histogram of the packet RTT differences ---
num_bins = 300
diffs.sort()
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(diffs, num_bins, normed=1, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
636
637
638
# Unreachable: the script exits unconditionally via sys.exit(0) before this
# point.  Overlaid histogram of the short and long RTT series, saved as SVG.
# NOTE(review): `shorts` and `longs` are only assigned in commented-out lines
# earlier in the file.
num_bins = 150
# the histogram of the data
n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - RTT short and long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
#plt.show()
plt.savefig('paper/figures/comcast-powerboost2.svg')
656
657
658
659
# Explicit import: functools.partial is used below, and the file header does
# not import functools itself (it would otherwise only be reachable if one of
# the nanownlib star imports happens to expose it).
import functools

# Number of bootstrap trials run for every subsample size (also used by the
# bootstrap sections further down).
num_trials = 200


# Bootstrap evaluation of boxTest: for each subsample size, the percentage of
# trials that return 1 on `derived` (red) and the percentage that return 0 on
# `null_derived` (blue).
subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
estimator = functools.partial(boxTest, 0.07, 0.08)

performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    positives = sum(1 for e in estimates if e == 1)
    performance.append(100.0*positives/num_trials)

null_performance = []
for subsample_size in subsample_sizes:
    null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
    negatives = sum(1 for e in null_estimates if e == 0)
    null_performance.append(100.0*negatives/num_trials)

plt.clf()
plt.title("boxTest bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
plt.show()
682
683
684
# Bootstrap evaluation of the diffMedian estimator: for each subsample size,
# the percentage of trials whose estimate lies strictly between 0.9x and 1.1x
# of expected_mean.
subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
estimator = diffMedian

lower_bound = expected_mean*0.9
upper_bound = expected_mean*1.1

performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    in_band = [e for e in estimates if lower_bound < e < upper_bound]
    performance.append(100.0*len(in_band)/num_trials)

plt.clf()
plt.title("diff median bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
plt.show()
698
699
700
701
# Explicit import: functools.partial is used below, and the file header does
# not import functools itself (it would otherwise only be reachable if one of
# the nanownlib star imports happens to expose it).
import functools

# Bootstrap evaluation of estimateMean (hypotenuse trust, 0.40 as the last
# bound argument) for each weight function: one plot per weight function
# showing the percentage of trials whose estimate lies strictly between 0.9x
# and 1.1x of expected_mean.
subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
weight_funcs = (linearWeights, prunedWeights)
for wf in weight_funcs:
    estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
    performance = []
    for subsample_size in subsample_sizes:
        estimates = bootstrap(derived, subsample_size, num_trials, estimator)
        in_band = sum(1 for e in estimates
                      if e > expected_mean*0.9 and e < expected_mean*1.1)
        performance.append(100.0*in_band/num_trials)

    plt.clf()
    plt.title(repr(wf))
    plt.xlabel('sample size')
    plt.ylabel('performance')
    plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
    plt.show()
717
718
719
# Overlaid outline histograms of the `tsshorts` and `tslongs` samples.
num_bins = 300
# the histogram of the data
# `density=True` replaces the `normed=1` keyword, which was removed from
# Matplotlib in release 3.1.
n, bins, patches = plt.hist(
    (tsshorts, tslongs),
    num_bins,
    density=True,
    label=['short', 'long'],
    color=['red', 'blue'],
    histtype='step',
    alpha=0.8,
)
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, density=True, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
# The x axis is labelled tsval_rtt to agree with the plotted ts* samples and
# the title; it previously read 'packet_rtt', presumably a copy-paste
# leftover from the packet RTT histogram.
plt.xlabel('tsval_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - tsval_rtt short vs long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.show()
735
736
737
738   
####
# Settings shared by the trust-method comparison plots below.
#trust_methods = [min,max,sum,difference,product]
trust_methods = [sum, product, hypotenuse]
# One colour per plotted series, taken in order.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'black']
weight_methods = [prunedWeights, linearWeights]
# Alpha values 0.00, 0.02, ..., 0.98.
alphas = [step/100.0 for step in range(0, 100, 2)]
745
746
747
748
# Error of the linearly weighted mean (weightedMean - expected_mean) as a
# function of alpha, one scatter series per trust method.
plt.clf()
plt.title(r'Trust Method Comparison - Linear')
plt.xlabel('Alpha')
plt.ylabel('Mean error')

paths = []
for idx, tm in enumerate(trust_methods):
    trust = trustValues(derived, tm)
    series = [
        weightedMean(derived, linearWeights(derived, trust, alpha)) - expected_mean
        for alpha in alphas
    ]
    # idx selects the next colour for each successive trust method
    paths.append(plt.scatter(alphas, series, s=1, color=colors[idx], alpha=0.6))

legend_labels = [repr(tm) for tm in trust_methods]
plt.legend(paths, legend_labels, scatterpoints=1)
plt.show()
765
766
767
# Error of the pruned-weight mean (weightedMean - expected_mean) as a
# function of alpha, one scatter series per trust method.
plt.clf()
plt.title(r'Trust Method Comparison - Pruned')
plt.xlabel('Alpha')
plt.ylabel('Mean error')

paths = []
for idx, tm in enumerate(trust_methods):
    trust = trustValues(derived, tm)
    series = [
        weightedMean(derived, prunedWeights(derived, trust, alpha)) - expected_mean
        for alpha in alphas
    ]
    # idx selects the next colour for each successive trust method
    paths.append(plt.scatter(alphas, series, s=1, color=colors[idx], alpha=0.6))

legend_labels = [repr(tm) for tm in trust_methods]
plt.legend(paths, legend_labels, scatterpoints=1)
plt.show()


# Stop the script here.
sys.exit(0)
787
# Error of the inverted-weight mean (weightedMean - expected_mean) as a
# function of alpha, one scatter series per trust method.
plt.clf()
plt.title(r'Trust Method Comparison - Inverted')
plt.xlabel('Alpha')
plt.ylabel('Mean error')

paths = []
for idx, tm in enumerate(trust_methods):
    trust = trustValues(derived, tm)
    series = [
        weightedMean(derived, invertedWeights(derived, trust, alpha)) - expected_mean
        for alpha in alphas
    ]
    # idx selects the next colour for each successive trust method
    paths.append(plt.scatter(alphas, series, s=1, color=colors[idx], alpha=0.6))

legend_labels = [repr(tm) for tm in trust_methods]
plt.legend(paths, legend_labels, scatterpoints=1)
plt.show()
804
805
# Error of the arctangent-weight mean (weightedMean - expected_mean) as a
# function of alpha, one scatter series per trust method.
plt.clf()
plt.title(r'Trust Method Comparison - Arctangent')
plt.xlabel('Alpha')
plt.ylabel('Mean error')

paths = []
for idx, tm in enumerate(trust_methods):
    trust = trustValues(derived, tm)
    series = [
        weightedMean(derived, arctanWeights(derived, trust, alpha)) - expected_mean
        for alpha in alphas
    ]
    # idx selects the next colour for each successive trust method
    paths.append(plt.scatter(alphas, series, s=1, color=colors[idx], alpha=0.6))

legend_labels = [repr(tm) for tm in trust_methods]
plt.legend(paths, legend_labels, scatterpoints=1)
plt.show()
Note: See TracBrowser for help on using the repository browser.