source: trunk/bin/graph @ 11

Last change on this file since 11 was 11, checked in by tim, 9 years ago

.

  • Property svn:executable set to *
File size: 16.2 KB
Line 
1#!/usr/bin/env python3
2
3import sys
4import os
5import time
6import random
7import tempfile
8import argparse
9import socket
10import json
11
12import numpy
13import matplotlib.mlab as mlab
14import matplotlib.pyplot as plt
15
16
VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    # Development checkout: locate this script and put its sibling ../lib
    # directory on the import path so the bundled nanownlib is importable.
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        # __file__ may be undefined (e.g. interactive/exec contexts);
        # fall back to argv[0].  Was a bare "except:".
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            pass
    sys.path.append("%s/../lib" % script_dir)
28
29from nanownlib import *
30from nanownlib.stats import *
31import nanownlib.storage
32
33
# Command-line interface: the only argument is the database to graph.
# (The empty description/help strings made --help useless; fill them in.)
parser = argparse.ArgumentParser(
    description="Graph statistics and classifier test results from a nanown sample database.")
parser.add_argument('db_file', default=None,
                    help='path to the sqlite database produced by sample collection')
options = parser.parse_args()
db = nanownlib.storage.db(options.db_file)
40
41
def differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample (unusual - other) RTT deltas for a case.

    Collects the deltas from both the 'train' and 'test' subseries of the
    database, in that order.  rtt_type selects which measurement columns
    are compared (e.g. 'packet', 'reported', 'tsval').
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    deltas = []
    for group in ('train', 'test'):
        for sample in db.subseries(group, unusual_case):
            deltas.append(sample[unusual_key] - sample[other_key])
    return deltas
46
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return (unusual - other) RTT deltas from the 'train_null' subseries.

    Same shape as differences(), but drawn from the null-hypothesis series
    only, for use as a baseline.
    """
    unusual_key, other_key = 'unusual_' + rtt_type, 'other_' + rtt_type
    return [row[unusual_key] - row[other_key]
            for row in db.subseries('train_null', unusual_case)]
50
51
def timeSeries(db, probe_type, unusual_case):
    """Yield, per probe of the unusual case, its RTT paired with the mean RTT
    of all other cases in the same sample.

    Each yielded dict has keys: 'time_of_day', <unusual_case> (that probe's
    packet_rtt), and 'other_cases' (avg packet_rtt of the sample's other
    test cases).  probe_type filters on probes.type (e.g. 'train').
    """
    cursor = db.conn.cursor()
    # Inner subquery u: all probes of the unusual case (joined to analysis for
    # packet_rtt).  Correlated subquery: average packet_rtt of every *other*
    # test case within the same sample.
    query="""
      SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
      FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """

    params = {"probe_type":probe_type,"unusual_case":unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        # NOTE(review): indexing rows by column name assumes the connection's
        # row_factory yields mapping-style rows (e.g. sqlite3.Row) — presumably
        # configured inside nanownlib.storage; confirm.
        yield {'time_of_day':row['time_of_day'],unusual_case:row['uc'],'other_cases':row['oc']}
#samples,derived,null_derived = parse_data(input1)

#trust = trustValues(derived, sum)
#weights = linearWeights(derived, trust, 0.25)
#print('(test): %f' % weightedMean(derived,weights))

# Packet-RTT differences for the 'long' unusual case (train+test), plus the
# application-reported timing differences for comparison.
diffs = list(differences(db, 'long'))
reported_diffs = list(differences(db, 'long', 'reported'))
#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']

# Per-probe time series (sorted by time of day) used by the plots below.
short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
short_overtime.sort()
long_overtime.sort()
diff_overtime.sort()

# Robust location/spread estimators over the packet-RTT differences
# (midsummary/trimean/etc. come from nanownlib.stats).
print('packet_rtt diff median: %f' % statistics.median(diffs))
print('packet_rtt diff midhinge: %f' % midsummary(diffs))
print('packet_rtt diff trimean: %f' % trimean(diffs))
print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
print('packet_rtt diff MAD: %f' % mad(diffs))
try:
    print('reported diff trimean: %f' % trimean(reported_diffs))
    print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
    print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
    print('reported diff MAD: %f' % mad(reported_diffs))

    start = time.time()
    kresults = kfilter({},diffs)
    #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
    print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
    kresults = kfilter({},reported_diffs)
    #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
    print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
    print("kfilter time: %f" % (time.time()-start))
except Exception as e:
    # These stats are best-effort (reported data may be incomplete, kfilter
    # may fail); warn instead of silently swallowing everything, which the
    # previous bare "except: pass" did.  Also dropped an unused
    # "import cProfile" that sat inside the old try block.
    sys.stderr.write("WARN: reported/kfilter statistics failed: %s\n" % e)

print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))


#all_data = longs+shorts
#all_data.sort()
#cut_off_low = all_data[0]
#cut_off_high = all_data[int(len(all_data)*0.997)]
118
def plotSingleProbe(probe_id=None):
    """Event-plot the sent/received packet times of a single probe.

    probe_id: probe to plot; when None, a recent non-suspect probe (the
    10th newest) is selected from the analysis table.  Reads the
    module-level db and shows the plot interactively.
    """
    def _split_times(cursor, sent):
        # Observed timestamps for one direction (sent=1 outgoing, 0 incoming),
        # split into (payload-bearing packets, empty packets).
        cursor.execute("""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?""",
                       (probe_id, sent))
        pkts = cursor.fetchall()
        return ([row[0] for row in pkts if row[1] != 0],
                [row[0] for row in pkts if row[1] == 0])

    if probe_id is None:  # was "== None"; identity test is correct for None
        cursor = db.conn.cursor()
        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        probe_id = cursor.fetchone()[0]

    cursor = db.conn.cursor()
    sent_payload, sent_other = _split_times(cursor, 1)
    rcvd_payload, rcvd_other = _split_times(cursor, 0)

    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
    #cursor.execute(query, (probe_id,))
    #reported,tod = cursor.fetchone()
    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    sp = plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    so = plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    rp = plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    ro = plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    #plt.legend((s,r), ('sent','received'))
    #plt.savefig('../img/http-packet-times.svg')
    plt.show()
154
155#plotSingleProbe()
156
157
def graphTestResults():
    """Plot error rate vs. sample size for each classifier's best test run.

    For every classifier in classifier_results, the 'best' parameter set is
    the one reaching mean error (FP+FN)/2 below 5% with the fewest
    observations; if none qualifies, fall back to the overall lowest-error
    run.  Reads the module-level db; shows the plot interactively.
    """
    plt.clf()
    plt.title("Test Results")
    plt.xlabel('sample size')
    plt.ylabel('error rate')
    legend = []
    colors = ['red','blue','green','purple','orange','black','brown']
    color_id = 0

    cursor = db.conn.cursor()
    query = """
      SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
    """
    cursor.execute(query)
    classifiers = [c[0] for c in cursor]

    for classifier in classifiers:
        # Best params: fewest observations among runs under 5% mean error.
        query="""
        SELECT params FROM classifier_results
        WHERE trial_type='test'
         AND classifier=:classifier
         AND (false_positives+false_negatives)/2.0 < 5.0
        ORDER BY num_observations,(false_positives+false_negatives)
        LIMIT 1
        """
        cursor.execute(query, {'classifier':classifier})
        row = cursor.fetchone()
        if row is None:  # was "== None"
            # Fallback: lowest error regardless of sample size.
            query="""
            SELECT params FROM classifier_results
            WHERE trial_type='test' and classifier=:classifier
            ORDER BY (false_positives+false_negatives),num_observations
            LIMIT 1
            """
            cursor.execute(query, {'classifier':classifier})
            row = cursor.fetchone()
            if row is None:
                sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
                continue

        best_params = row[0]
        # All test runs for the chosen params, ordered by sample size.
        query="""
        SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
        WHERE trial_type='test'
         AND classifier=:classifier
         AND params=:params
        ORDER BY num_observations
        """
        cursor.execute(query, {'classifier':classifier,'params':best_params})

        num_obs = []
        performance = []
        for row in cursor:
            num_obs.append(row[0])
            performance.append(row[1])
        #print(num_obs,performance)
        path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
        plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
        legend.append((classifier,path))
        color_id = (color_id+1) % len(colors)

    plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='xx-small')
    plt.show()
223       
graphTestResults()

# Exit here: everything below this point is retained exploratory plotting
# code that never runs.
sys.exit(0)
227
# NOTE(review): unreachable — the module exits via sys.exit(0) above.
plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
#plt.savefig('paper/figures/comcast-powerboost1.png')
plt.show()



plt.clf()
plt.title("Simple HTTP Request")
plt.xlabel('Time of Day')
plt.ylabel('')
# NOTE(review): sent_times/rcvd_times are not defined anywhere visible in this
# file — likely remnants of the commented-out parse_data() flow above.
s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
plt.legend((s,r), ('sent','received'), scatterpoints=1)
plt.show()

sys.exit(0)
short_overtime,long_overtime,diff_overtime = None,None,None
253
# NOTE(review): unreachable (sys.exit(0) above).
num_bins = 300
reported_diffs.sort()
# NOTE(review): these cut-offs index reported_diffs using len(diffs) — looks
# like a copy/paste slip; only harmless if the two lists have equal length.
cut_off_low = reported_diffs[int(len(diffs)*0.003)]
cut_off_high = reported_diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
# NOTE(review): the 'normed' kwarg was removed in matplotlib 3.x ('density'
# replaced it) — this call would fail on a modern matplotlib.
n, bins, patches = plt.hist(reported_diffs, num_bins, normed=1, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')




num_bins = 300
diffs.sort()
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(diffs, num_bins, normed=1, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
296
297
298
# NOTE(review): unreachable; 'shorts'/'longs' exist only in commented-out
# code earlier in this file.
num_bins = 150
# the histogram of the data
n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - RTT short and long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
#plt.show()
plt.savefig('paper/figures/comcast-powerboost2.svg')
317
318
319
# NOTE(review): unreachable.  'derived', 'bootstrap', 'boxTest', 'diffMedian',
# 'expected_mean' are presumably supplied by the nanownlib star imports, but
# 'functools' is never imported in this file — this would raise NameError if
# ever reached.
num_trials = 200


subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
estimator = functools.partial(boxTest, 0.07, 0.08)
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)

null_performance = []
for subsample_size in subsample_sizes:
    null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
    null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)

plt.clf()
plt.title("boxTest bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
plt.show()



subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
estimator = diffMedian
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

plt.clf()
plt.title("diff median bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
plt.show()
358
359
360
361
# NOTE(review): unreachable; same missing-'functools' issue as the block above.
subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
weight_funcs = (linearWeights, prunedWeights)
for wf in weight_funcs:
    estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
    performance = []
    for subsample_size in subsample_sizes:
        estimates = bootstrap(derived, subsample_size, num_trials, estimator)
        performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

    plt.clf()
    plt.title(repr(wf))
    plt.xlabel('sample size')
    plt.ylabel('performance')
    plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
    plt.show()



# NOTE(review): 'tsshorts'/'tslongs' are not defined anywhere visible in this
# file.
num_bins = 300
# the histogram of the data
n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - tsval_rtt short vs long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.show()
395
396
397
398   
####
# NOTE(review): unreachable.  Compares trust aggregation methods by mean
# error of the resulting weighted mean across a sweep of alpha values.
# 'trustValues', 'weightedMean', weight functions etc. presumably come from
# the nanownlib star imports.
#trust_methods = [min,max,sum,difference,product]
trust_methods = [sum,product,hypotenuse]
colors = ['red','blue','green','purple','orange','black']
weight_methods = [prunedWeights, linearWeights]
alphas = [i/100.0 for i in range(0,100,2)]




plt.clf()
plt.title(r'Trust Method Comparison - Linear')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = linearWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()



plt.clf()
plt.title(r'Trust Method Comparison - Pruned')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = prunedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()


sys.exit(0)
447
# NOTE(review): doubly unreachable (two module-level sys.exit(0) calls above).
plt.clf()
plt.title(r'Trust Method Comparison - Inverted')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = invertedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()


plt.clf()
plt.title(r'Trust Method Comparison - Arctangent')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = arctanWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()
Note: See TracBrowser for help on using the repository browser.