source: trunk/bin/graph @ 9

Last change on this file since 9 was 6, checked in by tim, 9 years ago

.

  • Property svn:executable set to *
File size: 10.8 KB
#!/usr/bin/env python3

import sys
import os
import time
import random
import tempfile
import argparse
import socket
import json
import statistics   # used below (statistics.median)
import functools    # used below (functools.partial)

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt


VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except:
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except:
            pass
    sys.path.append("%s/../lib" % script_dir)

from nanownlib import *
from nanownlib.stats import *
import nanownlib.storage


parser = argparse.ArgumentParser(
    description="Generate exploratory graphs from a sample database.")
parser.add_argument('db_file', default=None,
                    help='sample database file to graph')
options = parser.parse_args()
db = nanownlib.storage.db(options.db_file)


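# Note on the data model assumed by the queries below (inferred from the SQL
# in this file, not from nanownlib.storage itself): a probes table with
# id, sample, test_case, type, and time_of_day columns, joined to an analysis
# table with probe_id, packet_rtt, and reported columns.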
def differences(db, unusual_case, column='packet_rtt'):
    """For each sample of the unusual case (train and test probes only),
    yield its value of `column` minus the mean value of `column` across the
    other test cases in the same sample."""
    cursor = db.conn.cursor()
    query="""
      SELECT %(column)s-(SELECT avg(%(column)s) FROM probes,analysis
                         WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type in ('train','test') AND sample=u.sample)
      FROM (SELECT probes.sample,%(column)s FROM probes,analysis
                         WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type in ('train','test')) u
      """ % {"column":column}
    params = {"unusual_case":unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        yield row[0]


def timeSeries(db, probe_type, unusual_case):
    """For each sample of the unusual case (restricted to the given probe
    type), yield its time_of_day, its own packet_rtt, and the mean packet_rtt
    of the other test cases in that sample."""
    cursor = db.conn.cursor()
    query="""
      SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
      FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """

    params = {"probe_type":probe_type,"unusual_case":unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        yield {'time_of_day':row['time_of_day'],unusual_case:row['uc'],'other_cases':row['oc']}
#samples,derived,null_derived = parse_data(input1)

#trust = trustValues(derived, sum)
#weights = linearWeights(derived, trust, 0.25)
#print('(test): %f' % weightedMean(derived,weights))

diffs = list(differences(db, 'long'))
reported_diffs = list(differences(db, 'long', 'reported'))
#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']

short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
short_overtime.sort()
long_overtime.sort()
diff_overtime.sort()

print('packet_rtt diff median: %f' % statistics.median(diffs))
print('packet_rtt diff midhinge: %f' % midhinge(diffs))
print('packet_rtt diff trimean: %f' % trimean(diffs))
print('packet_rtt diff MAD: %f' % mad(diffs))
print('reported diff trimean: %f' % trimean(reported_diffs))
print('reported diff MAD: %f' % mad(reported_diffs))
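
# Stdlib cross-check of the robust statistics above.  midhinge(), trimean(),
# and mad() come from nanownlib.stats; the formulas below are the textbook
# definitions and are assumed (not verified) to match those implementations.
# statistics.quantiles() requires Python 3.8+, so the check is skipped on
# older interpreters.
if hasattr(statistics, 'quantiles'):
    q1, q2, q3 = statistics.quantiles(diffs, n=4)
    med = statistics.median(diffs)
    print('check midhinge: %f' % ((q1 + q3) / 2.0))
    print('check trimean: %f' % ((q1 + 2*q2 + q3) / 4.0))
    print('check MAD: %f' % statistics.median(abs(d - med) for d in diffs))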


#all_data = longs+shorts
#all_data.sort()
#cut_off_low = all_data[0]
#cut_off_high = all_data[int(len(all_data)*0.997)]


plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
#plt.savefig('paper/figures/comcast-powerboost1.png')
plt.show()

short_overtime,long_overtime,diff_overtime = None,None,None


num_bins = 300
reported_diffs.sort()
# trim the extreme 0.3% at each tail so outliers don't dominate the histogram range
cut_off_low = reported_diffs[int(len(reported_diffs)*0.003)]
cut_off_high = reported_diffs[int(len(reported_diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(reported_diffs, num_bins, density=True, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')




num_bins = 300
diffs.sort()
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(diffs, num_bins, density=True, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
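
# Everything below this point is unreachable exploratory code kept for
# reference.  It references names (shorts, longs, derived, null_derived,
# expected_mean, tsshorts, tslongs) that are never defined above -- some
# appear only in commented-out code -- so it would not run even without the
# sys.exit(0).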



num_bins = 150
# the histogram of the data
n, bins, patches = plt.hist((shorts,longs), num_bins, density=True, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - RTT short and long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
#plt.show()
plt.savefig('paper/figures/comcast-powerboost2.svg')




num_trials = 200


subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
estimator = functools.partial(boxTest, 0.07, 0.08)
# percentage of bootstrap trials in which the estimator returns 1 on the real data...
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)

# ...and in which it returns 0 on the null data
null_performance = []
for subsample_size in subsample_sizes:
    null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
    null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)
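
# bootstrap() itself is provided by nanownlib.stats and its implementation is
# not shown here.  The helper below is only an illustrative sketch of the kind
# of resampling driver these loops assume (draw subsample_size observations
# with replacement, apply the estimator, repeat num_trials times); it is
# hypothetical and not used by this script.
def _bootstrap_sketch(data, subsample_size, num_trials, estimator):
    return [estimator(random.choices(data, k=subsample_size))
            for _ in range(num_trials)]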

plt.clf()
plt.title("boxTest bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
plt.show()



subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
estimator = diffMedian
# percentage of bootstrap estimates that fall within 10% of expected_mean
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

plt.clf()
plt.title("diff median bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
plt.show()




subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
weight_funcs = (linearWeights, prunedWeights)
for wf in weight_funcs:
    estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
    performance = []
    for subsample_size in subsample_sizes:
        estimates = bootstrap(derived, subsample_size, num_trials, estimator)
        performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

    plt.clf()
    plt.title(wf.__name__)
    plt.xlabel('sample size')
    plt.ylabel('performance')
    plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
    plt.show()



num_bins = 300
# the histogram of the data
n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, density=True, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - tsval_rtt short vs long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.show()




####
#trust_methods = [min,max,sum,difference,product]
trust_methods = [sum,product,hypotenuse]
colors = ['red','blue','green','purple','orange','black']
weight_methods = [prunedWeights, linearWeights]
alphas = [i/100.0 for i in range(0,100,2)]



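
# The four "Trust Method Comparison" blocks below are identical except for the
# weighting function being swept.  The helper below is a hypothetical sketch of
# how they could be collapsed into one routine (it reuses the same globals --
# trust_methods, colors, alphas, derived, expected_mean -- and is not called by
# this script).
def _plot_trust_comparison(label, weight_func):
    plt.clf()
    plt.title(r'Trust Method Comparison - %s' % label)
    plt.xlabel('Alpha')
    plt.ylabel('Mean error')
    paths = []
    for tm in trust_methods:
        trust = trustValues(derived, tm)
        series = [weightedMean(derived, weight_func(derived, trust, alpha)) - expected_mean
                  for alpha in alphas]
        paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)], alpha=0.6))
    plt.legend(paths, [tm.__name__ for tm in trust_methods], scatterpoints=1)
    plt.show()
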
plt.clf()
plt.title(r'Trust Method Comparison - Linear')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = linearWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [tm.__name__ for tm in trust_methods], scatterpoints=1)
plt.show()



plt.clf()
plt.title(r'Trust Method Comparison - Pruned')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = prunedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [tm.__name__ for tm in trust_methods], scatterpoints=1)
plt.show()


sys.exit(0)

plt.clf()
plt.title(r'Trust Method Comparison - Inverted')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = invertedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [tm.__name__ for tm in trust_methods], scatterpoints=1)
plt.show()


plt.clf()
plt.title(r'Trust Method Comparison - Arctangent')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = arctanWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [tm.__name__ for tm in trust_methods], scatterpoints=1)
plt.show()