source: trunk/bin/graph @ 8

#!/usr/bin/env python3

import sys
import os
import time
import random
import tempfile
import argparse
import socket
import json
# statistics and functools are used directly in this script; import them
# explicitly rather than relying on the star imports from nanownlib below.
import statistics
import functools

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt


VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    # Development mode: locate this script and add the adjacent ../lib to the import path.
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            pass
    sys.path.append("%s/../lib" % script_dir)

from nanownlib import *
from nanownlib.stats import *
import nanownlib.storage


parser = argparse.ArgumentParser(
    description="Plot packet RTT differences and distributions from a nanown sample database")
parser.add_argument('db_file', default=None,
                    help='path to the nanown database file to read')
options = parser.parse_args()
db = nanownlib.storage.db(options.db_file)


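# differences() yields, for each sample of the unusual case, the gap between its
# value in `column` (packet_rtt by default) and the average of that column over
# all other test cases in the same sample.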
def differences(db, unusual_case, column='packet_rtt'):
    cursor = db.conn.cursor()
    query="""
      SELECT %(column)s-(SELECT avg(%(column)s) FROM probes,analysis
                         WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type in ('train','test') AND sample=u.sample)
      FROM (SELECT probes.sample,%(column)s FROM probes,analysis
                         WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type in ('train','test')) u
      """ % {"column":column}
    params = {"unusual_case":unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        yield row[0]


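# timeSeries() yields one record per sample of the unusual case: its time_of_day,
# its packet_rtt, and the average packet_rtt of the other cases for that sample.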
def timeSeries(db, probe_type, unusual_case):
    cursor = db.conn.cursor()
    query="""
      SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
      FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """

    params = {"probe_type":probe_type,"unusual_case":unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        yield {'time_of_day':row['time_of_day'],unusual_case:row['uc'],'other_cases':row['oc']}
#samples,derived,null_derived = parse_data(input1)

#trust = trustValues(derived, sum)
#weights = linearWeights(derived, trust, 0.25)
#print('(test): %f' % weightedMean(derived,weights))

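# Per-sample differences for the 'long' case, from the raw packet_rtt column and
# from the 'reported' column.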
diffs = list(differences(db, 'long'))
reported_diffs = list(differences(db, 'long', 'reported'))
#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']

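# RTT over time of day for the short and long cases, plus the per-sample
# long-vs-others difference, sorted by time for plotting.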
short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
short_overtime.sort()
long_overtime.sort()
diff_overtime.sort()

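# Robust summaries of the differences: median, midhinge, trimean, and MAD.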
print('packet_rtt diff median: %f' % statistics.median(diffs))
print('packet_rtt diff midhinge: %f' % midhinge(diffs))
print('packet_rtt diff trimean: %f' % trimean(diffs))
print('packet_rtt diff MAD: %f' % mad(diffs))
print('reported diff trimean: %f' % trimean(reported_diffs))
print('reported diff MAD: %f' % mad(reported_diffs))


#all_data = longs+shorts
#all_data.sort()
#cut_off_low = all_data[0]
#cut_off_high = all_data[int(len(all_data)*0.997)]


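# Scatter plot of short-case RTT, long-case RTT, and their difference against
# time of day.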
plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
#plt.savefig('paper/figures/comcast-powerboost1.png')
plt.show()

short_overtime,long_overtime,diff_overtime = None,None,None


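# Histogram of the 'reported' differences, trimmed to the central 99.4%
# (0.3rd-99.7th percentile) to drop outliers.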
num_bins = 300
reported_diffs.sort()
cut_off_low = reported_diffs[int(len(reported_diffs)*0.003)]
cut_off_high = reported_diffs[int(len(reported_diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(reported_diffs, num_bins, density=True, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')




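# The same histogram for the raw packet_rtt differences.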
num_bins = 300
diffs.sort()
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(diffs, num_bins, density=True, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
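
# NOTE: Everything below this point is unreachable exploratory code. It depends on
# variables (shorts, longs, derived, null_derived, expected_mean, tsshorts, tslongs)
# that are only produced by the commented-out parse_data() call above.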



num_bins = 150
# the histogram of the data
n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - RTT short and long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
#plt.show()
plt.savefig('paper/figures/comcast-powerboost2.svg')




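# Bootstrap experiment: how often the boxTest estimator flags the unusual case on
# real data (performance), and how often it correctly reports nothing on null data
# (null_performance), across a range of subsample sizes.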
num_trials = 200


subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
estimator = functools.partial(boxTest, 0.07, 0.08)
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)

null_performance = []
for subsample_size in subsample_sizes:
    null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
    null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)

plt.clf()
plt.title("boxTest bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
plt.show()



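# Bootstrap experiment: how often the diffMedian estimate falls within 10% of
# expected_mean, across a range of subsample sizes.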
subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
estimator = diffMedian
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

plt.clf()
plt.title("diff median bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
plt.show()




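# Same accuracy check for weighted-mean estimators built from linearWeights and
# prunedWeights (hypotenuse trust, alpha=0.40).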
subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
weight_funcs = (linearWeights, prunedWeights)
for wf in weight_funcs:
    estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
    performance = []
    for subsample_size in subsample_sizes:
        estimates = bootstrap(derived, subsample_size, num_trials, estimator)
        performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

    plt.clf()
    plt.title(repr(wf))
    plt.xlabel('sample size')
    plt.ylabel('performance')
    plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
    plt.show()



num_bins = 300
# the histogram of the data
n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - tsval_rtt short vs long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.show()




####
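# Compare trust methods (sum, product, hypotenuse) by plotting the error of the
# trust-weighted mean (relative to expected_mean) across alpha values, for each
# of the weighting schemes below.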
#trust_methods = [min,max,sum,difference,product]
trust_methods = [sum,product,hypotenuse]
colors = ['red','blue','green','purple','orange','black']
weight_methods = [prunedWeights, linearWeights]
alphas = [i/100.0 for i in range(0,100,2)]




plt.clf()
plt.title(r'Trust Method Comparison - Linear')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = linearWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()



plt.clf()
plt.title(r'Trust Method Comparison - Pruned')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = prunedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()


sys.exit(0)

plt.clf()
plt.title(r'Trust Method Comparison - Inverted')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = invertedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()


plt.clf()
plt.title(r'Trust Method Comparison - Arctangent')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = arctanWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()