source: trunk/bin/graph @ 11

Last change on this file since 11 was 11, checked in by tim, 9 years ago

.

  • Property svn:executable set to *
File size: 16.2 KB
Line 
1#!/usr/bin/env python3
2
3import sys
4import os
5import time
6import random
7import tempfile
8import argparse
9import socket
10import json
11
12import numpy
13import matplotlib.mlab as mlab
14import matplotlib.pyplot as plt
15
16
VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    # Development checkout: locate this script and put its sibling ../lib
    # directory on the import path so the bundled nanownlib is importable.
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        # __file__ may be undefined (e.g. interactive/exec contexts);
        # fall back to argv[0].  Was a bare "except:".
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            pass
    sys.path.append("%s/../lib" % script_dir)
28
29from nanownlib import *
30from nanownlib.stats import *
31import nanownlib.storage
32
33
# Command-line interface: the only argument is the database to graph.
# (The empty description/help strings made --help useless; fill them in.)
parser = argparse.ArgumentParser(
    description="Graph statistics and classifier test results from a nanown sample database.")
parser.add_argument('db_file', default=None,
                    help='path to the sqlite database produced by sample collection')
options = parser.parse_args()
db = nanownlib.storage.db(options.db_file)
40
41
def differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample (unusual - other) RTT deltas for a case.

    Collects the deltas from both the 'train' and 'test' subseries of the
    database, in that order.  rtt_type selects which measurement columns
    are compared (e.g. 'packet', 'reported', 'tsval').
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    deltas = []
    for group in ('train', 'test'):
        for sample in db.subseries(group, unusual_case):
            deltas.append(sample[unusual_key] - sample[other_key])
    return deltas
46
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return (unusual - other) RTT deltas from the 'train_null' subseries.

    Same shape as differences(), but drawn from the null-hypothesis series
    only, for use as a baseline.
    """
    unusual_key, other_key = 'unusual_' + rtt_type, 'other_' + rtt_type
    return [row[unusual_key] - row[other_key]
            for row in db.subseries('train_null', unusual_case)]
50
51
def timeSeries(db, probe_type, unusual_case):
    """Yield, per probe of the unusual case, its RTT paired with the mean RTT
    of all other cases in the same sample.

    Each yielded dict has keys: 'time_of_day', <unusual_case> (that probe's
    packet_rtt), and 'other_cases' (avg packet_rtt of the sample's other
    test cases).  probe_type filters on probes.type (e.g. 'train').
    """
    cursor = db.conn.cursor()
    # Inner subquery u: all probes of the unusual case (joined to analysis for
    # packet_rtt).  Correlated subquery: average packet_rtt of every *other*
    # test case within the same sample.
    query="""
      SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
      FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """

    params = {"probe_type":probe_type,"unusual_case":unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        # NOTE(review): indexing rows by column name assumes the connection's
        # row_factory yields mapping-style rows (e.g. sqlite3.Row) — presumably
        # configured inside nanownlib.storage; confirm.
        yield {'time_of_day':row['time_of_day'],unusual_case:row['uc'],'other_cases':row['oc']}
#samples,derived,null_derived = parse_data(input1)

#trust = trustValues(derived, sum)
#weights = linearWeights(derived, trust, 0.25)
#print('(test): %f' % weightedMean(derived,weights))

# Packet-RTT differences for the 'long' unusual case (train+test), plus the
# application-reported timing differences for comparison.
diffs = list(differences(db, 'long'))
reported_diffs = list(differences(db, 'long', 'reported'))
#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']

# Per-probe time series (sorted by time of day) used by the plots below.
short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
short_overtime.sort()
long_overtime.sort()
diff_overtime.sort()

# Robust location/spread estimators over the packet-RTT differences
# (midsummary/trimean/etc. come from nanownlib.stats).
print('packet_rtt diff median: %f' % statistics.median(diffs))
print('packet_rtt diff midhinge: %f' % midsummary(diffs))
print('packet_rtt diff trimean: %f' % trimean(diffs))
print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
print('packet_rtt diff MAD: %f' % mad(diffs))
try:
    print('reported diff trimean: %f' % trimean(reported_diffs))
    print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
    print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
    print('reported diff MAD: %f' % mad(reported_diffs))

    start = time.time()
    kresults = kfilter({},diffs)
    #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
    print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
    kresults = kfilter({},reported_diffs)
    #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
    print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
    print("kfilter time: %f" % (time.time()-start))
except Exception as e:
    # These stats are best-effort (reported data may be incomplete, kfilter
    # may fail); warn instead of silently swallowing everything, which the
    # previous bare "except: pass" did.  Also dropped an unused
    # "import cProfile" that sat inside the old try block.
    sys.stderr.write("WARN: reported/kfilter statistics failed: %s\n" % e)

print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))


#all_data = longs+shorts
#all_data.sort()
#cut_off_low = all_data[0]
#cut_off_high = all_data[int(len(all_data)*0.997)]
118
def plotSingleProbe(probe_id=None):
    """Event-plot the sent/received packet times of a single probe.

    probe_id: probe to plot; when None, a recent non-suspect probe (the
    10th newest) is selected from the analysis table.  Reads the
    module-level db and shows the plot interactively.
    """
    def _split_times(cursor, sent):
        # Observed timestamps for one direction (sent=1 outgoing, 0 incoming),
        # split into (payload-bearing packets, empty packets).
        cursor.execute("""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?""",
                       (probe_id, sent))
        pkts = cursor.fetchall()
        return ([row[0] for row in pkts if row[1] != 0],
                [row[0] for row in pkts if row[1] == 0])

    if probe_id is None:  # was "== None"; identity test is correct for None
        cursor = db.conn.cursor()
        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        probe_id = cursor.fetchone()[0]

    cursor = db.conn.cursor()
    sent_payload, sent_other = _split_times(cursor, 1)
    rcvd_payload, rcvd_other = _split_times(cursor, 0)

    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
    #cursor.execute(query, (probe_id,))
    #reported,tod = cursor.fetchone()
    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    sp = plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    so = plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    rp = plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    ro = plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    #plt.legend((s,r), ('sent','received'))
    #plt.savefig('../img/http-packet-times.svg')
    plt.show()
154
155#plotSingleProbe()
156
157
def graphTestResults():
    """Plot error rate vs. sample size for each classifier's best test run.

    For every classifier in classifier_results, the 'best' parameter set is
    the one reaching mean error (FP+FN)/2 below 5% with the fewest
    observations; if none qualifies, fall back to the overall lowest-error
    run.  Reads the module-level db; shows the plot interactively.
    """
    plt.clf()
    plt.title("Test Results")
    plt.xlabel('sample size')
    plt.ylabel('error rate')
    legend = []
    colors = ['red','blue','green','purple','orange','black','brown']
    color_id = 0

    cursor = db.conn.cursor()
    query = """
      SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
    """
    cursor.execute(query)
    classifiers = [c[0] for c in cursor]

    for classifier in classifiers:
        # Best params: fewest observations among runs under 5% mean error.
        query="""
        SELECT params FROM classifier_results
        WHERE trial_type='test'
         AND classifier=:classifier
         AND (false_positives+false_negatives)/2.0 < 5.0
        ORDER BY num_observations,(false_positives+false_negatives)
        LIMIT 1
        """
        cursor.execute(query, {'classifier':classifier})
        row = cursor.fetchone()
        if row is None:  # was "== None"
            # Fallback: lowest error regardless of sample size.
            query="""
            SELECT params FROM classifier_results
            WHERE trial_type='test' and classifier=:classifier
            ORDER BY (false_positives+false_negatives),num_observations
            LIMIT 1
            """
            cursor.execute(query, {'classifier':classifier})
            row = cursor.fetchone()
            if row is None:
                sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
                continue

        best_params = row[0]
        # All test runs for the chosen params, ordered by sample size.
        query="""
        SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
        WHERE trial_type='test'
         AND classifier=:classifier
         AND params=:params
        ORDER BY num_observations
        """
        cursor.execute(query, {'classifier':classifier,'params':best_params})

        num_obs = []
        performance = []
        for row in cursor:
            num_obs.append(row[0])
            performance.append(row[1])
        #print(num_obs,performance)
        path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
        plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
        legend.append((classifier,path))
        color_id = (color_id+1) % len(colors)

    plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='xx-small')
    plt.show()
223       
graphTestResults()

# Exit here: everything below this point is retained exploratory plotting
# code that never runs.
sys.exit(0)
227
# NOTE(review): unreachable — the module exits via sys.exit(0) above.
plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
#plt.savefig('paper/figures/comcast-powerboost1.png')
plt.show()



plt.clf()
plt.title("Simple HTTP Request")
plt.xlabel('Time of Day')
plt.ylabel('')
# NOTE(review): sent_times/rcvd_times are not defined anywhere visible in this
# file — likely remnants of the commented-out parse_data() flow above.
s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
plt.legend((s,r), ('sent','received'), scatterpoints=1)
plt.show()

sys.exit(0)
short_overtime,long_overtime,diff_overtime = None,None,None
253
# NOTE(review): unreachable (sys.exit(0) above).
num_bins = 300
reported_diffs.sort()
# NOTE(review): these cut-offs index reported_diffs using len(diffs) — looks
# like a copy/paste slip; only harmless if the two lists have equal length.
cut_off_low = reported_diffs[int(len(diffs)*0.003)]
cut_off_high = reported_diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
# NOTE(review): the 'normed' kwarg was removed in matplotlib 3.x ('density'
# replaced it) — this call would fail on a modern matplotlib.
n, bins, patches = plt.hist(reported_diffs, num_bins, normed=1, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')




num_bins = 300
diffs.sort()
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(diffs, num_bins, normed=1, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
296
297
298
# NOTE(review): unreachable; 'shorts'/'longs' exist only in commented-out
# code earlier in this file.
num_bins = 150
# the histogram of the data
n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - RTT short and long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
#plt.show()
plt.savefig('paper/figures/comcast-powerboost2.svg')
317
318
319
# NOTE(review): unreachable.  'derived', 'bootstrap', 'boxTest', 'diffMedian',
# 'expected_mean' are presumably supplied by the nanownlib star imports, but
# 'functools' is never imported in this file — this would raise NameError if
# ever reached.
num_trials = 200


subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
estimator = functools.partial(boxTest, 0.07, 0.08)
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)

null_performance = []
for subsample_size in subsample_sizes:
    null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
    null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)

plt.clf()
plt.title("boxTest bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
plt.show()



subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
estimator = diffMedian
performance = []
for subsample_size in subsample_sizes:
    estimates = bootstrap(derived, subsample_size, num_trials, estimator)
    performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

plt.clf()
plt.title("diff median bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
plt.show()
358
359
360
361
# NOTE(review): unreachable; same missing-'functools' issue as the block above.
subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
weight_funcs = (linearWeights, prunedWeights)
for wf in weight_funcs:
    estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
    performance = []
    for subsample_size in subsample_sizes:
        estimates = bootstrap(derived, subsample_size, num_trials, estimator)
        performance.append(100.0*len([e for e in estimates if e > expected_mean*0.9 and e < expected_mean*1.1])/num_trials)

    plt.clf()
    plt.title(repr(wf))
    plt.xlabel('sample size')
    plt.ylabel('performance')
    plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
    plt.show()



# NOTE(review): 'tsshorts'/'tslongs' are not defined anywhere visible in this
# file.
num_bins = 300
# the histogram of the data
n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
# add a 'best fit' line
#y = mlab.normpdf(bins, mu, sigma)
#plt.plot(bins, y, 'r--')
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - tsval_rtt short vs long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.show()
395
396
397
398   
####
# NOTE(review): unreachable.  Compares trust aggregation methods by mean
# error of the resulting weighted mean across a sweep of alpha values.
# 'trustValues', 'weightedMean', weight functions etc. presumably come from
# the nanownlib star imports.
#trust_methods = [min,max,sum,difference,product]
trust_methods = [sum,product,hypotenuse]
colors = ['red','blue','green','purple','orange','black']
weight_methods = [prunedWeights, linearWeights]
alphas = [i/100.0 for i in range(0,100,2)]




plt.clf()
plt.title(r'Trust Method Comparison - Linear')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = linearWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()



plt.clf()
plt.title(r'Trust Method Comparison - Pruned')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = prunedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()


sys.exit(0)
447
# NOTE(review): doubly unreachable (two module-level sys.exit(0) calls above).
plt.clf()
plt.title(r'Trust Method Comparison - Inverted')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = invertedWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()


plt.clf()
plt.title(r'Trust Method Comparison - Arctangent')
plt.xlabel('Alpha')
plt.ylabel('Mean error')
paths = []
for tm in trust_methods:
    trust = trustValues(derived, tm)
    series = []
    for alpha in alphas:
        weights = arctanWeights(derived, trust, alpha)
        series.append(weightedMean(derived, weights) - expected_mean)

    paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))

plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
plt.show()
Note: See TracBrowser for help on using the repository browser.