Context Navigation

graph @ 24

Last change on this file since 24 was 16, checked in by tim, 9 years ago
.
Property svn:executable set to `*`
File size: 32.4 KB

Line
1	#!/usr/bin/env python3
2
3	import sys
4	import os
5	import time
6	import random
7	import tempfile
8	import argparse
9	import socket
10	import json
11
12	import numpy
13	import matplotlib.mlab as mlab
14	import matplotlib.pyplot as plt
15
16
VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    # Development checkout: work out where this script lives so that the
    # sibling ../lib directory can be appended to the import path
    # (presumably that is where nanownlib is found -- confirm).
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        # __file__ may be unavailable; fall back to the invoked script path.
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still propagate.
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            pass  # best effort: keep the '.' default
    sys.path.append("%s/../lib" % script_dir)
28
29	from nanownlib import *
30	from nanownlib.stats import *
31	import nanownlib.storage
32
33
# Command line: a database file plus an optional name of the unusual test case.
parser = argparse.ArgumentParser(
    description="")
parser.add_argument('db_file', default=None,
                    help='')
parser.add_argument('unusual_case', nargs='?', type=str, default=None,
                    help='The test case that is most unusual from the others. (default: auto detect)')
options = parser.parse_args()
db = nanownlib.storage.db(options.db_file)
if options.unusual_case is None:
    # Nothing given on the command line: let the library pick the case.
    unusual_case, delta = findUnusualTestCase(db)
else:
    # Previously unusual_case was left unbound on this path, so supplying
    # the argument made the first later use fail with a NameError.
    unusual_case = options.unusual_case
44
45
def differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample RTT differences for the train and test series.

    For every sample yielded by db.subseries('train', unusual_case) followed
    by db.subseries('test', unusual_case), the value stored under
    'other_<rtt_type>' is subtracted from the one under 'unusual_<rtt_type>'.
    The result is a single list, train samples first.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    deltas = []
    for phase in ('train', 'test'):
        for sample in db.subseries(phase, unusual_case):
            deltas.append(sample[unusual_key] - sample[other_key])
    return deltas
50
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample RTT differences for the 'train_null' series.

    Each entry is sample['unusual_<rtt_type>'] - sample['other_<rtt_type>']
    for the samples of db.subseries('train_null', unusual_case), in order.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    deltas = []
    for sample in db.subseries('train_null', unusual_case):
        deltas.append(sample[unusual_key] - sample[other_key])
    return deltas
54
55
def timeSeries(db, probe_type, unusual_case):
    """Yield one dict per probe of *unusual_case*, in query order.

    Each dict has three keys: 'time_of_day', the value of *unusual_case*
    itself (mapped to that probe's packet_rtt) and 'other_cases' (the average
    packet_rtt of the probes from the other test cases in the same sample, as
    computed by the correlated sub-query).  Only probes whose type equals
    *probe_type* are considered.  Rows are read by column name.
    """
    sql = """
SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
"""
    bindings = {"probe_type": probe_type, "unusual_case": unusual_case}
    cur = db.conn.cursor()
    cur.execute(sql, bindings)
    for record in cur:
        yield {
            'time_of_day': record['time_of_day'],
            unusual_case: record['uc'],
            'other_cases': record['oc'],
        }
69	#samples,derived,null_derived = parse_data(input1)
70
71	#trust = trustValues(derived, sum)
72	#weights = linearWeights(derived, trust, 0.25)
73	#print('(test): %f' % weightedMean(derived,weights))
74
# RTT differences (unusual case minus the others) over the train+test
# series, shared by the statistics and plotting code below.
diffs = list(differences(db, unusual_case))
reported_diffs = list(differences(db, unusual_case, 'reported'))


def basicStatistics():
    """Print summary statistics of the packet and reported RTT differences.

    For the module-level `diffs` and then `reported_diffs`, prints the result
    of midsummary, quadsummary, septasummary and mad (helpers defined outside
    this file's visible code).  The reported-RTT statistics remain best
    effort: a failure there no longer passes silently but is reported on
    stderr, and the function still returns normally.
    """
    estimators = (
        ('midhinge', midsummary),
        ('quadsummary', quadsummary),
        ('septasummary', septasummary),
        ('MAD', mad),
    )
    for label, estimator in estimators:
        print('packet_rtt diff %s: %10.2f' % (label, estimator(diffs)))

    try:
        for label, estimator in estimators:
            print('reported diff %s: %10.2f' % (label, estimator(reported_diffs)))
    except Exception as err:
        # Keep going, but say why the remaining lines are missing.
        sys.stderr.write("WARN: reported diff statistics unavailable: %r\n" % (err,))


basicStatistics()
110
def exampleBoxTestHistogram(low, high):
    """Show overlaid histograms of the two packet-RTT distributions with boxes.

    Plots the 'other_packet' and 'unusual_packet' values of the train+test
    samples for the module-level `unusual_case`, and shades for each of them
    the interval between its *low* and *high* percentiles.

    low, high -- percentiles (0-100) bounding each shaded box.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train', unusual_case) + db.subseries('test', unusual_case)
    s = [x['other_packet'] for x in samples]
    l = [x['unusual_packet'] for x in samples]

    s_low, s_high = numpy.percentile(s, (low, high))
    l_low, l_high = numpy.percentile(l, (low, high))

    # Trim the x-range to the central 99.6% of the "other" values.  The
    # indices are derived from the list being indexed rather than from the
    # unrelated global `diffs`.
    s.sort()
    cut_off_low = s[int(len(s)*0.002)]
    cut_off_high = s[int(len(s)*0.998)]

    plt.clf()
    # `density=True` replaces the `normed=1` keyword removed in Matplotlib 3.1.
    plt.hist((s, l), num_bins, density=True, color=('blue', 'red'), histtype='step', alpha=0.8,
             label=('Test Case 1', 'Test Case 2'), range=(cut_off_low, cut_off_high))

    currentAxis = plt.gca()
    currentAxis.add_patch(FancyBboxPatch((s_low, 0), s_high-s_low, 0.0001, boxstyle='square', facecolor="blue", alpha=0.4))
    currentAxis.add_patch(FancyBboxPatch((l_low, 0), l_high-l_low, 0.0001, boxstyle='square', facecolor="red", alpha=0.4))

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.legend()
    plt.show()
148	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
149
150
151	#exampleBoxTestHistogram(6,8)
152
153
def testKalman4D(params=None):
    """Compare a 4-D Kalman smoother with a plain mean over RTT windows.

    Each observation is the tuple (unusual_packet, other_packet,
    unusual_tsval, other_tsval) of a sample of the hard-coded 'long' test
    case.  When *params* is None, filter parameters are learned with
    pykalman's EM from the 'train' measurements followed by up to 50000
    'train_null' measurements, then printed.  The smoother is then run over
    successive windows of 5000 observations; for each window the smoothed
    mean packet-RTT difference is printed next to the raw mean difference,
    first for the train+test measurements (offsets 50000..100000) and then
    for the null measurements.

    params -- optional dict of KalmanFilter keyword arguments (e.g. as
              printed by an earlier run); when given, the EM step is skipped.
    """
    from pykalman import KalmanFilter
    import pprint

    def as_observation(s):
        return (s['unusual_packet'], s['other_packet'], s['unusual_tsval'], s['other_tsval'])

    train = db.subseries('train', 'long', offset=0)
    test = db.subseries('test', 'long', offset=0)
    null = db.subseries('train_null', 'long', offset=0)
    measurements = numpy.asarray([as_observation(s) for s in (train+test)])
    null_measurements = numpy.asarray([as_observation(s) for s in null])

    if params is None:
        learned = ('transition_matrices',
                   'observation_matrices',
                   'transition_offsets',
                   'observation_offsets',
                   'transition_covariance',
                   'observation_covariance',
                   'initial_state_mean',
                   'initial_state_covariance')
        # The original also built a filter seeded with an initial_state_mean
        # and immediately replaced it with this default one; only the filter
        # that actually took effect is kept.
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4)

        start = time.time()
        # `+` on numpy arrays adds element-wise (or fails on mismatched
        # shapes); the training set has to be the two runs back to back.
        training = measurements[0:len(train)]
        if len(null_measurements):
            training = numpy.concatenate((training, null_measurements[0:50000]))
        kf = kf.em(training, n_iter=10, em_vars=learned)
        params = {name: getattr(kf, name).tolist() for name in learned}
        print("Learned Params:\n")
        pprint.pprint(params)
        print("pykalman em time: %f" % (time.time()-start))

    num_obs = 5000

    def report(data, offsets, prefix):
        """Smooth each window of *data* and print smoothed vs. raw mean difference."""
        for offset in offsets:
            m = data[offset:offset+num_obs]
            if len(m) == 0:
                # The offset ranges overshoot the data; nothing left to smooth.
                break
            kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **params)
            (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
            up = numpy.mean([state[0] for state in smoothed_state_means])
            op = numpy.mean([state[1] for state in smoothed_state_means])
            print(prefix + "packet_rtt pykalman mean:", up-op)
            print(prefix + "packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))

    report(measurements, range(50000, 100000+num_obs, num_obs), "")
    report(null_measurements, range(0, len(null_measurements)+num_obs, num_obs), "null ")
236
237
238	#testKalman4D(echo_vm_5k)
239
240
241
def testKalman(params=None):
    """Compare a 2-D Kalman smoother with a plain mean over RTT windows.

    Observations are (unusual_packet, other_packet) pairs of the hard-coded
    'long' test case, train samples followed by test samples.  When *params*
    is None, the filter parameters are learned with pykalman's EM from the
    train measurements (the initial state mean is seeded from quadsummary of
    the train data and is not itself re-estimated) and printed.  The smoother
    is then run over windows of 10000 observations at offsets 50000..100000,
    printing timing, the final and mean smoothed difference, and the raw mean
    difference for each window.

    params -- optional dict of KalmanFilter keyword arguments; when given,
              the EM step is skipped.
    """
    from pykalman import KalmanFilter
    import pprint

    train = db.subseries('train', 'long', offset=0)
    test = db.subseries('test', 'long', offset=0)
    measurements = numpy.asarray([(s['unusual_packet'], s['other_packet']) for s in (train+test)])

    if params is None:
        # The seed filter is only needed when parameters have to be learned.
        kf = KalmanFilter(n_dim_obs=2, n_dim_state=2,
                          initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]),
                                              quadsummary([s['other_packet'] for s in train])])
        start = time.time()
        kf = kf.em(measurements[0:len(train)], n_iter=10,
                   em_vars=('transition_matrices',
                            'observation_matrices',
                            'transition_offsets',
                            'observation_offsets',
                            'transition_covariance',
                            'observation_covariance',
                            'initial_state_covariance'))
        exported = ('transition_matrices',
                    'observation_matrices',
                    'transition_offsets',
                    'observation_offsets',
                    'transition_covariance',
                    'observation_covariance',
                    'initial_state_mean',
                    'initial_state_covariance')
        params = {name: getattr(kf, name).tolist() for name in exported}
        print("Learned Params:\n")
        pprint.pprint(params)
        print("pykalman em time: %f" % (time.time()-start))

    num_obs = 10000
    for offset in range(50000, 100000+num_obs, num_obs):
        start = time.time()
        m = measurements[offset:offset+num_obs]
        if len(m) == 0:
            # The hard-coded offsets can run past the available data.
            break
        kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)
        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
        print("pykalman smooth time: %f" % (time.time()-start))
        up = numpy.mean([state[0] for state in smoothed_state_means])
        op = numpy.mean([state[1] for state in smoothed_state_means])
        print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
        print("packet_rtt pykalman mean:", up-op)
        print("packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))
291
292
293	#testKalman(ten_iter)
294
295
def getTCPTSPrecision():
    """Return tcpts_mean from the first row of the meta table.

    Uses the module-level `db` connection.  Returns None when the query
    yields no (truthy) row.
    """
    cur = db.conn.cursor()
    cur.execute("""SELECT tcpts_mean FROM meta""")
    first = cur.fetchone()
    return first[0] if first else None
304
305
def tsFilteredHistogram():
    """Show histograms of packet-RTT differences grouped by TSval difference.

    Uses the train+test samples of the hard-coded 'long' test case.  Four
    step histograms are drawn over the central 98.4% of all differences:
    every sample, samples whose raw TSval difference is exactly 0, samples
    whose TSval difference rounds to +/-1 tick, and samples where it rounds
    to 2 or more ticks.  One tick is the value returned by
    getTCPTSPrecision().

    Raises ValueError when no usable precision is stored in the database.
    """
    tcpts_precision = getTCPTSPrecision()
    if not tcpts_precision:
        # Previously surfaced as an opaque TypeError / ZeroDivisionError.
        raise ValueError("no usable tcpts_mean in meta table: %r" % (tcpts_precision,))

    num_bins = 500
    samples = db.subseries('train', 'long') + db.subseries('test', 'long')

    # Single pass: the tick count is computed once per sample instead of once
    # per bucket.
    diffs, ts0_diffs, ts1_diffs, ts2_diffs = [], [], [], []
    for s in samples:
        rtt_diff = s['unusual_packet'] - s['other_packet']
        ts_diff = s['unusual_tsval'] - s['other_tsval']
        diffs.append(rtt_diff)
        if ts_diff == 0:
            ts0_diffs.append(rtt_diff)
        ticks = abs(int(round(ts_diff/tcpts_precision)))
        if ticks == 1:
            ts1_diffs.append(rtt_diff)
        elif ticks >= 2:
            ts2_diffs.append(rtt_diff)

    if not diffs:
        sys.stderr.write("WARN: no samples available for tsFilteredHistogram\n")
        return

    diffs.sort()
    cut_off_low = diffs[int(len(diffs)*0.008)]
    cut_off_high = diffs[int(len(diffs)*0.992)]

    plt.clf()
    series = (
        (diffs, 'black', 'All Packets'),
        (ts0_diffs, 'blue', 'TSval Difference == 0'),
        (ts1_diffs, 'orange', 'TSval Difference == 1'),
        (ts2_diffs, 'red', 'TSval Difference >= 2'),
    )
    for values, colour, label in series:
        # Raw counts; `density=False` replaces the `normed=0` keyword removed
        # in Matplotlib 3.1.
        plt.hist(values, num_bins, density=False, color=colour, histtype='step', alpha=0.8,
                 range=(cut_off_low, cut_off_high), label=label)
    plt.xlabel('RTT Difference')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.legend()
    plt.show()


tsFilteredHistogram()
364
365
def exampleSummaryHistogram():
    """Show a histogram of packet-RTT differences with percentile markers.

    Uses the train+test samples of the hard-coded 'long' test case, trimmed
    to the central 99.4% of the differences.  Fixed-size boxes are drawn at
    the 25th/75th percentiles (blue), at the 37.5th/62.5th and 12.5th/87.5th
    percentiles (green), at x=50 (black) and at the mean of the first four
    percentile values (red).
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train', 'long') + db.subseries('test', 'long')
    diffs = sorted(s['unusual_packet']-s['other_packet'] for s in samples)

    cut_off_low = diffs[int(len(diffs)*0.003)]
    cut_off_high = diffs[int(len(diffs)*0.997)]

    plt.clf()
    # Raw counts; `density=False` replaces the `normed=0` keyword removed in
    # Matplotlib 3.1.
    plt.hist(diffs, num_bins, density=False, color='black', histtype='step', alpha=0.8,
             range=(cut_off_low, cut_off_high), label='all')

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')

    w = 25
    l1, r1, l2, r2, l3, r3 = numpy.percentile(diffs, (50-w, 50+w, 50-w/2, 50+w/2, (50-w)/2, (50+w)/2+50))
    markers = (
        (l1, "blue"), (r1, "blue"),
        (l2, "green"), (r2, "green"),
        (l3, "green"), (r3, "green"),
        (50, "black"),
        (numpy.mean((l1, r1, l2, r2)), "red"),
    )
    currentAxis = plt.gca()
    for x, colour in markers:
        currentAxis.add_patch(FancyBboxPatch((x, 0), 2500, 5000, boxstyle='square',
                                             facecolor=colour, alpha=0.4, edgecolor='none'))

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.show()
404	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
405
406	#exampleSummaryHistogram()
407
408
409
410	#all_data = longs+shorts
411	#all_data.sort()
412	#cut_off_low = all_data[0]
413	#cut_off_high = all_data[int(len(all_data)*0.997)]
414
415
def plotSingleProbe(probe_id=None):
    """Show an event plot of the packet times of a single probe.

    probe_id -- id of the probe to plot.  When None, the 11th highest
                probe_id among analysis rows with an empty `suspect` value
                is used.

    Sent packets are drawn in red and received ones in blue, with packets
    carrying a payload (payload_len != 0) on a separate row from the others.

    Raises LookupError when no probe_id is given and none can be selected.
    """
    cursor = db.conn.cursor()
    if probe_id is None:
        query = """SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        row = cursor.fetchone()
        if row is None:
            # Previously an opaque "'NoneType' object is not subscriptable".
            raise LookupError("no suitable probe found in the analysis table")
        probe_id = row[0]

    def observed_times(sent):
        """Return (payload_times, other_times) of the probe's packets in one direction."""
        cursor.execute("""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?""",
                       (probe_id, sent))
        pkts = cursor.fetchall()
        return ([r[0] for r in pkts if r[1] != 0],
                [r[0] for r in pkts if r[1] == 0])

    sent_payload, sent_other = observed_times(1)
    rcvd_payload, rcvd_other = observed_times(0)

    print("single probe counts:", len(sent_payload), len(sent_other), len(rcvd_payload), len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    rows = (
        (sent_payload, 'red', 8, 'sent'),
        (sent_other, 'red', 6, 'sent'),
        (rcvd_payload, 'blue', 4, 'received'),
        (rcvd_other, 'blue', 2, 'received'),
    )
    for times, colour, offset, label in rows:
        plt.eventplot(times, colors=(colour,), lineoffsets=offset, linewidths=2, alpha=0.6, label=label)
    plt.show()
451
452	#plotSingleProbe()
453
454
def graphTestResults():
    """Print the best classifier results and plot error rate vs. observations.

    The results returned by evaluateTestResults(db) are listed on stdout,
    first ordered by number of observations and then by error; the first
    entry printed is tagged " (winner)".  For every listed result, ordered by
    classifier name, the 'test' rows of classifier_results are plotted as
    (num_observations, mean of false positives and false negatives), along
    with a dashed horizontal line at 5.0.
    """
    stem, ext = os.path.splitext(os.path.basename(options.db_file))
    # Only needed when the savefig call is re-enabled.
    chartname = "/home/tim/blindspot/research/timing-analysis/paper/figures/results/%s.svg" % (stem)

    plt.clf()
    plt.title("Test Results")
    plt.xlabel('sample size')
    plt.ylabel('error rate')
    palette = ['red','blue','green','purple','orange','black','brown']

    best_obs, best_error = evaluateTestResults(db)
    best_obs = sorted(best_obs, key=lambda entry: entry['num_observations'])
    best_error = sorted(best_error, key=lambda entry: entry['error'])

    winner = None
    listings = (
        (best_obs, "%(num_observations)d obs / %(classifier)s / %(params)s"),
        (best_error, "%(error)f%% error / %(classifier)s / %(params)s"),
    )
    for entries, template in listings:
        for entry in entries:
            sys.stdout.write(template % entry)
            if winner is None:
                sys.stdout.write(" (winner)")
                winner = entry
            print()

    sql = """
SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
WHERE trial_type='test'
AND classifier=:classifier
AND params=:params
ORDER BY num_observations
"""
    handles = []
    names = []
    max_obs = 0
    ordered = sorted(best_obs+best_error, key=lambda entry: entry['classifier'])
    for index, result in enumerate(ordered):
        colour = palette[index % len(palette)]
        cur = db.conn.cursor()
        cur.execute(sql, {'classifier':result['classifier'],'params':result['params']})

        points = [(row[0], row[1]) for row in cur]
        num_obs = [obs for obs, err in points]
        performance = [err for obs, err in points]
        max_obs = max([max_obs] + num_obs)

        handles.append(plt.scatter(num_obs, performance, color=colour, s=4, alpha=0.8, linewidths=3.0))
        plt.plot(num_obs, performance, color=colour, alpha=0.8)
        names.append(result['classifier'])

    plt.legend(handles, names, scatterpoints=1, fontsize='x-small')
    plt.plot([0, max_obs], [5.0, 5.0], "k--")
    plt.xlabel('Number of Observations')
    plt.ylabel('Error Rate')
    plt.show()


graphTestResults()
521
sys.exit(0)

# NOTE(review): the script exits on the line above, so this section does not
# run as written.  It also reads sent_times / rcvd_times, which are not
# defined anywhere in the visible part of the file.

# Packet RTT of the 'short' and 'long' train probes over time, plus the
# long-minus-others difference, each ordered by time of day.
short_overtime = sorted((row['time_of_day'], row['short']) for row in timeSeries(db, 'train', 'short'))
long_overtime = sorted((row['time_of_day'], row['long']) for row in timeSeries(db, 'train', 'long'))
diff_overtime = sorted((row['time_of_day'], row['long']-row['other_cases']) for row in timeSeries(db, 'train', 'long'))

plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
handles = []
for series, colour in ((short_overtime, 'red'), (long_overtime, 'blue'), (diff_overtime, 'purple')):
    times = [point[0] for point in series]
    rtts = [point[1] for point in series]
    handles.append(plt.scatter(times, rtts, s=1, color=colour, alpha=0.6))
s, l, d = handles
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
plt.show()


# Sent vs. received packet times of one request on two rows.
plt.clf()
plt.title("Simple HTTP Request")
plt.xlabel('Time of Day')
plt.ylabel('')
sent_row = [2]*len(sent_times)
s = plt.scatter(sent_times, sent_row, s=3, color='red', alpha=0.9)
rcvd_row = [1]*len(rcvd_times)
r = plt.scatter(rcvd_times, rcvd_row, s=3, color='blue', alpha=0.9)
plt.legend((s,r), ('sent','received'), scatterpoints=1)
plt.show()
552
sys.exit(0)
short_overtime, long_overtime, diff_overtime = None, None, None

# NOTE(review): unreachable as written (the script exits above).
# Histogram of the reported-RTT differences (black) and then of the packet-RTT
# differences (purple), each trimmed to its own central 99.4%.
num_bins = 300
for series, colour in ((reported_diffs, 'black'), (diffs, 'purple')):
    series.sort()
    # Index each list by its own length; the first plot used len(diffs)
    # while slicing reported_diffs.
    cut_off_low = series[int(len(series)*0.003)]
    cut_off_high = series[int(len(series)*0.997)]

    plt.clf()
    # `density=True` replaces the `normed=1` keyword removed in Matplotlib 3.1.
    n, bins, patches = plt.hist(series, num_bins, density=True, color=colour, histtype='step', alpha=0.8,
                                range=(cut_off_low, cut_off_high))
    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')
    plt.title(r'Histogram - distribution of differences')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.show()

sys.exit(0)
599
600
601
# Overlaid histograms of the short and long packet RTTs, saved as SVG.
# NOTE(review): `shorts` and `longs` are only assigned in commented-out code
# in the visible file; they must be defined before this section can run.
num_bins = 150
# `density=True` replaces the `normed=1` keyword removed in Matplotlib 3.1.
n, bins, patches = plt.hist((shorts, longs), num_bins, density=True, label=['short', 'long'],
                            color=['red', 'blue'], histtype='step', alpha=0.8,
                            range=(cut_off_low, cut_off_high))
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - RTT short and long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.savefig('paper/figures/comcast-powerboost2.svg')
619
620
621
622
# Bootstrap evaluation of several estimators at increasing subsample sizes.
# NOTE(review): `derived`, `null_derived` and `expected_mean` are not defined
# in the visible part of the file.
import functools  # used by the partial() calls below; not imported at the top of the file

num_trials = 200


def _hit_rates(data, sizes, estimator, accept):
    """Return, per subsample size, the percentage of bootstrap estimates that satisfy *accept*."""
    rates = []
    for size in sizes:
        estimates = bootstrap(data, size, num_trials, estimator)
        rates.append(100.0*len([e for e in estimates if accept(e)])/num_trials)
    return rates


def _near_expected(e):
    """True when *e* lies strictly within +/-10% of expected_mean."""
    # The `*` operators had been lost from this expression in the original
    # text ("100.0len(...)", "expected_mean0.9"), which made it a syntax error.
    return e > expected_mean*0.9 and e < expected_mean*1.1


subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
estimator = functools.partial(boxTest, 0.07, 0.08)
# An estimate of 1 counts as a hit on the real data, 0 on the null data.
performance = _hit_rates(derived, subsample_sizes, estimator, lambda e: e == 1)
null_performance = _hit_rates(null_derived, subsample_sizes, estimator, lambda e: e == 0)

plt.clf()
plt.title("boxTest bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
plt.show()


subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
estimator = diffMedian
performance = _hit_rates(derived, subsample_sizes, estimator, _near_expected)

plt.clf()
plt.title("diff median bootstrap")
plt.xlabel('sample size')
plt.ylabel('performance')
plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
plt.show()


subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
weight_funcs = (linearWeights, prunedWeights)
for wf in weight_funcs:
    estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
    performance = _hit_rates(derived, subsample_sizes, estimator, _near_expected)

    plt.clf()
    plt.title(repr(wf))
    plt.xlabel('sample size')
    plt.ylabel('performance')
    plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
    plt.show()
680
681
682
# Overlaid histograms of the short and long tsval RTTs.
# NOTE(review): `tsshorts` and `tslongs` are not defined in the visible part
# of the file.
num_bins = 300
# `density=True` replaces the `normed=1` keyword removed in Matplotlib 3.1.
n, bins, patches = plt.hist((tsshorts, tslongs), num_bins, density=True, label=['short', 'long'],
                            color=['red', 'blue'], histtype='step', alpha=0.8)
plt.xlabel('packet_rtt')
plt.ylabel('Probability')
plt.title(r'Histogram - tsval_rtt short vs long')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.legend()
plt.show()
698
699
700
701
####
# Mean error of the weighted mean as a function of alpha, for each trust
# method, first with linear and then with pruned weights.
# NOTE(review): `derived` and `expected_mean` are not defined in the visible
# part of the file.
trust_methods = [sum,product,hypotenuse]
colors = ['red','blue','green','purple','orange','black']
weight_methods = [prunedWeights, linearWeights]
alphas = [step/100.0 for step in range(0,100,2)]

comparisons = (
    (r'Trust Method Comparison - Linear', linearWeights),
    (r'Trust Method Comparison - Pruned', prunedWeights),
)
for chart_title, weigh in comparisons:
    plt.clf()
    plt.title(chart_title)
    plt.xlabel('Alpha')
    plt.ylabel('Mean error')
    paths = []
    for tm in trust_methods:
        trust = trustValues(derived, tm)
        series = []
        for alpha in alphas:
            weights = weigh(derived, trust, alpha)
            series.append(weightedMean(derived, weights) - expected_mean)

        # One colour per trust method, in list order.
        colour = colors[len(paths)]
        paths.append(plt.scatter(alphas, series, s=1, color=colour,alpha=0.6))

    plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
    plt.show()
747
748
sys.exit(0)

# NOTE(review): unreachable as written (the script exits above).
# Same comparison as for the linear and pruned weights, using the inverted
# and arctangent weight functions.
extra_comparisons = (
    (r'Trust Method Comparison - Inverted', invertedWeights),
    (r'Trust Method Comparison - Arctangent', arctanWeights),
)
for chart_title, weigh in extra_comparisons:
    plt.clf()
    plt.title(chart_title)
    plt.xlabel('Alpha')
    plt.ylabel('Mean error')
    paths = []
    for tm in trust_methods:
        trust = trustValues(derived, tm)
        series = []
        for alpha in alphas:
            weights = weigh(derived, trust, alpha)
            series.append(weightedMean(derived, weights) - expected_mean)

        colour = colors[len(paths)]
        paths.append(plt.scatter(alphas, series, s=1, color=colour,alpha=0.6))

    plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
    plt.show()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/bin/graph @ 24

Download in other formats: