Context Navigation

graph @ 25

Last change on this file since 25 was 16, checked in by tim, 10 years ago
.
Property svn:executable set to *
File size: 32.4 KB

Rev	Line
[6]	1	#!/usr/bin/env python3
	2
	3	import sys
	4	import os
	5	import time
	6	import random
	7	import tempfile
	8	import argparse
	9	import socket
	10	import json
	11
[10]	12	import numpy
[6]	13	import matplotlib.mlab as mlab
	14	import matplotlib.pyplot as plt
	15
	16
	17	VERSION = "{DEVELOPMENT}"
	18	if VERSION == "{DEVELOPMENT}":
	19	script_dir = '.'
	20	try:
	21	script_dir = os.path.dirname(os.path.realpath(__file__))
	22	except:
	23	try:
	24	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	25	except:
	26	pass
	27	sys.path.append("%s/../lib" % script_dir)
	28
	29	from nanownlib import *
	30	from nanownlib.stats import *
	31	import nanownlib.storage
	32
	33
	34	parser = argparse.ArgumentParser(
	35	description="")
	36	parser.add_argument('db_file', default=None,
	37	help='')
[16]	38	parser.add_argument('unusual_case', nargs='?', type=str, default=None,
	39	help='The test case that is most unusual from the others. (default: auto detect)')
[6]	40	options = parser.parse_args()
	41	db = nanownlib.storage.db(options.db_file)
[16]	42	if options.unusual_case == None:
	43	unusual_case,delta = findUnusualTestCase(db)
[6]	44
	45
def _subseries_differences(db, set_names, unusual_case, rtt_type):
    """Return unusual-minus-other values for every sample of the named sets.

    For each name in ``set_names`` (in order), ``db.subseries(name,
    unusual_case)`` is iterated and ``s['unusual_<rtt_type>'] -
    s['other_<rtt_type>']`` is collected.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    return [s[unusual_key] - s[other_key]
            for set_name in set_names
            for s in db.subseries(set_name, unusual_case)]


def differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample differences of the 'train' then 'test' subseries.

    ``rtt_type`` selects the ``unusual_*``/``other_*`` key pair that is
    subtracted (default ``'packet'``).
    """
    return _subseries_differences(db, ('train', 'test'), unusual_case, rtt_type)


def null_differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample differences of the 'train_null' subseries."""
    return _subseries_differences(db, ('train_null',), unusual_case, rtt_type)
[6]	54
[11]	55
def timeSeries(db, probe_type, unusual_case):
    """Yield one dict per probe of ``unusual_case`` with the given type.

    Each dict has the keys ``'time_of_day'``, the *value* of ``unusual_case``
    (that probe's packet_rtt) and ``'other_cases'`` (the average packet_rtt
    of the other test cases' probes of the same type and sample).

    Rows are read by column name, so ``db.conn`` must use a row factory that
    supports that (e.g. ``sqlite3.Row``).
    """
    query="""
    SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
    WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
    FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
    WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """

    params = {"probe_type":probe_type,"unusual_case":unusual_case}
    cursor = db.conn.cursor()
    try:
        cursor.execute(query, params)
        for row in cursor:
            yield {'time_of_day':row['time_of_day'],unusual_case:row['uc'],'other_cases':row['oc']}
    finally:
        # Release the cursor even if the consumer stops iterating early.
        cursor.close()
	69	#samples,derived,null_derived = parse_data(input1)
	70
	71	#trust = trustValues(derived, sum)
	72	#weights = linearWeights(derived, trust, 0.25)
	73	#print('(test): %f' % weightedMean(derived,weights))
	74
# Module-wide difference series used by the statistics and plots below.
diffs = list(differences(db, unusual_case))
reported_diffs = list(differences(db, unusual_case, 'reported'))
#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']


def basicStatistics():
    """Print summary statistics of the packet_rtt and reported differences.

    Reads the module-level ``diffs`` and ``reported_diffs`` lists. The
    packet_rtt figures are always printed; the "reported" figures are
    best-effort and a failure there is reported on stderr instead of being
    silently discarded.
    """
    print('packet_rtt diff midhinge: %10.2f' % midsummary(diffs))
    print('packet_rtt diff quadsummary: %10.2f' % quadsummary(diffs))
    print('packet_rtt diff septasummary: %10.2f' % septasummary(diffs))
    print('packet_rtt diff MAD: %10.2f' % mad(diffs))
    try:
        print('reported diff midhinge: %10.2f' % midsummary(reported_diffs))
        print('reported diff quadsummary: %10.2f' % quadsummary(reported_diffs))
        print('reported diff septasummary: %10.2f' % septasummary(reported_diffs))
        print('reported diff MAD: %10.2f' % mad(reported_diffs))

        #import cProfile
        #start = time.time()
        #kresults = kfilter({},diffs)
        #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
        #print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
        #kresults = kfilter({},reported_diffs)
        #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
        #print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
        #print("kfilter time: %f" % (time.time()-start))
    except Exception as err:
        # Still non-fatal, but no longer invisible (and Ctrl-C is not eaten).
        print('reported diff statistics unavailable: %r' % (err,), file=sys.stderr)

    #print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
    #print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
    #print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
    #print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))


basicStatistics()
[11]	110
def exampleBoxTestHistogram(low,high):
    """Plot overlaid histograms of the 'other' and 'unusual' packet values.

    Samples come from the 'train' and 'test' subseries of the module-level
    ``db`` for ``unusual_case``. A translucent box is drawn on the x axis for
    each series, spanning its ``low``..``high`` percentiles. The x range is
    clipped to the 0.2%..99.8% order statistics of the 'other' series.

    low, high: percentiles (0-100) passed to ``numpy.percentile``.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train',unusual_case)+db.subseries('test',unusual_case)
    if not samples:
        print('exampleBoxTestHistogram: no samples to plot', file=sys.stderr)
        return
    other_rtts = sorted(x['other_packet'] for x in samples)
    unusual_rtts = [x['unusual_packet'] for x in samples]

    s_low,s_high = numpy.percentile(other_rtts, (low,high))
    l_low,l_high = numpy.percentile(unusual_rtts, (low,high))

    # Index by the length of the list actually being indexed (the original
    # used the length of the unrelated global `diffs`).
    cut_off_low = other_rtts[int(len(other_rtts)*0.002)]
    cut_off_high = other_rtts[int(len(other_rtts)*0.998)]

    plt.clf()
    # `density` replaces the `normed` keyword removed from matplotlib.
    plt.hist((other_rtts,unusual_rtts), num_bins, density=True, color=('blue','red'), histtype='step', alpha=0.8,
             label=('Test Case 1','Test Case 2'), range=(cut_off_low,cut_off_high))

    currentAxis = plt.gca()
    currentAxis.add_patch(FancyBboxPatch((s_low, 0), s_high-s_low, 0.0001, boxstyle='square', facecolor="blue", alpha=0.4))
    currentAxis.add_patch(FancyBboxPatch((l_low, 0), l_high-l_low, 0.0001, boxstyle='square', facecolor="red", alpha=0.4))

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')
    #plt.title(r'Box Test Example - Overlapping Boxes')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.legend()
    plt.show()
    #plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	149
	150
	151	#exampleBoxTestHistogram(6,8)
	152
	153
def testKalman4D(params=None):
    """Experiment: smooth (packet, tsval) measurements with a 4-D Kalman filter.

    Builds rows of (unusual_packet, other_packet, unusual_tsval, other_tsval)
    from the 'train'+'test' and 'train_null' subseries of the module-level
    ``db``. When ``params`` is None the filter parameters are learned with
    pykalman's EM and printed; otherwise ``params`` is passed as keyword
    arguments to ``KalmanFilter``. The smoothed and raw mean packet
    differences are then printed for fixed-size windows of both data sets.

    NOTE(review): the test case name 'long' is hard-coded here, unlike the
    functions that use the auto-detected ``unusual_case``.
    """
    from pykalman import KalmanFilter
    import pprint

    em_vars = ('transition_matrices',
               'observation_matrices',
               'transition_offsets',
               'observation_offsets',
               'transition_covariance',
               'observation_covariance',
               'initial_state_mean',
               'initial_state_covariance')

    def as_rows(samples):
        # reshape keeps an (n, 4) shape even when `samples` is empty.
        rows = [(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) for s in samples]
        return numpy.asarray(rows).reshape(-1, 4)

    def report_windows(prefix, data, offsets, num_obs):
        # Smooth each window of `num_obs` rows and print the mean difference.
        for offset in offsets:
            m = data[offset:offset+num_obs]
            if len(m) == 0:
                continue  # window lies past the end of the data
            kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **params)
            (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
            up = numpy.mean([row[0] for row in smoothed_state_means])
            op = numpy.mean([row[1] for row in smoothed_state_means])
            print(prefix + "packet_rtt pykalman mean:", up-op)
            print(prefix + "packet_rtt mean:", numpy.mean([row[0]-row[1] for row in m]))

    train = db.subseries('train','long', offset=0)
    test = db.subseries('test','long', offset=0)
    null = db.subseries('train_null','long', offset=0)
    measurements = as_rows(train+test)
    null_measurements = as_rows(null)

    if params is None:
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4)

        start=time.time()
        # Concatenate the two training sets: `+` on numpy arrays adds them
        # element-wise (or fails on mismatched shapes) rather than appending.
        training = numpy.concatenate((measurements[0:len(train)], null_measurements[0:50000]))
        kf = kf.em(training, n_iter=10, em_vars=em_vars)
        params = {name: getattr(kf, name).tolist() for name in em_vars}
        print("Learned Params:\n")
        pprint.pprint(params)
        print("pykalman em time: %f" % (time.time()-start))

    num_obs=5000
    report_windows("", measurements, range(50000,100000+num_obs,num_obs), num_obs)
    # Stop at len(): the original range always ended with an empty window.
    report_windows("null ", null_measurements, range(0,len(null_measurements),num_obs), num_obs)
	236
	237
	238	#testKalman4D(echo_vm_5k)
	239
	240
	241
	242	def testKalman(params=None):
	243	from pykalman import AdditiveUnscentedKalmanFilter,KalmanFilter
	244	train = db.subseries('train','long', offset=0)
	245	test = db.subseries('test','long', offset=0)
	246	measurements = numpy.asarray([(s['unusual_packet'],s['other_packet']) for s in (train+test)])
	247
	248	#kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
	249	kf = KalmanFilter(n_dim_obs=2, n_dim_state=2,
	250	initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]),
	251	quadsummary([s['other_packet'] for s in train])])
	252	#kf = AdditiveUnscentedKalmanFilter(n_dim_obs=2, n_dim_state=2)
	253
	254	if params == None:
	255	start=time.time()
	256	kf = kf.em(measurements[0:len(train)], n_iter=10,
	257	em_vars=('transition_matrices',
	258	'observation_matrices',
	259	'transition_offsets',
	260	'observation_offsets',
	261	'transition_covariance',
	262	'observation_covariance',
	263	'initial_state_covariance'))
	264	params = {'transition_matrices': kf.transition_matrices.tolist(),
	265	'observation_matrices': kf.observation_matrices.tolist(),
	266	'transition_offsets': kf.transition_offsets.tolist(),
	267	'observation_offsets': kf.observation_offsets.tolist(),
	268	'transition_covariance': kf.transition_covariance.tolist(),
	269	'observation_covariance': kf.observation_covariance.tolist(),
	270	'initial_state_mean': kf.initial_state_mean.tolist(),
	271	'initial_state_covariance': kf.initial_state_covariance.tolist()}
	272	print("Learned Params:\n")
	273	import pprint
	274	pprint.pprint(params)
	275	print("pykalman em time: %f" % (time.time()-start))
	276
	277	#kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)
	278
	279	num_obs=10000
	280	for offset in range(50000,100000+num_obs,num_obs):
	281	start=time.time()
	282	kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)
	283	m = measurements[offset:offset+num_obs]
	284	(smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
	285	print("pykalman smooth time: %f" % (time.time()-start))
	286	up = numpy.mean([m[0] for m in smoothed_state_means])
	287	op = numpy.mean([m[1] for m in smoothed_state_means])
	288	print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
	289	print("packet_rtt pykalman mean:", up-op)
	290	print("packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))
	291
	292
	293	#testKalman(ten_iter)
	294
	295
def getTCPTSPrecision():
    """Return ``tcpts_mean`` from the first row of the ``meta`` table.

    Uses the module-level ``db`` connection. Returns None when the query
    yields no row.
    """
    cur = db.conn.cursor()
    cur.execute("""SELECT tcpts_mean FROM meta""")
    first = cur.fetchone()
    return first[0] if first else None
	304
	305
	306	def tsFilteredHistogram():
	307	tcpts_precision = getTCPTSPrecision()
	308
	309	num_bins = 500
	310	all = db.subseries('train','long')+db.subseries('test','long')
	311	diffs = [s['unusual_packet']-s['other_packet'] for s in all]
	312	ts0_diffs = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']-s['other_tsval'] == 0]
[14]	313	#ts1_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(s['unusual_tsval']-s['other_tsval']) > 0]
	314	#ts2_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision)) <= 1.0]
	315	ts1_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) == 1]
	316	ts2_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) >= 2]
	317	#ts3_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) == 3]
	318	#ts4_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) == 4]
[12]	319
[14]	320	#ts_mode = statistics.mode([s['unusual_tsval'] for s in all]+[s['other_tsval'] for s in all])
	321	#ts_diff_mode = statistics.mode([s['unusual_tsval']-s['other_tsval'] for s in all])
	322	#ts_common_mode = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']<=ts_mode and s['other_tsval']<=ts_mode]
	323	#ts_common_diff_mode = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']-s['other_tsval']==ts_diff_mode]
[12]	324
[14]	325	#print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
	326	#print('packet_rtt tsval diff=0 quadsummary: %f' % quadsummary(ts0_diffs))
	327	#print('packet_rtt tsval diff>0 quadsummary: %f' % quadsummary(ts1_diffs))
	328	#print('packet_rtt tsval diff<=1 quadsummary: %f' % quadsummary(ts2_diffs))
	329	#print('packet_rtt tsval mode quadsummary: %f' % quadsummary(ts_common_mode))
	330	#print(len(diffs), len(ts0_diffs)+len(ts1_diffs))
[12]	331	diffs.sort()
[14]	332	cut_off_low = diffs[int(len(diffs)*0.008)]
	333	cut_off_high = diffs[int(len(diffs)*0.992)]
[12]	334
	335	plt.clf()
	336	# the histogram of the data
	337	n, bins, patches = plt.hist(diffs, num_bins, normed=0, color='black', histtype='step', alpha=0.8,
[14]	338	range=(cut_off_low,cut_off_high), label='All Packets')
[12]	339	n, bins, patches = plt.hist(ts0_diffs, num_bins, normed=0, color='blue', histtype='step', alpha=0.8,
[14]	340	range=(cut_off_low,cut_off_high), label='TSval Difference == 0')
	341	n, bins, patches = plt.hist(ts1_diffs, num_bins, normed=0, color='orange', histtype='step', alpha=0.8,
	342	range=(cut_off_low,cut_off_high), label='TSval Difference == 1')
	343	n, bins, patches = plt.hist(ts2_diffs, num_bins, normed=0, color='red', histtype='step', alpha=0.8,
	344	range=(cut_off_low,cut_off_high), label='TSval Difference >= 2')
	345	#n, bins, patches = plt.hist(ts3_diffs, num_bins, normed=0, color='red', histtype='step', alpha=0.8,
	346	# range=(cut_off_low,cut_off_high), label='tsval diff == 3')
	347	#n, bins, patches = plt.hist(ts4_diffs, num_bins, normed=0, color='brown', histtype='step', alpha=0.8,
	348	# range=(cut_off_low,cut_off_high), label='tsval diff == 4')
[12]	349	#n, bins, patches = plt.hist(ts_common_mode, num_bins, normed=0, color='green', histtype='step', alpha=0.8,
	350	# range=(cut_off_low,cut_off_high), label='tsval common mode')
[14]	351	#n, bins, patches = plt.hist(ts_common_diff_mode, num_bins, normed=0, color='green', histtype='step', alpha=0.8,
	352	# range=(cut_off_low,cut_off_high), label='tsval common diff mode')
[12]	353	plt.xlabel('RTT Difference')
[14]	354	#plt.ylabel('Probability')
	355	#plt.title(r'Histogram - distribution of differences by tsval')
[12]	356
	357	# Tweak spacing to prevent clipping of ylabel
	358	plt.subplots_adjust(left=0.15)
	359	plt.legend()
	360	plt.show()
	361	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	362
[16]	363	tsFilteredHistogram()
[12]	364
	365
def exampleSummaryHistogram():
    """Plot a histogram of packet differences with percentile marker boxes.

    Differences (unusual - other) come from the 'train' and 'test'
    subseries. Fixed-size translucent boxes are drawn at six percentiles
    around the median (25/75 in blue; 37.5/62.5 and 12.5/87.5 in green), at
    x=50 (black) and at the mean of the first four percentiles (red).

    NOTE(review): the test case name 'long' and the box size 2500x5000 are
    hard-coded.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train','long')+db.subseries('test','long')
    values = sorted(s['unusual_packet']-s['other_packet'] for s in samples)
    if not values:
        print('exampleSummaryHistogram: no samples to plot', file=sys.stderr)
        return

    cut_off_low = values[int(len(values)*0.003)]
    cut_off_high = values[int(len(values)*0.997)]

    plt.clf()
    # the histogram of the data (raw counts; `density` replaces `normed`)
    plt.hist(values, num_bins, density=False, color='black', histtype='step', alpha=0.8,
             range=(cut_off_low,cut_off_high), label='all')

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')
    #plt.title(r'Histogram - distribution of differences by tsval')

    w = 25
    l1,r1,l2,r2,l3,r3 = numpy.percentile(values, (50-w,50+w,50-w/2,50+w/2,(50-w)/2,(50+w)/2+50))
    markers = ((l1, "blue"), (r1, "blue"),
               (l2, "green"), (r2, "green"),
               (l3, "green"), (r3, "green"),
               (50, "black"),
               (numpy.mean((l1,r1,l2,r2)), "red"))
    currentAxis = plt.gca()
    for x, color in markers:
        currentAxis.add_patch(FancyBboxPatch((x, 0), 2500, 5000, boxstyle='square',
                                             facecolor=color, alpha=0.4, edgecolor='none'))

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    #plt.legend()
    plt.show()
    #plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	405
	406	#exampleSummaryHistogram()
	407
	408
	409
[6]	410	#all_data = longs+shorts
	411	#all_data.sort()
	412	#cut_off_low = all_data[0]
	413	#cut_off_high = all_data[int(len(all_data)*0.997)]
	414
	415
def plotSingleProbe(probe_id=None):
    """Draw an event plot of the packet times observed for one probe.

    probe_id: id of the probe to plot. When None, the probe is chosen by
        querying ``analysis`` for rows with an empty ``suspect`` value,
        ordered by descending probe_id, skipping the first 10.

    Sent and received packets are each split by whether ``payload_len`` is
    zero and drawn on four separate rows.
    """
    cursor = db.conn.cursor()
    if probe_id is None:
        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        row = cursor.fetchone()
        if row is None:
            # Fewer than 11 matching probes: nothing to plot.
            print("plotSingleProbe: no suitable probe found", file=sys.stderr)
            return
        probe_id = row[0]

    def observed_times(sent):
        # Returns (times with payload, times without payload) for a direction.
        cursor.execute("""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?""",
                       (probe_id, sent))
        pkts = cursor.fetchall()
        return ([row[0] for row in pkts if row[1] != 0],
                [row[0] for row in pkts if row[1] == 0])

    sent_payload, sent_other = observed_times(1)
    rcvd_payload, rcvd_other = observed_times(0)

    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
    #cursor.execute(query, (probe_id,))
    #reported,tod = cursor.fetchone()
    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    #plt.legend((s,r), ('sent','received'))
    #plt.savefig('../img/http-packet-times.svg')
    plt.show()
	451
	452	#plotSingleProbe()
	453
	454
	455	def graphTestResults():
[14]	456	basename = os.path.basename(options.db_file)
	457	basename,ext = os.path.splitext(basename)
	458
	459	chartname = "/home/tim/blindspot/research/timing-analysis/paper/figures/results/%s.svg" % (basename)
[16]	460	#print(chartname)
[14]	461
[11]	462	plt.clf()
	463	plt.title("Test Results")
	464	plt.xlabel('sample size')
	465	plt.ylabel('error rate')
	466	legend = []
	467	colors = ['red','blue','green','purple','orange','black','brown']
	468	color_id = 0
	469
[16]	470	best_obs,best_error = evaluateTestResults(db)
	471	best_obs = sorted(best_obs, key=lambda x: x['num_observations'])
	472	best_error = sorted(best_error, key=lambda x: x['error'])
	473	winner = None
	474	for bo in best_obs:
	475	sys.stdout.write("%(num_observations)d obs / %(classifier)s / %(params)s" % bo)
	476	if winner == None:
	477	sys.stdout.write(" (winner)")
	478	winner = bo
	479	print()
	480
	481	for be in best_error:
	482	sys.stdout.write("%(error)f%% error / %(classifier)s / %(params)s" % be)
	483	if winner == None:
	484	sys.stdout.write(" (winner)")
	485	winner = be
	486	print()
[11]	487
[16]	488	all = sorted(best_obs+best_error, key=lambda x: x['classifier'])
[13]	489	max_obs = 0
[16]	490	for result in all:
[11]	491	query="""
	492	SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
	493	WHERE trial_type='test'
	494	AND classifier=:classifier
	495	AND params=:params
	496	ORDER BY num_observations
	497	"""
[16]	498	cursor = db.conn.cursor()
	499	cursor.execute(query, {'classifier':result['classifier'],'params':result['params']})
[11]	500
	501	num_obs = []
	502	performance = []
	503	for row in cursor:
[13]	504	max_obs = max(max_obs, row[0])
[11]	505	num_obs.append(row[0])
	506	performance.append(row[1])
	507	#print(num_obs,performance)
	508	path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
	509	plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
[16]	510	legend.append((result['classifier'],path))
[11]	511	color_id = (color_id+1) % len(colors)
	512
[14]	513	plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='x-small')
[13]	514	plt.plot([0, max_obs], [5.0, 5.0], "k--")
[14]	515	plt.xlabel('Number of Observations')
	516	plt.ylabel('Error Rate')
[16]	517	#plt.savefig(chartname)
	518	plt.show()
[14]	519
[11]	520	graphTestResults()
	521
	522	sys.exit(0)
	523
# Packet RTT of the 'short' and 'long' training probes, and the long-vs-others
# difference, each as (time_of_day, value) pairs sorted by time.
short_overtime = sorted((sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short'))
long_overtime = sorted((sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long'))
diff_overtime = sorted((sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long'))


def _scatter_overtime(series, color):
    # Scatter one (time, rtt) series in the given colour.
    times = [point[0] for point in series]
    rtts = [point[1] for point in series]
    return plt.scatter(times, rtts, s=1, color=color, alpha=0.6)


plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s = _scatter_overtime(short_overtime, 'red')
l = _scatter_overtime(long_overtime, 'blue')
d = _scatter_overtime(diff_overtime, 'purple')
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
#plt.savefig('paper/figures/comcast-powerboost1.png')
plt.show()


# NOTE(review): sent_times and rcvd_times are not defined in the visible file.
plt.clf()
plt.title("Simple HTTP Request")
plt.xlabel('Time of Day')
plt.ylabel('')
s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
plt.legend((s,r), ('sent','received'), scatterpoints=1)
plt.show()

sys.exit(0)
# Drop the time-series lists before building the histograms.
short_overtime,long_overtime,diff_overtime = None,None,None


# Histogram of the 'reported' differences, clipped to the 0.3%..99.7% range.
num_bins = 300
reported_diffs.sort()
# Index with the length of the list being indexed (was len(diffs)).
cut_off_low = reported_diffs[int(len(reported_diffs)*0.003)]
cut_off_high = reported_diffs[int(len(reported_diffs)*0.997)]

plt.clf()
# the histogram of the data (`density` replaces the removed `normed` keyword)
n, bins, patches = plt.hist(reported_diffs, num_bins, density=True, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')


# Histogram of the packet_rtt differences, clipped the same way.
num_bins = 300
diffs.sort()
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
n, bins, patches = plt.hist(diffs, num_bins, density=True, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
	599
	600
	601
	602	num_bins = 150
	603	# the histogram of the data
	604	n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
	605	range=(cut_off_low,cut_off_high))
	606	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	607	# add a 'best fit' line
	608	#y = mlab.normpdf(bins, mu, sigma)
	609	#plt.plot(bins, y, 'r--')
	610	plt.xlabel('packet_rtt')
	611	plt.ylabel('Probability')
	612	plt.title(r'Histogram - RTT short and long')
	613
	614	# Tweak spacing to prevent clipping of ylabel
	615	plt.subplots_adjust(left=0.15)
	616	plt.legend()
	617	#plt.show()
	618	plt.savefig('paper/figures/comcast-powerboost2.svg')
	619
	620
	621
	622
	623	num_trials = 200
	624
	625
	626	subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
	627	estimator = functools.partial(boxTest, 0.07, 0.08)
	628	performance = []
	629	for subsample_size in subsample_sizes:
	630	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	631	performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)
	632
	633	null_performance = []
	634	for subsample_size in subsample_sizes:
	635	null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
	636	null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)
	637
	638	plt.clf()
	639	plt.title("boxTest bootstrap")
	640	plt.xlabel('sample size')
	641	plt.ylabel('performance')
	642	plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
	643	plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
	644	plt.show()
	645
	646
	647
	648	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	649	estimator = diffMedian
	650	performance = []
	651	for subsample_size in subsample_sizes:
	652	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	653	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	654
	655	plt.clf()
	656	plt.title("diff median bootstrap")
	657	plt.xlabel('sample size')
	658	plt.ylabel('performance')
	659	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	660	plt.show()
	661
	662
	663
	664
	665	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	666	weight_funcs = (linearWeights, prunedWeights)
	667	for wf in weight_funcs:
	668	estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
	669	performance = []
	670	for subsample_size in subsample_sizes:
	671	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	672	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	673
	674	plt.clf()
	675	plt.title(repr(wf))
	676	plt.xlabel('sample size')
	677	plt.ylabel('performance')
	678	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	679	plt.show()
	680
	681
	682
	683	num_bins = 300
	684	# the histogram of the data
	685	n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
	686	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	687	# add a 'best fit' line
	688	#y = mlab.normpdf(bins, mu, sigma)
	689	#plt.plot(bins, y, 'r--')
	690	plt.xlabel('packet_rtt')
	691	plt.ylabel('Probability')
	692	plt.title(r'Histogram - tsval_rtt short vs long')
	693
	694	# Tweak spacing to prevent clipping of ylabel
	695	plt.subplots_adjust(left=0.15)
	696	plt.legend()
	697	plt.show()
	698
	699
	700
	701
	702	####
	703	#trust_methods = [min,max,sum,difference,product]
	704	trust_methods = [sum,product,hypotenuse]
	705	colors = ['red','blue','green','purple','orange','black']
	706	weight_methods = [prunedWeights, linearWeights]
	707	alphas = [i/100.0 for i in range(0,100,2)]
	708
	709
	710
	711
	712	plt.clf()
	713	plt.title(r'Trust Method Comparison - Linear')
	714	plt.xlabel('Alpha')
	715	plt.ylabel('Mean error')
	716	paths = []
	717	for tm in trust_methods:
	718	trust = trustValues(derived, tm)
	719	series = []
	720	for alpha in alphas:
	721	weights = linearWeights(derived, trust, alpha)
	722	series.append(weightedMean(derived, weights) - expected_mean)
	723
	724	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	725
	726	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	727	plt.show()
	728
	729
	730
	731	plt.clf()
	732	plt.title(r'Trust Method Comparison - Pruned')
	733	plt.xlabel('Alpha')
	734	plt.ylabel('Mean error')
	735	paths = []
	736	for tm in trust_methods:
	737	trust = trustValues(derived, tm)
	738	series = []
	739	for alpha in alphas:
	740	weights = prunedWeights(derived, trust, alpha)
	741	series.append(weightedMean(derived, weights) - expected_mean)
	742
	743	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	744
	745	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	746	plt.show()
	747
	748
	749	sys.exit(0)
	750
	751	plt.clf()
	752	plt.title(r'Trust Method Comparison - Inverted')
	753	plt.xlabel('Alpha')
	754	plt.ylabel('Mean error')
	755	paths = []
	756	for tm in trust_methods:
	757	trust = trustValues(derived, tm)
	758	series = []
	759	for alpha in alphas:
	760	weights = invertedWeights(derived, trust, alpha)
	761	series.append(weightedMean(derived, weights) - expected_mean)
	762
	763	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	764
	765	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	766	plt.show()
	767
	768
	769	plt.clf()
	770	plt.title(r'Trust Method Comparison - Arctangent')
	771	plt.xlabel('Alpha')
	772	plt.ylabel('Mean error')
	773	paths = []
	774	for tm in trust_methods:
	775	trust = trustValues(derived, tm)
	776	series = []
	777	for alpha in alphas:
	778	weights = arctanWeights(derived, trust, alpha)
	779	series.append(weightedMean(derived, weights) - expected_mean)
	780
	781	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	782
	783	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	784	plt.show()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/bin/graph @ 25

Download in other formats: