Context Navigation

source: trunk/bin/graph @ 15

Last change on this file since 15 was 14, checked in by tim, 10 years ago
.
Property svn:executable set to *
File size: 33.4 KB

Rev	Line
[6]	1	#!/usr/bin/env python3
	2
	3	import sys
	4	import os
	5	import time
	6	import random
	7	import tempfile
	8	import argparse
	9	import socket
	10	import json
	11
[10]	12	import numpy
[6]	13	import matplotlib.mlab as mlab
	14	import matplotlib.pyplot as plt
	15
	16
	17	VERSION = "{DEVELOPMENT}"
	18	if VERSION == "{DEVELOPMENT}":
	19	script_dir = '.'
	20	try:
	21	script_dir = os.path.dirname(os.path.realpath(__file__))
	22	except:
	23	try:
	24	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	25	except:
	26	pass
	27	sys.path.append("%s/../lib" % script_dir)
	28
	29	from nanownlib import *
	30	from nanownlib.stats import *
	31	import nanownlib.storage
	32
	33
	34	parser = argparse.ArgumentParser(
	35	description="")
	36	parser.add_argument('db_file', default=None,
	37	help='')
	38	options = parser.parse_args()
	39	db = nanownlib.storage.db(options.db_file)
	40
	41
def differences(db, unusual_case, rtt_type='packet'):
    """Return per-sample ``unusual_<rtt_type> - other_<rtt_type>`` values.

    The samples come from ``db.subseries('train', unusual_case)`` followed by
    ``db.subseries('test', unusual_case)``, in that order.
    """
    def delta(sample):
        return sample['unusual_' + rtt_type] - sample['other_' + rtt_type]

    deltas = []
    for series_name in ('train', 'test'):
        deltas.extend(delta(sample)
                      for sample in db.subseries(series_name, unusual_case))
    return deltas
[6]	46
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return per-sample ``unusual_<rtt_type> - other_<rtt_type>`` values
    for the ``'train_null'`` subseries of ``unusual_case``.
    """
    def delta(sample):
        return sample['unusual_' + rtt_type] - sample['other_' + rtt_type]

    return list(map(delta, db.subseries('train_null', unusual_case)))
[6]	50
[11]	51
def timeSeries(db, probe_type, unusual_case):
    """Yield one dict per probe of ``unusual_case`` and type ``probe_type``.

    Each dict has three keys: ``'time_of_day'``, the value of
    ``unusual_case`` itself (mapped to that probe's packet_rtt), and
    ``'other_cases'`` (the average packet_rtt of the other test cases'
    probes of the same type and sample, as computed by the sub-select).
    """
    sql = """
SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
"""
    cur = db.conn.cursor()
    cur.execute(sql, {"probe_type": probe_type, "unusual_case": unusual_case})
    for record in cur:
        yield {
            'time_of_day': record['time_of_day'],
            unusual_case: record['uc'],
            'other_cases': record['oc'],
        }
	65	#samples,derived,null_derived = parse_data(input1)
	66
	67	#trust = trustValues(derived, sum)
	68	#weights = linearWeights(derived, trust, 0.25)
	69	#print('(test): %f' % weightedMean(derived,weights))
	70
	71	diffs = list(differences(db, 'long'))
	72	reported_diffs = list(differences(db, 'long', 'reported'))
	73	#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
	74	#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']
	75
	76
def basicStatistics():
    """Print summary statistics of the module-level ``diffs`` and
    ``reported_diffs`` lists, one ``label: value`` line per statistic.

    The reported-RTT statistics are best-effort: if any of them fails, a
    warning is written to stderr instead of the failure being hidden.
    """
    packet_summaries = (
        ('mean', statistics.mean),
        ('median', statistics.median),
        ('midhinge', midsummary),
        ('trimean', trimean),
        ('quadsummary', quadsummary),
        ('ubersummary', ubersummary),
        ('septasummary', septasummary),
        ('MAD', mad),
    )
    for label, summarize in packet_summaries:
        print('packet_rtt diff %s: %f' % (label, summarize(diffs)))

    reported_summaries = (
        ('trimean', trimean),
        ('quadsummary', quadsummary),
        ('ubersummary', ubersummary),
        ('septasummary', septasummary),
        ('MAD', mad),
    )
    try:
        for label, summarize in reported_summaries:
            print('reported diff %s: %f' % (label, summarize(reported_diffs)))
    except Exception as e:
        # Previously a bare `except: pass`; keep going but say what went wrong.
        sys.stderr.write("WARN: could not compute reported diff statistics: %s\n" % e)
[6]	104
[14]	105	#print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
	106	#print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
	107	#print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
	108	#print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))
[10]	109
[11]	110
[13]	111
def exampleBoxTestHistogram(low,high):
    """Show overlaid packet-RTT histograms of the 'other' and 'unusual'
    cases of the 'long' train+test subseries, with a shaded box marking the
    ``low``..``high`` percentile range of each.

    low, high -- percentiles (as accepted by ``numpy.percentile``) bounding
                 each shaded box.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train','long')+db.subseries('test','long')
    other_rtts = [s['other_packet'] for s in samples]
    unusual_rtts = [s['unusual_packet'] for s in samples]

    s_low,s_high = numpy.percentile(other_rtts, (low,high))
    l_low,l_high = numpy.percentile(unusual_rtts, (low,high))

    # Clip the x range to the 0.2%..99.8% span of the 'other' RTTs. Index
    # with the length of the list being indexed, not the global `diffs`.
    other_rtts.sort()
    cut_off_low = other_rtts[int(len(other_rtts)*0.002)]
    cut_off_high = other_rtts[int(len(other_rtts)*0.998)]

    plt.clf()
    # `density` replaces the `normed` keyword, which newer matplotlib rejects.
    plt.hist((other_rtts, unusual_rtts), num_bins, density=True,
             color=('blue','red'), histtype='step', alpha=0.8,
             label=('Test Case 1','Test Case 2'),
             range=(cut_off_low,cut_off_high))

    axes = plt.gca()
    axes.add_patch(FancyBboxPatch((s_low, 0), s_high-s_low, 0.0001,
                                  boxstyle='square', facecolor="blue", alpha=0.4))
    axes.add_patch(FancyBboxPatch((l_low, 0), l_high-l_low, 0.0001,
                                  boxstyle='square', facecolor="red", alpha=0.4))

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.legend()
    plt.show()
	149	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	150
	151
	152	#exampleBoxTestHistogram(6,8)
	153
	154
def testKalman4D(params=None):
    """Experiment: smooth (unusual_packet, other_packet, unusual_tsval,
    other_tsval) measurements with a 4-state pykalman KalmanFilter and print
    the smoothed vs. raw mean packet-RTT difference per window.

    params -- dict of KalmanFilter keyword arguments. When None, the
              parameters are learned with EM, printed, and then used.
    """
    from pykalman import KalmanFilter

    def report_windows(prefix, data, offsets, window, kf_params):
        # Smooth each window of `data` and print smoothed and raw mean diffs.
        for offset in offsets:
            m = data[offset:offset+window]
            if len(m) == 0:
                # The offset ranges extend past the end of the data;
                # there is nothing to smooth in an empty window.
                continue
            kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **kf_params)
            smoothed_state_means, _ = kf.smooth(m)
            up = numpy.mean([state[0] for state in smoothed_state_means])
            op = numpy.mean([state[1] for state in smoothed_state_means])
            print(prefix + "packet_rtt pykalman mean:", up-op)
            print(prefix + "packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))

    def as_rows(samples):
        return numpy.asarray([(s['unusual_packet'], s['other_packet'],
                               s['unusual_tsval'], s['other_tsval'])
                              for s in samples])

    train = db.subseries('train','long', offset=0)
    test = db.subseries('test','long', offset=0)
    null = db.subseries('train_null','long', offset=0)
    measurements = as_rows(train+test)
    null_measurements = as_rows(null)

    if params is None:
        param_names = ('transition_matrices',
                       'observation_matrices',
                       'transition_offsets',
                       'observation_offsets',
                       'transition_covariance',
                       'observation_covariance',
                       'initial_state_mean',
                       'initial_state_covariance')
        # The original also built a filter seeded with initial_state_mean and
        # immediately overwrote it; only the unseeded filter was ever used.
        kf = KalmanFilter(n_dim_obs=4, n_dim_state=4)

        start = time.time()
        # NOTE(review): `+` on two numpy arrays adds element-wise (and needs
        # broadcast-compatible shapes); it does not concatenate. Left as-is --
        # confirm whether concatenation was intended.
        kf = kf.em(measurements[0:len(train)]+null_measurements[0:50000],
                   n_iter=10, em_vars=param_names)
        params = {name: getattr(kf, name).tolist() for name in param_names}
        print("Learned Params:\n")
        import pprint
        pprint.pprint(params)
        print("pykalman em time: %f" % (time.time()-start))

    num_obs = 5000
    report_windows("", measurements,
                   range(50000,100000+num_obs,num_obs), num_obs, params)
    report_windows("null ", null_measurements,
                   range(0,len(null_measurements)+num_obs,num_obs), num_obs, params)
	237
	238
	239	#testKalman4D(echo_vm_5k)
	240
	241
	242
	243	def testKalman(params=None):
	244	from pykalman import AdditiveUnscentedKalmanFilter,KalmanFilter
	245	train = db.subseries('train','long', offset=0)
	246	test = db.subseries('test','long', offset=0)
	247	measurements = numpy.asarray([(s['unusual_packet'],s['other_packet']) for s in (train+test)])
	248
	249	#kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
	250	kf = KalmanFilter(n_dim_obs=2, n_dim_state=2,
	251	initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]),
	252	quadsummary([s['other_packet'] for s in train])])
	253	#kf = AdditiveUnscentedKalmanFilter(n_dim_obs=2, n_dim_state=2)
	254
	255	if params == None:
	256	start=time.time()
	257	kf = kf.em(measurements[0:len(train)], n_iter=10,
	258	em_vars=('transition_matrices',
	259	'observation_matrices',
	260	'transition_offsets',
	261	'observation_offsets',
	262	'transition_covariance',
	263	'observation_covariance',
	264	'initial_state_covariance'))
	265	params = {'transition_matrices': kf.transition_matrices.tolist(),
	266	'observation_matrices': kf.observation_matrices.tolist(),
	267	'transition_offsets': kf.transition_offsets.tolist(),
	268	'observation_offsets': kf.observation_offsets.tolist(),
	269	'transition_covariance': kf.transition_covariance.tolist(),
	270	'observation_covariance': kf.observation_covariance.tolist(),
	271	'initial_state_mean': kf.initial_state_mean.tolist(),
	272	'initial_state_covariance': kf.initial_state_covariance.tolist()}
	273	print("Learned Params:\n")
	274	import pprint
	275	pprint.pprint(params)
	276	print("pykalman em time: %f" % (time.time()-start))
	277
	278	#kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)
	279
	280	num_obs=10000
	281	for offset in range(50000,100000+num_obs,num_obs):
	282	start=time.time()
	283	kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params)
	284	m = measurements[offset:offset+num_obs]
	285	(smoothed_state_means, smoothed_state_covariances) = kf.smooth(m)
	286	print("pykalman smooth time: %f" % (time.time()-start))
	287	up = numpy.mean([m[0] for m in smoothed_state_means])
	288	op = numpy.mean([m[1] for m in smoothed_state_means])
	289	print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
	290	print("packet_rtt pykalman mean:", up-op)
	291	print("packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m]))
	292
	293
	294	#testKalman(ten_iter)
	295
	296
def getTCPTSPrecision():
    """Return ``tcpts_mean`` from the first row of the ``meta`` table, or
    None when the query yields no row.
    """
    cur = db.conn.cursor()
    cur.execute("""SELECT tcpts_mean FROM meta""")
    first = cur.fetchone()
    return first[0] if first else None
	305
	306
	307	def tsFilteredHistogram():
	308	tcpts_precision = getTCPTSPrecision()
	309
	310	num_bins = 500
	311	all = db.subseries('train','long')+db.subseries('test','long')
	312	diffs = [s['unusual_packet']-s['other_packet'] for s in all]
	313	ts0_diffs = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']-s['other_tsval'] == 0]
[14]	314	#ts1_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(s['unusual_tsval']-s['other_tsval']) > 0]
	315	#ts2_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision)) <= 1.0]
	316	ts1_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) == 1]
	317	ts2_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) >= 2]
	318	#ts3_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) == 3]
	319	#ts4_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(int(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision))) == 4]
[12]	320
[14]	321	#ts_mode = statistics.mode([s['unusual_tsval'] for s in all]+[s['other_tsval'] for s in all])
	322	#ts_diff_mode = statistics.mode([s['unusual_tsval']-s['other_tsval'] for s in all])
	323	#ts_common_mode = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']<=ts_mode and s['other_tsval']<=ts_mode]
	324	#ts_common_diff_mode = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']-s['other_tsval']==ts_diff_mode]
[12]	325
[14]	326	#print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
	327	#print('packet_rtt tsval diff=0 quadsummary: %f' % quadsummary(ts0_diffs))
	328	#print('packet_rtt tsval diff>0 quadsummary: %f' % quadsummary(ts1_diffs))
	329	#print('packet_rtt tsval diff<=1 quadsummary: %f' % quadsummary(ts2_diffs))
	330	#print('packet_rtt tsval mode quadsummary: %f' % quadsummary(ts_common_mode))
	331	#print(len(diffs), len(ts0_diffs)+len(ts1_diffs))
[12]	332	diffs.sort()
[14]	333	cut_off_low = diffs[int(len(diffs)*0.008)]
	334	cut_off_high = diffs[int(len(diffs)*0.992)]
[12]	335
	336	plt.clf()
	337	# the histogram of the data
	338	n, bins, patches = plt.hist(diffs, num_bins, normed=0, color='black', histtype='step', alpha=0.8,
[14]	339	range=(cut_off_low,cut_off_high), label='All Packets')
[12]	340	n, bins, patches = plt.hist(ts0_diffs, num_bins, normed=0, color='blue', histtype='step', alpha=0.8,
[14]	341	range=(cut_off_low,cut_off_high), label='TSval Difference == 0')
	342	n, bins, patches = plt.hist(ts1_diffs, num_bins, normed=0, color='orange', histtype='step', alpha=0.8,
	343	range=(cut_off_low,cut_off_high), label='TSval Difference == 1')
	344	n, bins, patches = plt.hist(ts2_diffs, num_bins, normed=0, color='red', histtype='step', alpha=0.8,
	345	range=(cut_off_low,cut_off_high), label='TSval Difference >= 2')
	346	#n, bins, patches = plt.hist(ts3_diffs, num_bins, normed=0, color='red', histtype='step', alpha=0.8,
	347	# range=(cut_off_low,cut_off_high), label='tsval diff == 3')
	348	#n, bins, patches = plt.hist(ts4_diffs, num_bins, normed=0, color='brown', histtype='step', alpha=0.8,
	349	# range=(cut_off_low,cut_off_high), label='tsval diff == 4')
[12]	350	#n, bins, patches = plt.hist(ts_common_mode, num_bins, normed=0, color='green', histtype='step', alpha=0.8,
	351	# range=(cut_off_low,cut_off_high), label='tsval common mode')
[14]	352	#n, bins, patches = plt.hist(ts_common_diff_mode, num_bins, normed=0, color='green', histtype='step', alpha=0.8,
	353	# range=(cut_off_low,cut_off_high), label='tsval common diff mode')
[12]	354	plt.xlabel('RTT Difference')
[14]	355	#plt.ylabel('Probability')
	356	#plt.title(r'Histogram - distribution of differences by tsval')
[12]	357
	358	# Tweak spacing to prevent clipping of ylabel
	359	plt.subplots_adjust(left=0.15)
	360	plt.legend()
	361	plt.show()
	362	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	363
[13]	364	#tsFilteredHistogram()
[12]	365
	366
def exampleSummaryHistogram():
    """Show a histogram of the packet-RTT differences of the 'long'
    train+test samples with shaded marker boxes at selected percentiles
    (25/75, 37.5/62.5, 12.5/87.5), at x=50, and at the mean of the first
    four percentile values.
    """
    from matplotlib.patches import FancyBboxPatch

    num_bins = 300
    samples = db.subseries('train','long')+db.subseries('test','long')
    diffs = [s['unusual_packet']-s['other_packet'] for s in samples]

    # Clip the x range to the 0.3%..99.7% span of the differences.
    diffs.sort()
    cut_off_low = diffs[int(len(diffs)*0.003)]
    cut_off_high = diffs[int(len(diffs)*0.997)]

    plt.clf()
    # `density` replaces the `normed` keyword, which newer matplotlib rejects.
    plt.hist(diffs, num_bins, density=False, color='black', histtype='step',
             alpha=0.8, range=(cut_off_low,cut_off_high), label='all')

    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')

    w = 25
    l1,r1,l2,r2,l3,r3 = numpy.percentile(diffs, (50-w,50+w,50-w/2,50+w/2,(50-w)/2,(50+w)/2+50))

    # (x position, colour) of each fixed-size marker box.
    markers = (
        (l1, "blue"), (r1, "blue"),
        (l2, "green"), (r2, "green"),
        (l3, "green"), (r3, "green"),
        (50, "black"),
        (numpy.mean((l1,r1,l2,r2)), "red"),
    )
    axes = plt.gca()
    for x, colour in markers:
        axes.add_patch(FancyBboxPatch((x, 0), 2500, 5000, boxstyle='square',
                                      facecolor=colour, alpha=0.4, edgecolor='none'))

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    plt.show()
	406
	407	#exampleSummaryHistogram()
	408
	409
	410
[6]	411	#all_data = longs+shorts
	412	#all_data.sort()
	413	#cut_off_low = all_data[0]
	414	#cut_off_high = all_data[int(len(all_data)*0.997)]
	415
	416
def plotSingleProbe(probe_id=None):
    """Show an event plot of the observed packet times of one probe, split
    into sent/received and payload/non-payload packets.

    probe_id -- id of the probe to plot. When None, a probe is picked from
                the ``analysis`` table (empty ``suspect``, 11th highest
                probe_id). If no such row exists a warning is written to
                stderr and nothing is plotted.
    """
    cursor = db.conn.cursor()
    if probe_id is None:
        cursor.execute("""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10""")
        row = cursor.fetchone()
        if row is None:
            # fetchone()[0] would raise TypeError here.
            sys.stderr.write("WARN: fewer than 11 probes with an empty 'suspect' field; nothing to plot.\n")
            return
        probe_id = row[0]

    def observed_times(sent):
        # Return (payload, non-payload) observation times for one direction.
        cursor.execute("""SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?""",
                       (probe_id, sent))
        pkts = cursor.fetchall()
        payload = [observed for observed, payload_len in pkts if payload_len != 0]
        other = [observed for observed, payload_len in pkts if payload_len == 0]
        return payload, other

    sent_payload, sent_other = observed_times(1)
    rcvd_payload, rcvd_other = observed_times(0)

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    plt.show()
	452
	453	#plotSingleProbe()
	454
	455
	456	def graphTestResults():
[14]	457	basename = os.path.basename(options.db_file)
	458	basename,ext = os.path.splitext(basename)
	459
	460	chartname = "/home/tim/blindspot/research/timing-analysis/paper/figures/results/%s.svg" % (basename)
	461	print(chartname)
	462
[11]	463	plt.clf()
	464	plt.title("Test Results")
	465	plt.xlabel('sample size')
	466	plt.ylabel('error rate')
	467	legend = []
	468	colors = ['red','blue','green','purple','orange','black','brown']
	469	color_id = 0
	470
	471	cursor = db.conn.cursor()
	472	query = """
	473	SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
	474	"""
	475	cursor.execute(query)
	476	classifiers = []
	477	for c in cursor:
	478	classifiers.append(c[0])
	479
[14]	480	best_obs = []
	481	best_error = []
[13]	482	max_obs = 0
[11]	483	for classifier in classifiers:
	484	query="""
[14]	485	SELECT params,num_observations FROM classifier_results
[11]	486	WHERE trial_type='test'
	487	AND classifier=:classifier
	488	AND (false_positives+false_negatives)/2.0 < 5.0
	489	ORDER BY num_observations,(false_positives+false_negatives)
	490	LIMIT 1
	491	"""
	492	cursor.execute(query, {'classifier':classifier})
	493	row = cursor.fetchone()
	494	if row == None:
	495	query="""
[14]	496	SELECT params,(false_positives+false_negatives)/2 FROM classifier_results
[11]	497	WHERE trial_type='test' and classifier=:classifier
	498	ORDER BY (false_positives+false_negatives),num_observations
	499	LIMIT 1
	500	"""
	501	cursor.execute(query, {'classifier':classifier})
	502	row = cursor.fetchone()
	503	if row == None:
	504	sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
	505	continue
	506
[14]	507	best_error.append((row[1], classifier))
	508	else:
	509	best_obs.append((row[1], classifier))
	510
[11]	511	best_params = row[0]
	512	query="""
	513	SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
	514	WHERE trial_type='test'
	515	AND classifier=:classifier
	516	AND params=:params
	517	ORDER BY num_observations
	518	"""
	519	cursor.execute(query, {'classifier':classifier,'params':best_params})
	520
	521	num_obs = []
	522	performance = []
	523	for row in cursor:
[13]	524	max_obs = max(max_obs, row[0])
[11]	525	num_obs.append(row[0])
	526	performance.append(row[1])
	527	#print(num_obs,performance)
	528	path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
	529	plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
	530	legend.append((classifier,path))
	531	color_id = (color_id+1) % len(colors)
	532
[14]	533	best_obs.sort()
	534	best_error.sort()
	535	winner = None
	536	for bo in best_obs:
	537	sys.stdout.write("%d obs / %s" % bo)
	538	if winner == None:
	539	sys.stdout.write(" (winner)")
	540	winner = bo
	541	print()
	542
	543	for be in best_error:
	544	sys.stdout.write("%f%% error / %s" % be)
	545	if winner == None:
	546	sys.stdout.write(" (winner)")
	547	winner = be
	548	print()
	549
	550	plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='x-small')
[13]	551	plt.plot([0, max_obs], [5.0, 5.0], "k--")
[14]	552	plt.xlabel('Number of Observations')
	553	plt.ylabel('Error Rate')
	554	plt.savefig(chartname)
	555	#plt.show()
	556
# Script entry: draw the classifier results chart, then stop.
graphTestResults()

# The script exits here, so none of the top-level code below this line runs.
sys.exit(0)

# --- Packet RTT over time (short, long, and long-minus-others) ---
short_overtime = sorted((sample['time_of_day'], sample['short'])
                        for sample in timeSeries(db,'train','short'))
long_overtime = sorted((sample['time_of_day'], sample['long'])
                       for sample in timeSeries(db,'train','long'))
diff_overtime = sorted((sample['time_of_day'], sample['long']-sample['other_cases'])
                       for sample in timeSeries(db,'train','long'))

plt.clf()
plt.title("Packet RTT over time")
plt.xlabel('Time of Day')
plt.ylabel('RTT')
s, l, d = [
    plt.scatter([t for t, _ in points], [rtt for _, rtt in points],
                s=1, color=shade, alpha=0.6)
    for points, shade in ((short_overtime, 'red'),
                          (long_overtime, 'blue'),
                          (diff_overtime, 'purple'))
]
plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
plt.show()

# --- Sent vs. received packet times of one request ---
# NOTE(review): sent_times / rcvd_times are not assigned anywhere in the
# visible part of this file.
plt.clf()
plt.title("Simple HTTP Request")
plt.xlabel('Time of Day')
plt.ylabel('')
s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
plt.legend((s,r), ('sent','received'), scatterpoints=1)
plt.show()

sys.exit(0)
short_overtime = long_overtime = diff_overtime = None
	592
	593
	594	num_bins = 300
	595	reported_diffs.sort()
	596	cut_off_low = reported_diffs[int(len(diffs)*0.003)]
	597	cut_off_high = reported_diffs[int(len(diffs)*0.997)]
	598
	599	plt.clf()
	600	# the histogram of the data
	601	n, bins, patches = plt.hist(reported_diffs, num_bins, normed=1, color='black', histtype='step', alpha=0.8,
	602	range=(cut_off_low,cut_off_high))
	603	plt.xlabel('RTT Difference')
	604	plt.ylabel('Probability')
	605	plt.title(r'Histogram - distribution of differences')
	606
	607	# Tweak spacing to prevent clipping of ylabel
	608	plt.subplots_adjust(left=0.15)
	609	#plt.legend()
	610	plt.show()
	611	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	612
	613
	614
	615
	616	num_bins = 300
	617	diffs.sort()
	618	cut_off_low = diffs[int(len(diffs)*0.003)]
	619	cut_off_high = diffs[int(len(diffs)*0.997)]
	620
	621	plt.clf()
	622	# the histogram of the data
	623	n, bins, patches = plt.hist(diffs, num_bins, normed=1, color='purple', histtype='step', alpha=0.8,
	624	range=(cut_off_low,cut_off_high))
	625	plt.xlabel('RTT Difference')
	626	plt.ylabel('Probability')
	627	plt.title(r'Histogram - distribution of differences')
	628
	629	# Tweak spacing to prevent clipping of ylabel
	630	plt.subplots_adjust(left=0.15)
	631	#plt.legend()
	632	plt.show()
	633	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	634
	635	sys.exit(0)
	636
	637
	638
	639	num_bins = 150
	640	# the histogram of the data
	641	n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
	642	range=(cut_off_low,cut_off_high))
	643	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	644	# add a 'best fit' line
	645	#y = mlab.normpdf(bins, mu, sigma)
	646	#plt.plot(bins, y, 'r--')
	647	plt.xlabel('packet_rtt')
	648	plt.ylabel('Probability')
	649	plt.title(r'Histogram - RTT short and long')
	650
	651	# Tweak spacing to prevent clipping of ylabel
	652	plt.subplots_adjust(left=0.15)
	653	plt.legend()
	654	#plt.show()
	655	plt.savefig('paper/figures/comcast-powerboost2.svg')
	656
	657
	658
	659
	660	num_trials = 200
	661
	662
	663	subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
	664	estimator = functools.partial(boxTest, 0.07, 0.08)
	665	performance = []
	666	for subsample_size in subsample_sizes:
	667	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	668	performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)
	669
	670	null_performance = []
	671	for subsample_size in subsample_sizes:
	672	null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
	673	null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)
	674
	675	plt.clf()
	676	plt.title("boxTest bootstrap")
	677	plt.xlabel('sample size')
	678	plt.ylabel('performance')
	679	plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
	680	plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
	681	plt.show()
	682
	683
	684
	685	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	686	estimator = diffMedian
	687	performance = []
	688	for subsample_size in subsample_sizes:
	689	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	690	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	691
	692	plt.clf()
	693	plt.title("diff median bootstrap")
	694	plt.xlabel('sample size')
	695	plt.ylabel('performance')
	696	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	697	plt.show()
	698
	699
	700
	701
	702	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	703	weight_funcs = (linearWeights, prunedWeights)
	704	for wf in weight_funcs:
	705	estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
	706	performance = []
	707	for subsample_size in subsample_sizes:
	708	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	709	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	710
	711	plt.clf()
	712	plt.title(repr(wf))
	713	plt.xlabel('sample size')
	714	plt.ylabel('performance')
	715	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	716	plt.show()
	717
	718
	719
	720	num_bins = 300
	721	# the histogram of the data
	722	n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
	723	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	724	# add a 'best fit' line
	725	#y = mlab.normpdf(bins, mu, sigma)
	726	#plt.plot(bins, y, 'r--')
	727	plt.xlabel('packet_rtt')
	728	plt.ylabel('Probability')
	729	plt.title(r'Histogram - tsval_rtt short vs long')
	730
	731	# Tweak spacing to prevent clipping of ylabel
	732	plt.subplots_adjust(left=0.15)
	733	plt.legend()
	734	plt.show()
	735
	736
	737
	738
	739	####
	740	#trust_methods = [min,max,sum,difference,product]
	741	trust_methods = [sum,product,hypotenuse]
	742	colors = ['red','blue','green','purple','orange','black']
	743	weight_methods = [prunedWeights, linearWeights]
	744	alphas = [i/100.0 for i in range(0,100,2)]
	745
	746
	747
	748
	749	plt.clf()
	750	plt.title(r'Trust Method Comparison - Linear')
	751	plt.xlabel('Alpha')
	752	plt.ylabel('Mean error')
	753	paths = []
	754	for tm in trust_methods:
	755	trust = trustValues(derived, tm)
	756	series = []
	757	for alpha in alphas:
	758	weights = linearWeights(derived, trust, alpha)
	759	series.append(weightedMean(derived, weights) - expected_mean)
	760
	761	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	762
	763	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	764	plt.show()
	765
	766
	767
	768	plt.clf()
	769	plt.title(r'Trust Method Comparison - Pruned')
	770	plt.xlabel('Alpha')
	771	plt.ylabel('Mean error')
	772	paths = []
	773	for tm in trust_methods:
	774	trust = trustValues(derived, tm)
	775	series = []
	776	for alpha in alphas:
	777	weights = prunedWeights(derived, trust, alpha)
	778	series.append(weightedMean(derived, weights) - expected_mean)
	779
	780	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	781
	782	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	783	plt.show()
	784
	785
	786	sys.exit(0)
	787
	788	plt.clf()
	789	plt.title(r'Trust Method Comparison - Inverted')
	790	plt.xlabel('Alpha')
	791	plt.ylabel('Mean error')
	792	paths = []
	793	for tm in trust_methods:
	794	trust = trustValues(derived, tm)
	795	series = []
	796	for alpha in alphas:
	797	weights = invertedWeights(derived, trust, alpha)
	798	series.append(weightedMean(derived, weights) - expected_mean)
	799
	800	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	801
	802	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	803	plt.show()
	804
	805
	806	plt.clf()
	807	plt.title(r'Trust Method Comparison - Arctangent')
	808	plt.xlabel('Alpha')
	809	plt.ylabel('Mean error')
	810	paths = []
	811	for tm in trust_methods:
	812	trust = trustValues(derived, tm)
	813	series = []
	814	for alpha in alphas:
	815	weights = arctanWeights(derived, trust, alpha)
	816	series.append(weightedMean(derived, weights) - expected_mean)
	817
	818	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	819
	820	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	821	plt.show()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: