Context Navigation

source: trunk/bin/graph @ 11

Last change on this file since 11 was 11, checked in by tim, 10 years ago
.
Property svn:executable set to `*`
File size: 16.2 KB

Rev	Line
[6]	1	#!/usr/bin/env python3
	2
	3	import sys
	4	import os
	5	import time
	6	import random
	7	import tempfile
	8	import argparse
	9	import socket
	10	import json
	11
[10]	12	import numpy
[6]	13	import matplotlib.mlab as mlab
	14	import matplotlib.pyplot as plt
	15
	16
	17	VERSION = "{DEVELOPMENT}"
	18	if VERSION == "{DEVELOPMENT}":
	19	script_dir = '.'
	20	try:
	21	script_dir = os.path.dirname(os.path.realpath(__file__))
	22	except:
	23	try:
	24	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	25	except:
	26	pass
	27	sys.path.append("%s/../lib" % script_dir)
	28
	29	from nanownlib import *
	30	from nanownlib.stats import *
	31	import nanownlib.storage
	32
	33
	34	parser = argparse.ArgumentParser(
	35	description="")
	36	parser.add_argument('db_file', default=None,
	37	help='')
	38	options = parser.parse_args()
	39	db = nanownlib.storage.db(options.db_file)
	40
	41
def differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample RTT differences of the train and test series.

    Args:
        db: object whose subseries(name, unusual_case) method returns an
            iterable of mappings.
        unusual_case: test case passed through to db.subseries().
        rtt_type: measurement suffix; the keys read from every mapping are
            'unusual_<rtt_type>' and 'other_<rtt_type>'.

    Returns:
        A list holding unusual minus other for every entry of the 'train'
        series, followed by the same for the 'test' series.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    ret_val = []
    for series in ('train', 'test'):
        ret_val += [s[unusual_key] - s[other_key]
                    for s in db.subseries(series, unusual_case)]
    return ret_val
[6]	46
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample RTT differences of the 'train_null' series.

    Args:
        db: object whose subseries(name, unusual_case) method returns an
            iterable of mappings.
        unusual_case: test case passed through to db.subseries().
        rtt_type: measurement suffix; the keys read from every mapping are
            'unusual_<rtt_type>' and 'other_<rtt_type>'.

    Returns:
        A list holding unusual minus other for every entry of the series.
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    return [s[unusual_key] - s[other_key]
            for s in db.subseries('train_null', unusual_case)]
[6]	50
[11]	51
def timeSeries(db, probe_type, unusual_case):
    """Yield one record per probe of `unusual_case`, paired with the others.

    For every probe of type `probe_type` whose test case is `unusual_case`
    the query returns the probe's time of day, its packet RTT, and the
    average packet RTT of the probes with the same type and sample that
    belong to any other test case.

    Args:
        db: object exposing a database connection as `db.conn`. The
            connection must accept named (:name) parameters and return rows
            addressable by column name.
        probe_type: value matched against probes.type.
        unusual_case: value matched against probes.test_case; it is also the
            key under which the probe's own RTT is stored in each record.

    Yields:
        dict with the keys 'time_of_day', <unusual_case> and 'other_cases'.
        'other_cases' is None when no other probe shares the sample.
    """
    cursor = db.conn.cursor()
    query = """
      SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
                                           WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
      FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
            WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
    """

    params = {"probe_type": probe_type, "unusual_case": unusual_case}
    cursor.execute(query, params)
    for row in cursor:
        yield {'time_of_day': row['time_of_day'],
               unusual_case: row['uc'],
               'other_cases': row['oc']}
	65	#samples,derived,null_derived = parse_data(input1)
	66
	67	#trust = trustValues(derived, sum)
	68	#weights = linearWeights(derived, trust, 0.25)
	69	#print('(test): %f' % weightedMean(derived,weights))
	70
	71	diffs = list(differences(db, 'long'))
	72	reported_diffs = list(differences(db, 'long', 'reported'))
	73	#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
	74	#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']
	75
	76	short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
	77	long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
	78	diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
	79	short_overtime.sort()
	80	long_overtime.sort()
	81	diff_overtime.sort()
	82
	83	print('packet_rtt diff median: %f' % statistics.median(diffs))
[10]	84	print('packet_rtt diff midhinge: %f' % midsummary(diffs))
[6]	85	print('packet_rtt diff trimean: %f' % trimean(diffs))
[10]	86	print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
	87	print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
[6]	88	print('packet_rtt diff MAD: %f' % mad(diffs))
[11]	89	try:
	90	print('reported diff trimean: %f' % trimean(reported_diffs))
	91	print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
	92	print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
	93	print('reported diff MAD: %f' % mad(reported_diffs))
[6]	94
[11]	95	import cProfile
	96	start = time.time()
	97	kresults = kfilter({},diffs)
	98	#print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
	99	print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
	100	kresults = kfilter({},reported_diffs)
	101	#print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
	102	print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
	103	print("kfilter time: %f" % (time.time()-start))
	104	except:
	105	pass
[6]	106
[11]	107	print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
	108	print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
	109	print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
	110	print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))
[10]	111
[11]	112
[6]	113	#all_data = longs+shorts
	114	#all_data.sort()
	115	#cut_off_low = all_data[0]
	116	#cut_off_high = all_data[int(len(all_data)*0.997)]
	117
	118
def plotSingleProbe(probe_id=None):
    """Plot the packet timestamps of one probe as four rows of event lines.

    Sent and received packets are drawn in red and blue respectively, each
    split into packets with a non-zero payload_len and packets with a
    payload_len of zero.

    Args:
        probe_id: id of the probe to plot. When None, a probe is taken from
            the analysis table: rows with an empty `suspect` column, ordered
            by descending probe_id, skipping the first ten.
    """
    cursor = db.conn.cursor()
    if probe_id is None:
        query = """SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        row = cursor.fetchone()
        if row is None:
            # Fewer than eleven matching probes: nothing to plot.
            sys.stderr.write("WARN: couldn't find a probe to plot.\n")
            return
        probe_id = row[0]

    query = """SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=?"""

    def observedTimes(sent):
        # Returns (times with payload, times without payload) for one direction.
        cursor.execute(query, (probe_id, sent))
        pkts = cursor.fetchall()
        return ([row[0] for row in pkts if row[1] != 0],
                [row[0] for row in pkts if row[1] == 0])

    sent_payload, sent_other = observedTimes(1)
    rcvd_payload, rcvd_other = observedTimes(0)

    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
    #cursor.execute(query, (probe_id,))
    #reported,tod = cursor.fetchone()
    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    #plt.legend((s,r), ('sent','received'))
    #plt.savefig('../img/http-packet-times.svg')
    plt.show()
	154
	155	#plotSingleProbe()
	156
	157
	158	def graphTestResults():
	159	plt.clf()
	160	plt.title("Test Results")
	161	plt.xlabel('sample size')
	162	plt.ylabel('error rate')
	163	legend = []
	164	colors = ['red','blue','green','purple','orange','black','brown']
	165	color_id = 0
	166
	167	cursor = db.conn.cursor()
	168	query = """
	169	SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
	170	"""
	171	cursor.execute(query)
	172	classifiers = []
	173	for c in cursor:
	174	classifiers.append(c[0])
	175
	176	for classifier in classifiers:
	177	query="""
	178	SELECT params FROM classifier_results
	179	WHERE trial_type='test'
	180	AND classifier=:classifier
	181	AND (false_positives+false_negatives)/2.0 < 5.0
	182	ORDER BY num_observations,(false_positives+false_negatives)
	183	LIMIT 1
	184	"""
	185	cursor.execute(query, {'classifier':classifier})
	186	row = cursor.fetchone()
	187	if row == None:
	188	query="""
	189	SELECT params FROM classifier_results
	190	WHERE trial_type='test' and classifier=:classifier
	191	ORDER BY (false_positives+false_negatives),num_observations
	192	LIMIT 1
	193	"""
	194	cursor.execute(query, {'classifier':classifier})
	195	row = cursor.fetchone()
	196	if row == None:
	197	sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
	198	continue
	199
	200	best_params = row[0]
	201	query="""
	202	SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
	203	WHERE trial_type='test'
	204	AND classifier=:classifier
	205	AND params=:params
	206	ORDER BY num_observations
	207	"""
	208	cursor.execute(query, {'classifier':classifier,'params':best_params})
	209
	210	num_obs = []
	211	performance = []
	212	for row in cursor:
	213	num_obs.append(row[0])
	214	performance.append(row[1])
	215	#print(num_obs,performance)
	216	path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
	217	plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
	218	legend.append((classifier,path))
	219	color_id = (color_id+1) % len(colors)
	220
	221	plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='xx-small')
	222	plt.show()
	223
	224	graphTestResults()
	225
	226	sys.exit(0)
	227
[6]	228	plt.clf()
	229	plt.title("Packet RTT over time")
	230	plt.xlabel('Time of Day')
	231	plt.ylabel('RTT')
	232	s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
	233	l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
	234	d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
	235	plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
	236	#plt.savefig('paper/figures/comcast-powerboost1.png')
	237	plt.show()
	238
[11]	239
	240
	241	plt.clf()
	242	plt.title("Simple HTTP Request")
	243	plt.xlabel('Time of Day')
	244	plt.ylabel('')
	245	s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
	246	r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
	247	plt.legend((s,r), ('sent','received'), scatterpoints=1)
	248	plt.show()
	249
	250	sys.exit(0)
# Drop the references to the time series that are no longer needed.
short_overtime,long_overtime,diff_overtime = None,None,None


num_bins = 300
reported_diffs.sort()
# Clip the plotted range to the 0.3% .. 99.7% span of the sorted values.
cut_off_low = reported_diffs[int(len(reported_diffs)*0.003)]
cut_off_high = reported_diffs[int(len(reported_diffs)*0.997)]

plt.clf()
# the histogram of the data
# NOTE(review): the `normed` keyword was removed from newer Matplotlib
# releases in favour of `density`; confirm the installed version.
n, bins, patches = plt.hist(reported_diffs, num_bins, normed=1, color='black', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')




num_bins = 300
diffs.sort()
# Clip the plotted range to the 0.3% .. 99.7% span of the sorted values.
cut_off_low = diffs[int(len(diffs)*0.003)]
cut_off_high = diffs[int(len(diffs)*0.997)]

plt.clf()
# the histogram of the data
# NOTE(review): see the remark about `normed` on the histogram above.
n, bins, patches = plt.hist(diffs, num_bins, normed=1, color='purple', histtype='step', alpha=0.8,
                            range=(cut_off_low,cut_off_high))
plt.xlabel('RTT Difference')
plt.ylabel('Probability')
plt.title(r'Histogram - distribution of differences')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
#plt.legend()
plt.show()
#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')

sys.exit(0)
	296
	297
	298
	299	num_bins = 150
	300	# the histogram of the data
	301	n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
	302	range=(cut_off_low,cut_off_high))
	303	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	304	# add a 'best fit' line
	305	#y = mlab.normpdf(bins, mu, sigma)
	306	#plt.plot(bins, y, 'r--')
	307	plt.xlabel('packet_rtt')
	308	plt.ylabel('Probability')
	309	plt.title(r'Histogram - RTT short and long')
	310
	311	# Tweak spacing to prevent clipping of ylabel
	312	plt.subplots_adjust(left=0.15)
	313	plt.legend()
	314	#plt.show()
	315	plt.savefig('paper/figures/comcast-powerboost2.svg')
	316
	317
	318
	319
	320	num_trials = 200
	321
	322
	323	subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
	324	estimator = functools.partial(boxTest, 0.07, 0.08)
	325	performance = []
	326	for subsample_size in subsample_sizes:
	327	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	328	performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)
	329
	330	null_performance = []
	331	for subsample_size in subsample_sizes:
	332	null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
	333	null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)
	334
	335	plt.clf()
	336	plt.title("boxTest bootstrap")
	337	plt.xlabel('sample size')
	338	plt.ylabel('performance')
	339	plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
	340	plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
	341	plt.show()
	342
	343
	344
	345	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	346	estimator = diffMedian
	347	performance = []
	348	for subsample_size in subsample_sizes:
	349	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	350	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	351
	352	plt.clf()
	353	plt.title("diff median bootstrap")
	354	plt.xlabel('sample size')
	355	plt.ylabel('performance')
	356	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	357	plt.show()
	358
	359
	360
	361
	362	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	363	weight_funcs = (linearWeights, prunedWeights)
	364	for wf in weight_funcs:
	365	estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
	366	performance = []
	367	for subsample_size in subsample_sizes:
	368	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	369	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	370
	371	plt.clf()
	372	plt.title(repr(wf))
	373	plt.xlabel('sample size')
	374	plt.ylabel('performance')
	375	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	376	plt.show()
	377
	378
	379
	380	num_bins = 300
	381	# the histogram of the data
	382	n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
	383	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	384	# add a 'best fit' line
	385	#y = mlab.normpdf(bins, mu, sigma)
	386	#plt.plot(bins, y, 'r--')
	387	plt.xlabel('packet_rtt')
	388	plt.ylabel('Probability')
	389	plt.title(r'Histogram - tsval_rtt short vs long')
	390
	391	# Tweak spacing to prevent clipping of ylabel
	392	plt.subplots_adjust(left=0.15)
	393	plt.legend()
	394	plt.show()
	395
	396
	397
	398
	399	####
	400	#trust_methods = [min,max,sum,difference,product]
	401	trust_methods = [sum,product,hypotenuse]
	402	colors = ['red','blue','green','purple','orange','black']
	403	weight_methods = [prunedWeights, linearWeights]
	404	alphas = [i/100.0 for i in range(0,100,2)]
	405
	406
	407
	408
	409	plt.clf()
	410	plt.title(r'Trust Method Comparison - Linear')
	411	plt.xlabel('Alpha')
	412	plt.ylabel('Mean error')
	413	paths = []
	414	for tm in trust_methods:
	415	trust = trustValues(derived, tm)
	416	series = []
	417	for alpha in alphas:
	418	weights = linearWeights(derived, trust, alpha)
	419	series.append(weightedMean(derived, weights) - expected_mean)
	420
	421	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	422
	423	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	424	plt.show()
	425
	426
	427
	428	plt.clf()
	429	plt.title(r'Trust Method Comparison - Pruned')
	430	plt.xlabel('Alpha')
	431	plt.ylabel('Mean error')
	432	paths = []
	433	for tm in trust_methods:
	434	trust = trustValues(derived, tm)
	435	series = []
	436	for alpha in alphas:
	437	weights = prunedWeights(derived, trust, alpha)
	438	series.append(weightedMean(derived, weights) - expected_mean)
	439
	440	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	441
	442	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	443	plt.show()
	444
	445
	446	sys.exit(0)
	447
	448	plt.clf()
	449	plt.title(r'Trust Method Comparison - Inverted')
	450	plt.xlabel('Alpha')
	451	plt.ylabel('Mean error')
	452	paths = []
	453	for tm in trust_methods:
	454	trust = trustValues(derived, tm)
	455	series = []
	456	for alpha in alphas:
	457	weights = invertedWeights(derived, trust, alpha)
	458	series.append(weightedMean(derived, weights) - expected_mean)
	459
	460	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	461
	462	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	463	plt.show()
	464
	465
	466	plt.clf()
	467	plt.title(r'Trust Method Comparison - Arctangent')
	468	plt.xlabel('Alpha')
	469	plt.ylabel('Mean error')
	470	paths = []
	471	for tm in trust_methods:
	472	trust = trustValues(derived, tm)
	473	series = []
	474	for alpha in alphas:
	475	weights = arctanWeights(derived, trust, alpha)
	476	series.append(weightedMean(derived, weights) - expected_mean)
	477
	478	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	479
	480	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	481	plt.show()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: