Context Navigation

source: trunk/bin/graph @ 10

Last change on this file since 10 was 10, checked in by tim, 10 years ago
.
Property svn:executable set to *
File size: 11.5 KB

Rev	Line
[6]	1	#!/usr/bin/env python3
	2
	3	import sys
	4	import os
	5	import time
	6	import random
	7	import tempfile
	8	import argparse
	9	import socket
	10	import json
	11
[10]	12	import numpy
[6]	13	import matplotlib.mlab as mlab
	14	import matplotlib.pyplot as plt
	15
	16
	17	VERSION = "{DEVELOPMENT}"
	18	if VERSION == "{DEVELOPMENT}":
	19	script_dir = '.'
	20	try:
	21	script_dir = os.path.dirname(os.path.realpath(__file__))
	22	except:
	23	try:
	24	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	25	except:
	26	pass
	27	sys.path.append("%s/../lib" % script_dir)
	28
	29	from nanownlib import *
	30	from nanownlib.stats import *
	31	import nanownlib.storage
	32
	33
	34	parser = argparse.ArgumentParser(
	35	description="")
	36	parser.add_argument('db_file', default=None,
	37	help='')
	38	options = parser.parse_args()
	39	db = nanownlib.storage.db(options.db_file)
	40
	41
	42	def differences(db, unusual_case, column='packet_rtt'):
	43	cursor = db.conn.cursor()
	44	query="""
	45	SELECT %(column)s-(SELECT avg(%(column)s) FROM probes,analysis
	46	WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type in ('train','test') AND sample=u.sample)
	47	FROM (SELECT probes.sample,%(column)s FROM probes,analysis
	48	WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type in ('train','test')) u
	49	""" % {"column":column}
	50	params = {"unusual_case":unusual_case}
	51	cursor.execute(query, params)
	52	for row in cursor:
	53	yield row[0]
	54
	55
	56	def timeSeries(db, probe_type, unusual_case):
	57	cursor = db.conn.cursor()
	58	query="""
	59	SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
	60	WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
	61	FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
	62	WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
	63	"""
	64
	65	params = {"probe_type":probe_type,"unusual_case":unusual_case}
	66	cursor.execute(query, params)
	67	for row in cursor:
	68	yield {'time_of_day':row['time_of_day'],unusual_case:row['uc'],'other_cases':row['oc']}
	69	#samples,derived,null_derived = parse_data(input1)
	70
	71	#trust = trustValues(derived, sum)
	72	#weights = linearWeights(derived, trust, 0.25)
	73	#print('(test): %f' % weightedMean(derived,weights))
	74
	75	diffs = list(differences(db, 'long'))
	76	reported_diffs = list(differences(db, 'long', 'reported'))
	77	#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
	78	#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']
	79
	80	short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
	81	long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
	82	diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
	83	short_overtime.sort()
	84	long_overtime.sort()
	85	diff_overtime.sort()
	86
	87	print('packet_rtt diff median: %f' % statistics.median(diffs))
[10]	88	print('packet_rtt diff midhinge: %f' % midsummary(diffs))
[6]	89	print('packet_rtt diff trimean: %f' % trimean(diffs))
[10]	90	print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
	91	print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
[6]	92	print('packet_rtt diff MAD: %f' % mad(diffs))
	93	print('reported diff trimean: %f' % trimean(reported_diffs))
[10]	94	print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
	95	print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
[6]	96	print('reported diff MAD: %f' % mad(reported_diffs))
	97
[10]	98	import cProfile
	99	kresults = kfilter({},diffs)
	100	#print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
	101	print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
	102	kresults = kfilter({},reported_diffs)
	103	#print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
	104	print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
[6]	105
[10]	106
[6]	107	#all_data = longs+shorts
	108	#all_data.sort()
	109	#cut_off_low = all_data[0]
	110	#cut_off_high = all_data[int(len(all_data)*0.997)]
	111
	112
	113	plt.clf()
	114	plt.title("Packet RTT over time")
	115	plt.xlabel('Time of Day')
	116	plt.ylabel('RTT')
	117	s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
	118	l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
	119	d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
	120	plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
	121	#plt.savefig('paper/figures/comcast-powerboost1.png')
	122	plt.show()
	123
	124	short_overtime,long_overtime,diff_overtime = None,None,None
	125
	126
	127	num_bins = 300
	128	reported_diffs.sort()
	129	cut_off_low = reported_diffs[int(len(diffs)*0.003)]
	130	cut_off_high = reported_diffs[int(len(diffs)*0.997)]
	131
	132	plt.clf()
	133	# the histogram of the data
	134	n, bins, patches = plt.hist(reported_diffs, num_bins, normed=1, color='black', histtype='step', alpha=0.8,
	135	range=(cut_off_low,cut_off_high))
	136	plt.xlabel('RTT Difference')
	137	plt.ylabel('Probability')
	138	plt.title(r'Histogram - distribution of differences')
	139
	140	# Tweak spacing to prevent clipping of ylabel
	141	plt.subplots_adjust(left=0.15)
	142	#plt.legend()
	143	plt.show()
	144	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	145
	146
	147
	148
	149	num_bins = 300
	150	diffs.sort()
	151	cut_off_low = diffs[int(len(diffs)*0.003)]
	152	cut_off_high = diffs[int(len(diffs)*0.997)]
	153
	154	plt.clf()
	155	# the histogram of the data
	156	n, bins, patches = plt.hist(diffs, num_bins, normed=1, color='purple', histtype='step', alpha=0.8,
	157	range=(cut_off_low,cut_off_high))
	158	plt.xlabel('RTT Difference')
	159	plt.ylabel('Probability')
	160	plt.title(r'Histogram - distribution of differences')
	161
	162	# Tweak spacing to prevent clipping of ylabel
	163	plt.subplots_adjust(left=0.15)
	164	#plt.legend()
	165	plt.show()
	166	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	167
	168	sys.exit(0)
	169
	170
	171
	172	num_bins = 150
	173	# the histogram of the data
	174	n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
	175	range=(cut_off_low,cut_off_high))
	176	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	177	# add a 'best fit' line
	178	#y = mlab.normpdf(bins, mu, sigma)
	179	#plt.plot(bins, y, 'r--')
	180	plt.xlabel('packet_rtt')
	181	plt.ylabel('Probability')
	182	plt.title(r'Histogram - RTT short and long')
	183
	184	# Tweak spacing to prevent clipping of ylabel
	185	plt.subplots_adjust(left=0.15)
	186	plt.legend()
	187	#plt.show()
	188	plt.savefig('paper/figures/comcast-powerboost2.svg')
	189
	190
	191
	192
	193	num_trials = 200
	194
	195
	196	subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
	197	estimator = functools.partial(boxTest, 0.07, 0.08)
	198	performance = []
	199	for subsample_size in subsample_sizes:
	200	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	201	performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)
	202
	203	null_performance = []
	204	for subsample_size in subsample_sizes:
	205	null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
	206	null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)
	207
	208	plt.clf()
	209	plt.title("boxTest bootstrap")
	210	plt.xlabel('sample size')
	211	plt.ylabel('performance')
	212	plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
	213	plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
	214	plt.show()
	215
	216
	217
	218	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	219	estimator = diffMedian
	220	performance = []
	221	for subsample_size in subsample_sizes:
	222	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	223	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	224
	225	plt.clf()
	226	plt.title("diff median bootstrap")
	227	plt.xlabel('sample size')
	228	plt.ylabel('performance')
	229	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	230	plt.show()
	231
	232
	233
	234
	235	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	236	weight_funcs = (linearWeights, prunedWeights)
	237	for wf in weight_funcs:
	238	estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
	239	performance = []
	240	for subsample_size in subsample_sizes:
	241	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	242	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	243
	244	plt.clf()
	245	plt.title(repr(wf))
	246	plt.xlabel('sample size')
	247	plt.ylabel('performance')
	248	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	249	plt.show()
	250
	251
	252
	253	num_bins = 300
	254	# the histogram of the data
	255	n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
	256	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	257	# add a 'best fit' line
	258	#y = mlab.normpdf(bins, mu, sigma)
	259	#plt.plot(bins, y, 'r--')
	260	plt.xlabel('packet_rtt')
	261	plt.ylabel('Probability')
	262	plt.title(r'Histogram - tsval_rtt short vs long')
	263
	264	# Tweak spacing to prevent clipping of ylabel
	265	plt.subplots_adjust(left=0.15)
	266	plt.legend()
	267	plt.show()
	268
	269
	270
	271
	272	####
	273	#trust_methods = [min,max,sum,difference,product]
	274	trust_methods = [sum,product,hypotenuse]
	275	colors = ['red','blue','green','purple','orange','black']
	276	weight_methods = [prunedWeights, linearWeights]
	277	alphas = [i/100.0 for i in range(0,100,2)]
	278
	279
	280
	281
	282	plt.clf()
	283	plt.title(r'Trust Method Comparison - Linear')
	284	plt.xlabel('Alpha')
	285	plt.ylabel('Mean error')
	286	paths = []
	287	for tm in trust_methods:
	288	trust = trustValues(derived, tm)
	289	series = []
	290	for alpha in alphas:
	291	weights = linearWeights(derived, trust, alpha)
	292	series.append(weightedMean(derived, weights) - expected_mean)
	293
	294	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	295
	296	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	297	plt.show()
	298
	299
	300
	301	plt.clf()
	302	plt.title(r'Trust Method Comparison - Pruned')
	303	plt.xlabel('Alpha')
	304	plt.ylabel('Mean error')
	305	paths = []
	306	for tm in trust_methods:
	307	trust = trustValues(derived, tm)
	308	series = []
	309	for alpha in alphas:
	310	weights = prunedWeights(derived, trust, alpha)
	311	series.append(weightedMean(derived, weights) - expected_mean)
	312
	313	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	314
	315	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	316	plt.show()
	317
	318
	319	sys.exit(0)
	320
	321	plt.clf()
	322	plt.title(r'Trust Method Comparison - Inverted')
	323	plt.xlabel('Alpha')
	324	plt.ylabel('Mean error')
	325	paths = []
	326	for tm in trust_methods:
	327	trust = trustValues(derived, tm)
	328	series = []
	329	for alpha in alphas:
	330	weights = invertedWeights(derived, trust, alpha)
	331	series.append(weightedMean(derived, weights) - expected_mean)
	332
	333	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	334
	335	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	336	plt.show()
	337
	338
	339	plt.clf()
	340	plt.title(r'Trust Method Comparison - Arctangent')
	341	plt.xlabel('Alpha')
	342	plt.ylabel('Mean error')
	343	paths = []
	344	for tm in trust_methods:
	345	trust = trustValues(derived, tm)
	346	series = []
	347	for alpha in alphas:
	348	weights = arctanWeights(derived, trust, alpha)
	349	series.append(weightedMean(derived, weights) - expected_mean)
	350
	351	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	352
	353	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	354	plt.show()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: