Context Navigation

source: trunk/bin/graph @ 12

Last change on this file since 12 was 12, checked in by tim, 10 years ago
.
Property svn:executable set to *
File size: 21.0 KB

Rev	Line
[6]	1	#!/usr/bin/env python3
	2
	3	import sys
	4	import os
	5	import time
	6	import random
	7	import tempfile
	8	import argparse
	9	import socket
	10	import json
	11
[10]	12	import numpy
[6]	13	import matplotlib.mlab as mlab
	14	import matplotlib.pyplot as plt
	15
	16
	17	VERSION = "{DEVELOPMENT}"
	18	if VERSION == "{DEVELOPMENT}":
	19	script_dir = '.'
	20	try:
	21	script_dir = os.path.dirname(os.path.realpath(__file__))
	22	except:
	23	try:
	24	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	25	except:
	26	pass
	27	sys.path.append("%s/../lib" % script_dir)
	28
	29	from nanownlib import *
	30	from nanownlib.stats import *
	31	import nanownlib.storage
	32
	33
	34	parser = argparse.ArgumentParser(
	35	description="")
	36	parser.add_argument('db_file', default=None,
	37	help='')
	38	options = parser.parse_args()
	39	db = nanownlib.storage.db(options.db_file)
	40
	41
def differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample RTT differences of the train and test series.

    For every entry of db.subseries('train', unusual_case) followed by
    every entry of db.subseries('test', unusual_case), the value is
    entry['unusual_<rtt_type>'] - entry['other_<rtt_type>'].

    db           -- object providing subseries(name, unusual_case)
    unusual_case -- passed through to db.subseries unchanged
    rtt_type     -- suffix selecting which pair of keys to subtract
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    ret_val = []
    # 'train' first, then 'test', so the result order matches the series order.
    for series_name in ('train', 'test'):
        ret_val += [s[unusual_key] - s[other_key]
                    for s in db.subseries(series_name, unusual_case)]
    return ret_val
[6]	46
def null_differences(db, unusual_case, rtt_type='packet'):
    """Return the per-sample RTT differences of the 'train_null' series.

    For every entry of db.subseries('train_null', unusual_case) the value
    is entry['unusual_<rtt_type>'] - entry['other_<rtt_type>'].

    db           -- object providing subseries(name, unusual_case)
    unusual_case -- passed through to db.subseries unchanged
    rtt_type     -- suffix selecting which pair of keys to subtract
    """
    unusual_key = 'unusual_' + rtt_type
    other_key = 'other_' + rtt_type
    return [s[unusual_key] - s[other_key]
            for s in db.subseries('train_null', unusual_case)]
[6]	50
[11]	51
def timeSeries(db, probe_type, unusual_case):
    """Yield one dict per analysed probe of `unusual_case` and `probe_type`.

    Each dict has three keys:
      'time_of_day'  -- the probe's time_of_day column
      <unusual_case> -- the probe's packet_rtt (the key is the argument value)
      'other_cases'  -- average packet_rtt of the probes with the same sample
                        and type but a different test_case (None when the
                        sample has no such probes)

    Rows are read by column name, so db.conn must produce rows that support
    string indexing.  No ORDER BY is applied; callers sort if they need to.
    """
    query="""
SELECT time_of_day,packet_rtt AS uc,(SELECT avg(packet_rtt) FROM probes,analysis
WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS oc
FROM (SELECT time_of_day,probes.sample,packet_rtt FROM probes,analysis
WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
"""

    cursor = db.conn.cursor()
    cursor.execute(query, {"probe_type": probe_type, "unusual_case": unusual_case})
    for row in cursor:
        yield {'time_of_day': row['time_of_day'],
               unusual_case: row['uc'],
               'other_cases': row['oc']}
	65	#samples,derived,null_derived = parse_data(input1)
	66
	67	#trust = trustValues(derived, sum)
	68	#weights = linearWeights(derived, trust, 0.25)
	69	#print('(test): %f' % weightedMean(derived,weights))
	70
	71	diffs = list(differences(db, 'long'))
	72	reported_diffs = list(differences(db, 'long', 'reported'))
	73	#shorts = [s['packet_rtt'] for s in samples.values() if s['test_case']=='short']
	74	#longs = [s['packet_rtt'] for s in samples.values() if s['test_case']=='long']
	75
	76	short_overtime = [(sample['time_of_day'],sample['short']) for sample in timeSeries(db,'train','short')]
	77	long_overtime = [(sample['time_of_day'],sample['long']) for sample in timeSeries(db,'train','long')]
	78	diff_overtime = [(sample['time_of_day'],sample['long']-sample['other_cases']) for sample in timeSeries(db,'train','long')]
	79	short_overtime.sort()
	80	long_overtime.sort()
	81	diff_overtime.sort()
	82
	83	print('packet_rtt diff median: %f' % statistics.median(diffs))
[10]	84	print('packet_rtt diff midhinge: %f' % midsummary(diffs))
[6]	85	print('packet_rtt diff trimean: %f' % trimean(diffs))
[10]	86	print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
	87	print('packet_rtt diff ubersummary: %f' % ubersummary(diffs))
[6]	88	print('packet_rtt diff MAD: %f' % mad(diffs))
[11]	89	try:
	90	print('reported diff trimean: %f' % trimean(reported_diffs))
	91	print('reported diff quadsummary: %f' % quadsummary(reported_diffs))
	92	print('reported diff ubersummary: %f' % ubersummary(reported_diffs))
	93	print('reported diff MAD: %f' % mad(reported_diffs))
[6]	94
[11]	95	import cProfile
	96	start = time.time()
	97	kresults = kfilter({},diffs)
	98	#print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var'])
	99	print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
	100	kresults = kfilter({},reported_diffs)
	101	#print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1])
	102	print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])
	103	print("kfilter time: %f" % (time.time()-start))
	104	except:
	105	pass
[6]	106
[12]	107	#print('tsval diff mean: %f' % numpy.mean(differences(db, 'long', 'tsval')))
	108	#print('tsval null diff mean: %f' % numpy.mean(null_differences(db, 'long', 'tsval')))
	109	#print('tsval diff weighted mean: %f' % tsvalwmean(db.subseries('train','long')+db.subseries('test','long')))
	110	#print('tsval null diff weighted mean: %f' % tsvalwmean(db.subseries('train_null','long')))
[10]	111
[11]	112
def getTCPTSPrecision():
    """Return tcpts_mean from the first row of the meta table.

    Reads through the module-level `db` handle.  Returns None when the
    meta table has no rows.
    """
    cursor = db.conn.cursor()
    cursor.execute("""SELECT tcpts_mean FROM meta;""")
    row = cursor.fetchone()
    if row:
        return row[0]
    return None
	121
	122
	123	def tsFilteredHistogram():
	124	tcpts_precision = getTCPTSPrecision()
	125
	126	num_bins = 500
	127	all = db.subseries('train','long')+db.subseries('test','long')
	128	diffs = [s['unusual_packet']-s['other_packet'] for s in all]
	129	ts0_diffs = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']-s['other_tsval'] == 0]
	130	ts1_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(s['unusual_tsval']-s['other_tsval']) > 0]
	131	ts2_diffs = [s['unusual_packet']-s['other_packet'] for s in all if abs(round((s['unusual_tsval']-s['other_tsval'])/tcpts_precision)) <= 1.0]
	132
	133	ts_mode = statistics.mode([s['unusual_tsval'] for s in all]+[s['other_tsval'] for s in all])
	134	ts_diff_mode = statistics.mode([s['unusual_tsval']-s['other_tsval'] for s in all])
	135	ts_common_mode = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']<=ts_mode and s['other_tsval']<=ts_mode]
	136	ts_common_diff_mode = [s['unusual_packet']-s['other_packet'] for s in all if s['unusual_tsval']-s['other_tsval']==ts_diff_mode]
	137
	138	print('packet_rtt diff quadsummary: %f' % quadsummary(diffs))
	139	print('packet_rtt tsval diff=0 quadsummary: %f' % quadsummary(ts0_diffs))
	140	print('packet_rtt tsval diff>0 quadsummary: %f' % quadsummary(ts1_diffs))
	141	print('packet_rtt tsval diff<=1 quadsummary: %f' % quadsummary(ts2_diffs))
	142	print('packet_rtt tsval mode quadsummary: %f' % quadsummary(ts_common_mode))
	143	print(len(diffs), len(ts0_diffs)+len(ts1_diffs))
	144	diffs.sort()
	145	cut_off_low = diffs[int(len(diffs)*0.005)]
	146	cut_off_high = diffs[int(len(diffs)*0.995)]
	147
	148	plt.clf()
	149	# the histogram of the data
	150	n, bins, patches = plt.hist(diffs, num_bins, normed=0, color='black', histtype='step', alpha=0.8,
	151	range=(cut_off_low,cut_off_high), label='all')
	152	n, bins, patches = plt.hist(ts0_diffs, num_bins, normed=0, color='blue', histtype='step', alpha=0.8,
	153	range=(cut_off_low,cut_off_high), label='tsval diff=0')
	154	n, bins, patches = plt.hist(ts1_diffs, num_bins, normed=0, color='red', histtype='step', alpha=0.8,
	155	range=(cut_off_low,cut_off_high), label='tsval diff>0')
	156	n, bins, patches = plt.hist(ts2_diffs, num_bins, normed=0, color='orange', histtype='step', alpha=0.8,
	157	range=(cut_off_low,cut_off_high), label='tsval diff<=1')
	158	#n, bins, patches = plt.hist(ts_common_mode, num_bins, normed=0, color='green', histtype='step', alpha=0.8,
	159	# range=(cut_off_low,cut_off_high), label='tsval common mode')
	160	n, bins, patches = plt.hist(ts_common_diff_mode, num_bins, normed=0, color='green', histtype='step', alpha=0.8,
	161	range=(cut_off_low,cut_off_high), label='tsval common diff mode')
	162	plt.xlabel('RTT Difference')
	163	plt.ylabel('Probability')
	164	plt.title(r'Histogram - distribution of differences by tsval')
	165
	166	# Tweak spacing to prevent clipping of ylabel
	167	plt.subplots_adjust(left=0.15)
	168	plt.legend()
	169	plt.show()
	170	#plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')
	171
	172	tsFilteredHistogram()
	173	sys.exit(0)
	174
	175
	176	from pykalman import KalmanFilter
	177	#kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
	178	kf = KalmanFilter(transition_matrices = [[1, 0], [0, 1]], n_dim_obs=2, observation_matrices = [[1.0, 0], [0, 1.0]])
	179
	180	#delta = 1e-5
	181	#trans_cov = delta / (1 - delta) * np.eye(2)
	182
	183	#kf = KalmanFilter(n_dim_obs=2, n_dim_state=2,
	184	# initial_state_mean=np.zeros(2),
	185	# initial_state_covariance=np.ones((2, 2)),
	186	# transition_matrices=np.eye(2),
	187	# observation_matrices=obs_mat,
	188	# observation_covariance=1.0,
	189	# transition_covariance=trans_cov)
	190
	191
	192	#measurements = numpy.asarray([[1,0], [0,0], [0,1]]) # 3 observations
	193	measurements = numpy.asarray([(s['unusual_packet'],s['other_packet']) for s in (db.subseries('train','long')+db.subseries('test','long'))])
	194	kf = kf.em(measurements, n_iter=5)
	195	#(filtered_state_means, filtered_state_covariances) = kf.filter(measurements)
	196	#print("packet_rtt pykalman:", filtered_state_means[-1][0]-filtered_state_means[-1][1])
	197	#print("packet_rtt pykalman:", filtered_state_means[-1])
	198
	199	(smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
	200	#up = numpy.mean([m[0] for m in smoothed_state_means])
	201	#op = numpy.mean([m[1] for m in smoothed_state_means])
	202	print("packet_rtt pykalman:", smoothed_state_means[-1], smoothed_state_means[-1][0]-smoothed_state_means[-1][1])
	203	#print("packet_rtt pykalman:", up, op, up-op)
	204
	205
[6]	206	#all_data = longs+shorts
	207	#all_data.sort()
	208	#cut_off_low = all_data[0]
	209	#cut_off_high = all_data[int(len(all_data)*0.997)]
	210
	211
def plotSingleProbe(probe_id=None):
    """Draw an event plot of the packet times recorded for one probe.

    probe_id -- probe to plot.  When None, a probe is picked from the
                analysis table: rows with an empty `suspect`, ordered by
                probe_id descending, skipping the first ten.

    Packets are read through the module-level `db` and split into four
    groups (sent or received, payload_len non-zero or zero), each drawn
    on its own line.  Returns None; warns and returns without plotting
    when no probe can be selected.
    """
    cursor = db.conn.cursor()
    if probe_id is None:
        query="""SELECT probe_id FROM analysis WHERE suspect='' ORDER BY probe_id DESC limit 1 OFFSET 10"""
        cursor.execute(query)
        row = cursor.fetchone()
        if row is None:
            # Fewer than eleven matching rows: nothing to choose from.
            sys.stderr.write("WARN: no suitable probe found in analysis table.\n")
            return
        probe_id = row[0]

    def observed_times(query):
        """Return (payload_times, other_times) for the packets `query` selects."""
        cursor.execute(query, (probe_id,))
        pkts = cursor.fetchall()
        with_payload = [observed for observed, payload_len in pkts if payload_len != 0]
        without_payload = [observed for observed, payload_len in pkts if payload_len == 0]
        return with_payload, without_payload

    sent_payload, sent_other = observed_times(
        """SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=1""")
    rcvd_payload, rcvd_other = observed_times(
        """SELECT observed,payload_len FROM packets WHERE probe_id=? AND sent=0""")

    #query="""SELECT reported,time_of_day FROM probes WHERE id=?"""
    #cursor.execute(query, (probe_id,))
    #reported,tod = cursor.fetchone()
    #userspace_times = [sent_times[0]-reported/3.0, sent_times[0]+reported]

    print("single probe counts:",len(sent_payload),len(sent_other),len(rcvd_payload),len(rcvd_other))
    plt.clf()
    plt.title("Single HTTP Request - Packet Times")
    # Sent packets in red on the two upper lines, received in blue below.
    plt.eventplot(sent_payload, colors=('red',), lineoffsets=8, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(sent_other, colors=('red',), lineoffsets=6, linewidths=2, alpha=0.6,label='sent')
    plt.eventplot(rcvd_payload, colors=('blue',), lineoffsets=4, linewidths=2, alpha=0.6,label='received')
    plt.eventplot(rcvd_other, colors=('blue',), lineoffsets=2, linewidths=2, alpha=0.6,label='received')
    #plt.legend((s,r), ('sent','received'))
    #plt.savefig('../img/http-packet-times.svg')
    plt.show()
	247
	248	#plotSingleProbe()
	249
	250
	251	def graphTestResults():
	252	plt.clf()
	253	plt.title("Test Results")
	254	plt.xlabel('sample size')
	255	plt.ylabel('error rate')
	256	legend = []
	257	colors = ['red','blue','green','purple','orange','black','brown']
	258	color_id = 0
	259
	260	cursor = db.conn.cursor()
	261	query = """
	262	SELECT classifier FROM classifier_results GROUP BY classifier ORDER BY classifier;
	263	"""
	264	cursor.execute(query)
	265	classifiers = []
	266	for c in cursor:
	267	classifiers.append(c[0])
	268
	269	for classifier in classifiers:
	270	query="""
	271	SELECT params FROM classifier_results
	272	WHERE trial_type='test'
	273	AND classifier=:classifier
	274	AND (false_positives+false_negatives)/2.0 < 5.0
	275	ORDER BY num_observations,(false_positives+false_negatives)
	276	LIMIT 1
	277	"""
	278	cursor.execute(query, {'classifier':classifier})
	279	row = cursor.fetchone()
	280	if row == None:
	281	query="""
	282	SELECT params FROM classifier_results
	283	WHERE trial_type='test' and classifier=:classifier
	284	ORDER BY (false_positives+false_negatives),num_observations
	285	LIMIT 1
	286	"""
	287	cursor.execute(query, {'classifier':classifier})
	288	row = cursor.fetchone()
	289	if row == None:
	290	sys.stderr.write("WARN: couldn't find test results for classifier '%s'.\n" % classifier)
	291	continue
	292
	293	best_params = row[0]
	294	query="""
	295	SELECT num_observations,(false_positives+false_negatives)/2.0 FROM classifier_results
	296	WHERE trial_type='test'
	297	AND classifier=:classifier
	298	AND params=:params
	299	ORDER BY num_observations
	300	"""
	301	cursor.execute(query, {'classifier':classifier,'params':best_params})
	302
	303	num_obs = []
	304	performance = []
	305	for row in cursor:
	306	num_obs.append(row[0])
	307	performance.append(row[1])
	308	#print(num_obs,performance)
	309	path = plt.scatter(num_obs, performance, color=colors[color_id], s=4, alpha=0.8, linewidths=3.0)
	310	plt.plot(num_obs, performance, color=colors[color_id], alpha=0.8)
	311	legend.append((classifier,path))
	312	color_id = (color_id+1) % len(colors)
	313
	314	plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='xx-small')
	315	plt.show()
	316
	317	graphTestResults()
	318
	319	sys.exit(0)
	320
[6]	321	plt.clf()
	322	plt.title("Packet RTT over time")
	323	plt.xlabel('Time of Day')
	324	plt.ylabel('RTT')
	325	s = plt.scatter([t for t,rtt in short_overtime], [rtt for t,rtt in short_overtime], s=1, color='red', alpha=0.6)
	326	l = plt.scatter([t for t,rtt in long_overtime], [rtt for t,rtt in long_overtime], s=1, color='blue', alpha=0.6)
	327	d = plt.scatter([t for t,rtt in diff_overtime], [rtt for t,rtt in diff_overtime], s=1, color='purple', alpha=0.6)
	328	plt.legend((s,l,d), ('short','long','difference'), scatterpoints=1)
	329	#plt.savefig('paper/figures/comcast-powerboost1.png')
	330	plt.show()
	331
[11]	332
	333
	334	plt.clf()
	335	plt.title("Simple HTTP Request")
	336	plt.xlabel('Time of Day')
	337	plt.ylabel('')
	338	s = plt.scatter(sent_times, [2]*len(sent_times), s=3, color='red', alpha=0.9)
	339	r = plt.scatter(rcvd_times, [1]*len(rcvd_times), s=3, color='blue', alpha=0.9)
	340	plt.legend((s,r), ('sent','received'), scatterpoints=1)
	341	plt.show()
	342
	343	sys.exit(0)
# Drop the references to the time series; they are not used below.
short_overtime,long_overtime,diff_overtime = None,None,None


def _trimmedRange(sorted_values, low_fraction, high_fraction):
    """Return the elements at the two fractional positions of a sorted list."""
    count = len(sorted_values)
    return sorted_values[int(count*low_fraction)], sorted_values[int(count*high_fraction)]


def _showDiffHistogram(values, bin_count, value_range, color):
    """Show a normalised step histogram of `values` limited to `value_range`."""
    plt.clf()
    # the histogram of the data (density= is the replacement for the
    # removed normed= keyword)
    plt.hist(values, bin_count, density=True, color=color, histtype='step', alpha=0.8,
             range=value_range)
    plt.xlabel('RTT Difference')
    plt.ylabel('Probability')
    plt.title(r'Histogram - distribution of differences')

    # Tweak spacing to prevent clipping of ylabel
    plt.subplots_adjust(left=0.15)
    #plt.legend()
    plt.show()
    #plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg')


num_bins = 300
reported_diffs.sort()
# The cut-offs index reported_diffs, so they are sized by reported_diffs
# itself (the original used len(diffs) here).
cut_off_low, cut_off_high = _trimmedRange(reported_diffs, 0.003, 0.997)
_showDiffHistogram(reported_diffs, num_bins, (cut_off_low,cut_off_high), 'black')


num_bins = 300
diffs.sort()
cut_off_low, cut_off_high = _trimmedRange(diffs, 0.003, 0.997)
_showDiffHistogram(diffs, num_bins, (cut_off_low,cut_off_high), 'purple')

sys.exit(0)
	389
	390
	391
	392	num_bins = 150
	393	# the histogram of the data
	394	n, bins, patches = plt.hist((shorts,longs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8,
	395	range=(cut_off_low,cut_off_high))
	396	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	397	# add a 'best fit' line
	398	#y = mlab.normpdf(bins, mu, sigma)
	399	#plt.plot(bins, y, 'r--')
	400	plt.xlabel('packet_rtt')
	401	plt.ylabel('Probability')
	402	plt.title(r'Histogram - RTT short and long')
	403
	404	# Tweak spacing to prevent clipping of ylabel
	405	plt.subplots_adjust(left=0.15)
	406	plt.legend()
	407	#plt.show()
	408	plt.savefig('paper/figures/comcast-powerboost2.svg')
	409
	410
	411
	412
	413	num_trials = 200
	414
	415
	416	subsample_sizes = (50,150,300,500,700,1000,2000,3000,5000,7000,10000,15000,20000)
	417	estimator = functools.partial(boxTest, 0.07, 0.08)
	418	performance = []
	419	for subsample_size in subsample_sizes:
	420	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	421	performance.append(100.0*len([e for e in estimates if e == 1])/num_trials)
	422
	423	null_performance = []
	424	for subsample_size in subsample_sizes:
	425	null_estimates = bootstrap(null_derived, subsample_size, num_trials, estimator)
	426	null_performance.append(100.0*len([e for e in null_estimates if e == 0])/num_trials)
	427
	428	plt.clf()
	429	plt.title("boxTest bootstrap")
	430	plt.xlabel('sample size')
	431	plt.ylabel('performance')
	432	plt.scatter(subsample_sizes, performance, s=2, color='red', alpha=0.6)
	433	plt.scatter(subsample_sizes, null_performance, s=2, color='blue', alpha=0.6)
	434	plt.show()
	435
	436
	437
	438	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	439	estimator = diffMedian
	440	performance = []
	441	for subsample_size in subsample_sizes:
	442	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	443	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	444
	445	plt.clf()
	446	plt.title("diff median bootstrap")
	447	plt.xlabel('sample size')
	448	plt.ylabel('performance')
	449	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	450	plt.show()
	451
	452
	453
	454
	455	subsample_sizes = (50,150,300,400,500,700,1000,2000,3000,4000,5000,7000,10000)
	456	weight_funcs = (linearWeights, prunedWeights)
	457	for wf in weight_funcs:
	458	estimator = functools.partial(estimateMean, hypotenuse, wf, 0.40)
	459	performance = []
	460	for subsample_size in subsample_sizes:
	461	estimates = bootstrap(derived, subsample_size, num_trials, estimator)
	462	performance.append(100.0len([e for e in estimates if e > expected_mean0.9 and e < expected_mean*1.1])/num_trials)
	463
	464	plt.clf()
	465	plt.title(repr(wf))
	466	plt.xlabel('sample size')
	467	plt.ylabel('performance')
	468	plt.scatter(subsample_sizes, performance, s=1, color='red', alpha=0.6)
	469	plt.show()
	470
	471
	472
	473	num_bins = 300
	474	# the histogram of the data
	475	n, bins, patches = plt.hist((tsshorts,tslongs), num_bins, normed=1, label=['short', 'long'], color=['red','blue'], histtype='step', alpha=0.8)
	476	#n, bins, patches = plt.hist(shorts2+longs2, num_bins, normed=1, facecolor='blue', histtype='step', alpha=0.3)
	477	# add a 'best fit' line
	478	#y = mlab.normpdf(bins, mu, sigma)
	479	#plt.plot(bins, y, 'r--')
	480	plt.xlabel('packet_rtt')
	481	plt.ylabel('Probability')
	482	plt.title(r'Histogram - tsval_rtt short vs long')
	483
	484	# Tweak spacing to prevent clipping of ylabel
	485	plt.subplots_adjust(left=0.15)
	486	plt.legend()
	487	plt.show()
	488
	489
	490
	491
	492	####
	493	#trust_methods = [min,max,sum,difference,product]
	494	trust_methods = [sum,product,hypotenuse]
	495	colors = ['red','blue','green','purple','orange','black']
	496	weight_methods = [prunedWeights, linearWeights]
	497	alphas = [i/100.0 for i in range(0,100,2)]
	498
	499
	500
	501
	502	plt.clf()
	503	plt.title(r'Trust Method Comparison - Linear')
	504	plt.xlabel('Alpha')
	505	plt.ylabel('Mean error')
	506	paths = []
	507	for tm in trust_methods:
	508	trust = trustValues(derived, tm)
	509	series = []
	510	for alpha in alphas:
	511	weights = linearWeights(derived, trust, alpha)
	512	series.append(weightedMean(derived, weights) - expected_mean)
	513
	514	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	515
	516	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	517	plt.show()
	518
	519
	520
	521	plt.clf()
	522	plt.title(r'Trust Method Comparison - Pruned')
	523	plt.xlabel('Alpha')
	524	plt.ylabel('Mean error')
	525	paths = []
	526	for tm in trust_methods:
	527	trust = trustValues(derived, tm)
	528	series = []
	529	for alpha in alphas:
	530	weights = prunedWeights(derived, trust, alpha)
	531	series.append(weightedMean(derived, weights) - expected_mean)
	532
	533	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	534
	535	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	536	plt.show()
	537
	538
	539	sys.exit(0)
	540
	541	plt.clf()
	542	plt.title(r'Trust Method Comparison - Inverted')
	543	plt.xlabel('Alpha')
	544	plt.ylabel('Mean error')
	545	paths = []
	546	for tm in trust_methods:
	547	trust = trustValues(derived, tm)
	548	series = []
	549	for alpha in alphas:
	550	weights = invertedWeights(derived, trust, alpha)
	551	series.append(weightedMean(derived, weights) - expected_mean)
	552
	553	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	554
	555	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	556	plt.show()
	557
	558
	559	plt.clf()
	560	plt.title(r'Trust Method Comparison - Arctangent')
	561	plt.xlabel('Alpha')
	562	plt.ylabel('Mean error')
	563	paths = []
	564	for tm in trust_methods:
	565	trust = trustValues(derived, tm)
	566	series = []
	567	for alpha in alphas:
	568	weights = arctanWeights(derived, trust, alpha)
	569	series.append(weightedMean(derived, weights) - expected_mean)
	570
	571	paths.append(plt.scatter(alphas, series, s=1, color=colors[len(paths)],alpha=0.6))
	572
	573	plt.legend(paths, [repr(tm) for tm in trust_methods], scatterpoints=1)
	574	plt.show()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: