Context Navigation

source: trunk/lib/nanownlib/stats.py @ 11

Last change on this file since 11 was 11, checked in by tim, 9 years ago
.
File size: 8.6 KB

Line
1
2	import sys
3	import os
4	import functools
5	import math
6	import statistics
7	import gzip
8	import random
9	import scipy
10	import scipy.stats
11	import numpy
12
13	# Don't trust numpy's seeding
14	numpy.random.seed(random.SystemRandom().randint(0,2**32-1))
15
16
17	def mad(arr):
18	""" Median Absolute Deviation: a "Robust" version of standard deviation.
19	Indices variabililty of the sample.
20	https://en.wikipedia.org/wiki/Median_absolute_deviation
21	"""
22	arr = numpy.ma.array(arr).compressed() # should be faster to not use masked arrays.
23	med = numpy.median(arr)
24	return numpy.median(numpy.abs(arr - med))
25
26
27	def cov(x,y):
28	mx = statistics.mean(x)
29	my = statistics.mean(y)
30	products = []
31	for i in range(0,len(x)):
32	products.append((x[i] - mx)*(y[i] - my))
33
34	return statistics.mean(products)
35
36
37	def difference(ls):
38	return ls[0]-ls[1]
39
40	def product(ls):
41	return ls[0]*ls[1]
42
43	def hypotenuse(ls):
44	return math.hypot(ls[0],ls[1])
45
46	def trustValues(derived, trustFunc):
47	ret_val = []
48	for k,v in derived.items():
49	ret_val.append((trustFunc((v['long'],v['short'])), k))
50
51	ret_val.sort()
52	return ret_val
53
54
55	def prunedWeights(derived, trust, alpha):
56	weights = {}
57
58	threshold = len(trust)*(1.0-alpha)
59	for i in range(0,len(trust)):
60	if i < threshold:
61	weights[trust[i][1]] = 1.0
62	else:
63	weights[trust[i][1]] = 0.0
64
65	return weights
66
67
68	def linearWeights(derived, trust, alpha):
69	x1 = trust[0][0]
70	y1 = 1.0 + (alpha*10)
71	x2 = trust[(len(trust)-1)//3][0]
72	y2 = 1.0
73	m = (y1-y2)/(x1-x2)
74	b = y1 - m*x1
75
76	weights = {}
77	for t,k in trust:
78	weights[k] = m*t+b
79	if weights[k] < 0.0:
80	weights[k] = 0.0
81
82	return weights
83
84
85	def invertedWeights(derived,trust,alpha):
86	# (x+1-first_sample)^(-alpha)
87	#scale = trust[0][0]
88
89	#weights = {}
90	#for t,k in trust:
91	# weights[k] = (t+1-scale)*(-1.0alpha)
92	# if weights[k] < 0.0:
93	# weights[k] = 0.0
94
95	weights = {}
96	for i in range(len(trust)):
97	w = 10.0/(i+2.0)-0.2
98	if w < 0.0:
99	w = 0.0
100	weights[trust[i][1]] = w
101
102
103	return weights
104
105
106
107	def arctanWeights(derived,trust,alpha):
108	shift = trust[int((len(trust)-1)*(1.0-alpha))][0]
109	minimum = trust[0][0]
110
111	weights = {}
112	for i in range(len(trust)):
113	w = math.pi/2.0 - math.atan(2*(trust[i][0] - shift)/(shift-minimum))
114	if w < 0.0:
115	w = 0.0
116	weights[trust[i][1]] = w
117
118	return weights
119
120
121	def arctanWeights2(derived,trust,alpha):
122	shift = trust[int((len(trust)-1)*(1.0-alpha))][0]
123	minimum = trust[0][0]
124	stretch = trust[int((len(trust)-1)*0.5)][0] - minimum # near median
125
126	weights = {}
127	for i in range(len(trust)):
128	w = math.pi/2.0 - math.atan(3*(trust[i][0] - shift)/(shift-minimum))
129	if w < 0.0:
130	w = 0.0
131	weights[trust[i][1]] = w
132
133	return weights
134
135
136	def midsummary(values, distance=25):
137	#return (numpy.percentile(values, 50-distance) + numpy.percentile(values, 50+distance))/2.0
138	l,h = numpy.percentile(values, (50-distance,50+distance))
139	return (l+h)/2.0
140
141	def trimean(values, distance=25):
142	return (midsummary(values, distance) + statistics.median(values))/2
143
144	def ubersummary(values, distance=25):
145	left2 = 50-distance
146	left3 = 50-(distance/2.0)
147	left1 = left2/2.0
148	right2 = 50+distance
149	right3 = 50+(distance/2.0)
150	right1 = (right2+100)/2.0
151	l1,l2,l3,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,right3,right2,right1))
152	#print(l1,l2,l3,m,r3,r2,r1)
153	return (l1+l24+l3+r3+r24+r1)/12.0
154	#return statistics.mean((l1,l2,l3,m,r3,r2,r1))
155
156
157	def quadsummary(values, distance=25):
158	left1 = 50-distance
159	left2 = (left1+50)/2.0
160	right1 = 50+distance
161	right2 = (right1+50)/2.0
162	l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1))
163	#print(left1,left2,left3,50,right3,right2,right1)
164	#print(l1,l2,l3,m,r3,r2,r1)
165	return (l1+l2+r2+r1)/4.0
166	#return statistics.mean((l1,l2,l3,m,r3,r2,r1))
167
168
169	def tsvalwmean(subseries):
170	weights = [(s['unusual_packet']+s['other_packet'])**2 for s in subseries]
171	normalizer = sum(weights)/len(weights)
172	return numpy.mean([weights[i]*(subseries[i]['unusual_tsval']-subseries[i]['other_tsval'])/normalizer
173	for i in range(len(weights))])
174
175	#def tsvalwmean(subseries):
176	# return numpy.mean([(s['unusual_tsval']-s['other_tsval']) for s in subseries])
177
178
179	def weightedMean(derived, weights):
180	normalizer = sum(weights.values())/len(weights)
181	return statistics.mean([w*(derived[k]['long']-derived[k]['short'])/normalizer for k,w in weights.items()])
182
183	def weightedMeanTsval(derived, weights):
184	normalizer = sum(weights.values())/len(weights)
185	return statistics.mean([w*(derived[k]['long_tsval']-derived[k]['short_tsval'])/normalizer for k,w in weights.items()])
186
187
188
189
190	def estimateMean(trustFunc, weightFunc, alpha, derived):
191	trust = trustValues(derived, trustFunc)
192	weights = weightFunc(derived, trust, alpha)
193	return weightedMean(derived, weights)
194
195
196	def estimateMeanTsval(trustFunc, weightFunc, alpha, derived):
197	trust = trustValues(derived, trustFunc)
198	weights = weightFunc(derived, trust, alpha)
199	return weightedMeanTsval(derived, weights)
200
201
202	def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
203	ret_val = []
204	for t in range(num_trials):
205	ret_val.append(estimator(db.subseries(probe_type, unusual_case, subseries_size)))
206
207	return ret_val
208
209
210	# Returns 1 if unusual_case is unusual in the expected direction
211	# 0 if it isn't unusual
212	# -1 if it is unusual in the wrong direction
213	def multiBoxTest(params, greater, samples):
214	uc = [s['unusual_packet'] for s in samples]
215	rest = [s['other_packet'] for s in samples]
216
217	uc_high,uc_low = numpy.percentile(uc, (params['high'],params['low']))
218	rest_high,rest_low = numpy.percentile(rest, (params['high'],params['low']))
219	if uc_high < rest_low:
220	if greater:
221	return -1
222	else:
223	return 1
224
225	if rest_high < uc_low:
226	if greater:
227	return 1
228	else:
229	return -1
230
231	return 0
232
233
234	# Returns 1 if unusual_case is unusual in the expected direction
235	# 0 otherwise
236	def summaryTest(f, params, greater, samples):
237	diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
238
239	mh = f(diffs, params['distance'])
240	if greater:
241	if mh > params['threshold']:
242	return 1
243	else:
244	return 0
245	else:
246	if mh < params['threshold']:
247	return 1
248	else:
249	return 0
250
251
252	midsummaryTest = functools.partial(summaryTest, midsummary)
253	trimeanTest = functools.partial(summaryTest, trimean)
254	ubersummaryTest = functools.partial(summaryTest, ubersummary)
255	quadsummaryTest = functools.partial(summaryTest, quadsummary)
256
257	def rmse(expected, measurements):
258	s = sum([(expected-m)**2 for m in measurements])/len(measurements)
259	return math.sqrt(s)
260
261	def nrmse(expected, measurements):
262	return rmse(expected, measurements)/(max(measurements)-min(measurements))
263
264
265	class KalmanFilter1D:
266	def __init__(self, x0, P, R, Q):
267	self.x = x0
268	self.P = P
269	self.R = R
270	self.Q = Q
271
272	def update(self, z):
273	self.x = (self.P * z + self.x * self.R) / (self.P + self.R)
274	self.P = 1. / (1./self.P + 1./self.R)
275
276	def predict(self, u=0.0):
277	self.x += u
278	self.P += self.Q
279
280
281	def kfilter(params, observations):
282	x = numpy.array(observations)
283	movement = 0
284	est = []
285	var = []
286	kf = KalmanFilter1D(x0 = quadsummary(x), # initial state
287	#P = 10000, # initial variance
288	P = 10, # initial variance
289	R = numpy.std(x), # msensor noise
290	Q = 0) # movement noise
291	for round in range(1):
292	for d in x:
293	kf.predict(movement)
294	kf.update(d)
295	est.append(kf.x)
296	var.append(kf.P)
297
298	return({'est':est, 'var':var})
299
300
301	def kalmanTest(params, greater, samples):
302	diffs = [s['unusual_packet']-s['other_packet'] for s in samples]
303
304	m = kfilter(params, diffs)['est'][-1]
305	if greater:
306	if m > params['threshold']:
307	return 1
308	else:
309	return 0
310	else:
311	if m < params['threshold']:
312	return 1
313	else:
314	return 0
315
316
317	def tsvalwmeanTest(params, greater, samples):
318	m = tsvalwmean(samples)
319	if greater:
320	if m > params['threshold']:
321	return 1
322	else:
323	return 0
324	else:
325	if m < params['threshold']:
326	return 1
327	else:
328	return 0

Note: See TracBrowser for help on using the repository browser.

Download in other formats: