Ignore:
Timestamp:
07/13/15 19:16:30 (9 years ago)
Author:
tim
Message:

.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/nanownlib/stats.py

    r8 r10  
    22import sys
    33import os
     4import functools
    45import math
    56import statistics
     
    133134
    134135
    135 def midhinge(values, distance=25):
    136     return (numpy.percentile(values, 50-distance) + numpy.percentile(values, 50+distance))/2.0
     136def midsummary(values, distance=25):
     137    #return (numpy.percentile(values, 50-distance) + numpy.percentile(values, 50+distance))/2.0
     138    l,h = numpy.percentile(values, (50-distance,50+distance))
     139    return (l+h)/2.0
    137140
    138141def trimean(values, distance=25):
    139     return (midhinge(values, distance) + statistics.median(values))/2
    140 
     142    return (midsummary(values, distance) + statistics.median(values))/2
     143
     144def ubersummary(values, distance=25):
     145    left2 = 50-distance
     146    left1 = left2/2.0
     147    left3 = (left2+50)/2.0
     148    right2 = 50+distance
     149    right3 = (right2+50)/2.0
     150    right1 = (right2+100)/2.0
     151    l1,l2,l3,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,right3,right2,right1))
     152    #print(left1,left2,left3,50,right3,right2,right1)
     153    #print(l1,l2,l3,m,r3,r2,r1)
     154    return (l1+l2*4+l3+r3+r2*4+r1)/12.0
     155    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
     156
     157def quadsummary(values, distance=25):
     158    left2 = 50-distance
     159    left1 = left2/2.0
     160    right2 = 50+distance
     161    right1 = (right2+100)/2.0
     162    l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1))
     163    #print(left1,left2,left3,50,right3,right2,right1)
     164    #print(l1,l2,l3,m,r3,r2,r1)
     165    return (l1+l2+r2+r1)/4.0
     166    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
     167
     168def quadsummary(values, distance=25):
     169    left1 = 50-distance
     170    left2 = (left1+50)/2.0
     171    right1 = 50+distance
     172    right2 = (right1+50)/2.0
     173    l1,l2,r2,r1 = numpy.percentile(values, (left1,left2,right2,right1))
     174    #print(left1,left2,left3,50,right3,right2,right1)
     175    #print(l1,l2,l3,m,r3,r2,r1)
     176    return (l1+l2+r2+r1)/4.0
     177    #return statistics.mean((l1,l2,l3,m,r3,r2,r1))
     178
     179   
    141180def weightedMean(derived, weights):
    142181    normalizer = sum(weights.values())/len(weights)
     
    170209
    171210
    172 def estimateMidhinge(derived):
    173     return midhinge([(d['long']-d['short']) for d in derived.values()])
     211def estimateMidsummary(derived):
     212    return midsummary([(d['long']-d['short']) for d in derived.values()])
    174213
    175214
     
    348387    rest = [s['other_cases'] for s in samples]
    349388   
    350     uc_high = numpy.percentile(uc, params['high'])
    351     rest_low = numpy.percentile(rest, params['low'])
     389    uc_high,uc_low = numpy.percentile(uc, (params['high'],params['low']))
     390    rest_high,rest_low = numpy.percentile(rest, (params['high'],params['low']))
    352391    if uc_high < rest_low:
    353392        if greater:
     
    356395            return 1
    357396
    358     uc_low = numpy.percentile(uc, params['low'])
    359     rest_high = numpy.percentile(rest, params['high'])
    360397    if rest_high < uc_low:
    361398        if greater:
     
    369406# Returns 1 if unusual_case is unusual in the expected direction
    370407#         0 otherwise
    371 def midhingeTest(params, greater, samples):
     408def summaryTest(f, params, greater, samples):
    372409    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
    373410
    374     mh = midhinge(diffs, params['distance'])
    375     #mh = trimean(diffs, params['distance'])
     411    mh = f(diffs, params['distance'])
    376412    if greater:
    377413        if mh > params['threshold']:
     
    385421            return 0
    386422
     423midsummaryTest = functools.partial(summaryTest, midsummary)
     424trimeanTest = functools.partial(summaryTest, trimean)
     425ubersummaryTest = functools.partial(summaryTest, ubersummary)
     426quadsummaryTest = functools.partial(summaryTest, quadsummary)
    387427
    388428def rmse(expected, measurements):
     
    392432def nrmse(expected, measurements):
    393433    return rmse(expected, measurements)/(max(measurements)-min(measurements))
     434
     435
     436class KalmanFilter1D:
     437    def __init__(self, x0, P, R, Q):
     438        self.x = x0
     439        self.P = P
     440        self.R = R
     441        self.Q = Q
     442
     443    def update(self, z):
     444        self.x = (self.P * z + self.x * self.R) / (self.P + self.R)
     445        self.P = 1. / (1./self.P + 1./self.R)
     446
     447    def predict(self, u=0.0):
     448        self.x += u
     449        self.P += self.Q
     450
     451
     452def kfilter(params, observations):
     453    x = numpy.array(observations)
     454    movement = 0
     455    est = []   
     456    var = []
     457    kf = KalmanFilter1D(x0 = quadsummary(x), # initial state
     458                        #P  = 10000,          # initial variance
     459                        P  = 10,          # initial variance
     460                        R  = numpy.std(x),   # msensor noise
     461                        Q  = 0)              # movement noise
     462    for round in range(1):
     463        for d in x:
     464            kf.predict(movement)
     465            kf.update(d)
     466            est.append(kf.x)
     467            var.append(kf.P)
     468
     469    return({'est':est, 'var':var})
     470
     471
     472def kalmanTest(params, greater, samples):
     473    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
     474
     475    m = kfilter(params, diffs)['est'][-1]
     476    if greater:
     477        if m > params['threshold']:
     478            return 1
     479        else:
     480            return 0
     481    else:
     482        if m < params['threshold']:
     483            return 1
     484        else:
     485            return 0
     486
     487
     488def kalmanTest2(params, greater, samples):
     489    diffs = [s['unusual_case']-s['other_cases'] for s in samples]
     490
     491    estimates = []
     492    size = 500
     493    for i in range(100):
     494        off = random.randrange(0,len(diffs))
     495        sub = diffs[off:size]
     496        if len(sub) < size:
     497            sub += diffs[0:size-len(sub)]
     498        estimates.append(kfilter(params, sub)['est'][-1])
     499           
     500    m = quadsummary(estimates)
     501    if greater:
     502        if m > params['threshold']:
     503            return 1
     504        else:
     505            return 0
     506    else:
     507        if m < params['threshold']:
     508            return 1
     509        else:
     510            return 0
     511
Note: See TracChangeset for help on using the changeset viewer.