- Timestamp:
- 07/19/15 15:05:42 (9 years ago)
- Location:
- trunk
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/bin/graph
r12 r13 81 81 diff_overtime.sort() 82 82 83 print('packet_rtt diff mean: %f' % statistics.mean(diffs)) 83 84 print('packet_rtt diff median: %f' % statistics.median(diffs)) 84 85 print('packet_rtt diff midhinge: %f' % midsummary(diffs)) … … 86 87 print('packet_rtt diff quadsummary: %f' % quadsummary(diffs)) 87 88 print('packet_rtt diff ubersummary: %f' % ubersummary(diffs)) 89 print('packet_rtt diff septasummary: %f' % septasummary(diffs)) 88 90 print('packet_rtt diff MAD: %f' % mad(diffs)) 89 91 try: … … 91 93 print('reported diff quadsummary: %f' % quadsummary(reported_diffs)) 92 94 print('reported diff ubersummary: %f' % ubersummary(reported_diffs)) 95 print('reported diff septasummary: %f' % septasummary(reported_diffs)) 93 96 print('reported diff MAD: %f' % mad(reported_diffs)) 94 97 95 import cProfile96 start = time.time()97 kresults = kfilter({},diffs)98 #import cProfile 99 #start = time.time() 100 #kresults = kfilter({},diffs) 98 101 #print('packet_rtt diff kfilter: ', numpy.mean(kresults['est']), kresults['var']) 99 print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1])100 kresults = kfilter({},reported_diffs)102 #print('packet_rtt diff kfilter: ', kresults['est'][-1], kresults['var'][-1]) 103 #kresults = kfilter({},reported_diffs) 101 104 #print('reported diff kfilter: ', numpy.mean(kresults['est']), kresults['var'][-1]) 102 print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1])103 print("kfilter time: %f" % (time.time()-start))105 #print('reported diff kfilter: ', kresults['est'][-1], kresults['var'][-1]) 106 #print("kfilter time: %f" % (time.time()-start)) 104 107 except: 105 108 pass … … 111 114 112 115 116 117 118 def testKalman4D(params=None): 119 from pykalman import KalmanFilter 120 train = db.subseries('train','long', offset=0) 121 test = db.subseries('test','long', offset=0) 122 null = db.subseries('train_null','long', offset=0) 123 measurements = numpy.asarray([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) for s in (train+test)]) 124 null_measurements = numpy.asarray([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) for s in null]) 125 126 if params == None: 127 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, 128 initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]), 129 quadsummary([s['other_packet'] for s in train]), 130 numpy.mean([s['unusual_tsval'] for s in train]), 131 numpy.mean([s['other_tsval'] for s in train])]) 132 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4) 133 134 start=time.time() 135 kf = kf.em(measurements[0:len(train)]+null_measurements[0:50000], n_iter=10, 136 em_vars=('transition_matrices', 137 'observation_matrices', 138 'transition_offsets', 139 'observation_offsets', 140 'transition_covariance', 141 'observation_covariance', 142 'initial_state_mean', 143 'initial_state_covariance')) 144 params = {'transition_matrices': kf.transition_matrices.tolist(), 145 'observation_matrices': kf.observation_matrices.tolist(), 146 'transition_offsets': kf.transition_offsets.tolist(), 147 'observation_offsets': kf.observation_offsets.tolist(), 148 'transition_covariance': kf.transition_covariance.tolist(), 149 'observation_covariance': kf.observation_covariance.tolist(), 150 'initial_state_mean': kf.initial_state_mean.tolist(), 151 'initial_state_covariance': kf.initial_state_covariance.tolist()} 152 print("Learned Params:\n") 153 import pprint 154 pprint.pprint(params) 155 print("pykalman em time: %f" % (time.time()-start)) 156 157 #kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params) 158 159 num_obs=5000 160 for offset in range(50000,100000+num_obs,num_obs): 161 start=time.time() 162 m = measurements[offset:offset+num_obs] 163 #params['initial_state_mean']=[quadsummary([s[0] for s in m]), 164 # quadsummary([s[1] for s in m]), 165 # numpy.mean([s[2] for s in m]), 166 # numpy.mean([s[3] for s in m])] 167 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **params) 168 (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m) 169 #print("pykalman smooth time: %f" % (time.time()-start)) 170 up = numpy.mean([m[0] for m in smoothed_state_means]) 171 op = numpy.mean([m[1] for m in smoothed_state_means]) 172 #print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1]) 173 print("packet_rtt pykalman mean:", up-op) 174 print("packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m])) 175 #up = numpy.mean([m[2] for m in smoothed_state_means]) 176 #op = numpy.mean([m[3] for m in smoothed_state_means]) 177 #print("tsval_rtt pykalman final:", smoothed_state_means[-1][2]-smoothed_state_means[-1][3]) 178 #print("tsval_rtt pykalman mean:", up-op) 179 #print("tsval_rtt mean:", numpy.mean([s[2]-s[3] for s in m])) 180 181 for offset in range(0,len(null_measurements)+num_obs,num_obs): 182 start=time.time() 183 m = null_measurements[offset:offset+num_obs] 184 #params['initial_state_mean']=[quadsummary([s[0] for s in m]), 185 # quadsummary([s[1] for s in m]), 186 # numpy.mean([s[2] for s in m]), 187 # numpy.mean([s[3] for s in m])] 188 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **params) 189 (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m) 190 up = numpy.mean([m[0] for m in smoothed_state_means]) 191 op = numpy.mean([m[1] for m in smoothed_state_means]) 192 #print("null packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1]) 193 print("null packet_rtt pykalman mean:", up-op) 194 print("null packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m])) 195 #up = numpy.mean([m[2] for m in smoothed_state_means]) 196 #op = numpy.mean([m[3] for m in smoothed_state_means]) 197 #print("null tsval_rtt pykalman final:", smoothed_state_means[-1][2]-smoothed_state_means[-1][3]) 198 #print("null tsval_rtt pykalman mean:", up-op) 199 #print("null tsval_rtt mean:", numpy.mean([s[2]-s[3] for s in m])) 200 201 202 203 echo_vm_5k={'initial_state_covariance': [[33599047.5, 204 -18251285.25, 205 3242535690.59375, 206 -8560730487.84375], 207 [-18251285.25, 208 9914252.3125, 209 -1761372688.59375, 210 4650260880.1875], 211 [3242535690.59375, 212 -1761372688.59375, 213 312926663745.03125, 214 -826168494791.7188], 215 [-8560730487.84375, 216 4650260880.1875, 217 -826168494791.7188, 218 2181195982530.4688]], 219 'initial_state_mean': [12939012.5625, 220 12934563.71875, 221 13134751.608, 222 13138990.9985], 223 'observation_covariance': [[11960180434.411114, 224 4760272534.795976, 225 8797551081.431936, 226 6908794128.927051], 227 [4760272534.795962, 228 12383598172.428213, 229 5470747537.2599745, 230 11252625555.297853], 231 [8797551081.431955, 232 5470747537.2601185, 233 1466222848395.7058, 234 72565713883.12643], 235 [6908794128.927095, 236 11252625555.297981, 237 72565713883.12654, 238 1519760903943.507]], 239 'observation_matrices': [[1.4255288693095167, 240 -0.4254638445329988, 241 0.0003406844036817347, 242 -0.0005475021956726778], 243 [-0.46467270827589857, 244 1.4654311778340343, 245 -0.0003321330280128265, 246 -0.0002853945703691352], 247 [-0.2644570970067974, 248 -0.33955835481495455, 249 1.7494161615202275, 250 -0.15394117603733548], 251 [-0.3419097544041847, 252 -0.23992883666045373, 253 -0.15587790880447727, 254 1.7292393175137022]], 255 'observation_offsets': [165.2279084503762, 256 157.76807691937614, 257 168.4235495099334, 258 225.33433430227353], 259 'transition_covariance': [[2515479496.145993, 260 -401423541.70620924, 261 1409951418.1627903, 262 255932902.74454522], 263 [-401423541.706214, 264 2744353887.676857, 265 1162316.2019491254, 266 1857251491.3987627], 267 [1409951418.1628358, 268 1162316.2020361447, 269 543279068599.8229, 270 -39399311190.5746], 271 [255932902.74459982, 272 1857251491.398838, 273 -39399311190.574585, 274 537826124257.5266]], 275 'transition_matrices': [[0.52163952865412, 276 0.47872618354122665, 277 -0.0004322286766109684, 278 0.00017293351811531466], 279 [0.5167436693545113, 280 0.48319044922845933, 281 7.765428142114672e-05, 282 -0.00021518950285326355], 283 [0.2091705950622469, 284 0.41051399729482796, 285 0.19341113299389256, 286 0.19562916616052917], 287 [0.368592004009912, 288 0.22263632461118732, 289 0.20756792378812872, 290 0.20977025833570906]], 291 'transition_offsets': [592.5708159274, 292 583.3804671015271, 293 414.4187239098291, 294 562.166786712371]} 295 296 echo_vm_5k={'initial_state_covariance': [[0.375, 0.0, 0.0, 0.0], 297 [0.0, 0.375, 0.0, 0.0], 298 [0.0, 0.0, 0.375, 0.0], 299 [0.0, 0.0, 0.0, 0.375]], 300 'initial_state_mean': [15997944.198361743, 301 16029825.435899183, 302 17093077.26228404, 303 17524263.088803563], 304 'observation_covariance': [[36572556646.179054, 305 21816054953.37006, 306 31144379008.310543, 307 19651005729.823025], 308 [21816054953.372543, 309 440428106325.20325, 310 41103447776.740585, 311 427146570672.51227], 312 [31144379008.31037, 313 41103447776.74027, 314 3280009435458.6953, 315 458734528073.65686], 316 [19651005729.82234, 317 427146570672.5109, 318 458734528073.6557, 319 3769493190697.773]], 320 'observation_matrices': [[1.0248853427592337, 321 -0.031198859962501047, 322 0.001613706836380402, 323 0.004720209443291878], 324 [-0.8604422900368718, 325 1.8583369609057172, 326 -0.0022646214457040514, 327 0.004437933935378169], 328 [-0.5814771409524866, 329 0.22228184387142846, 330 1.6259599749174072, 331 -0.271594798325566], 332 [-0.5862601003257453, 333 0.2598285939005791, 334 -0.28286590143513024, 335 1.604087079832425]], 336 'observation_offsets': [1979.4518332096984, 337 1889.3380163762793, 338 2132.9112026744906, 339 1750.7759421584785], 340 'transition_covariance': [[6176492087.271547, 341 762254719.4171592, 342 4584288694.652873, 343 3044796192.4357214], 344 [762254719.4185101, 345 173302376079.4761, 346 5261303152.757347, 347 167562483383.9925], 348 [4584288694.651718, 349 5261303152.755746, 350 1056156956874.4131, 351 -115859156952.07962], 352 [3044796192.434162, 353 167562483383.9901, 354 -115859156952.08018, 355 1225788436266.3086]], 356 'transition_matrices': [[0.9673912485796876, 357 0.03252962227543321, 358 0.0006756067792537124, 359 -0.0006566638567164773], 360 [0.9548761966068113, 361 0.03841774395880293, 362 0.00426067282319309, 363 0.002303362691861821], 364 [0.6215040230859188, 365 -0.2584476837756142, 366 0.3176491193420503, 367 0.3241682768126566], 368 [0.6634028281470279, 369 -0.33548335246018723, 370 0.3298144902195048, 371 0.3475836278392421]], 372 'transition_offsets': [1751.3049487348183, 373 1764.989515773476, 374 1986.8405778425586, 375 2232.830254345267]} 376 #testKalman4D(echo_vm_5k) 377 378 379 380 def testKalman(params=None): 381 from pykalman import AdditiveUnscentedKalmanFilter,KalmanFilter 382 train = db.subseries('train','long', offset=0) 383 test = db.subseries('test','long', offset=0) 384 measurements = numpy.asarray([(s['unusual_packet'],s['other_packet']) for s in (train+test)]) 385 386 #kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]]) 387 kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, 388 initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]), 389 quadsummary([s['other_packet'] for s in train])]) 390 #kf = AdditiveUnscentedKalmanFilter(n_dim_obs=2, n_dim_state=2) 391 392 if params == None: 393 start=time.time() 394 kf = kf.em(measurements[0:len(train)], n_iter=10, 395 em_vars=('transition_matrices', 396 'observation_matrices', 397 'transition_offsets', 398 'observation_offsets', 399 'transition_covariance', 400 'observation_covariance', 401 'initial_state_covariance')) 402 params = {'transition_matrices': kf.transition_matrices.tolist(), 403 'observation_matrices': kf.observation_matrices.tolist(), 404 'transition_offsets': kf.transition_offsets.tolist(), 405 'observation_offsets': kf.observation_offsets.tolist(), 406 'transition_covariance': kf.transition_covariance.tolist(), 407 'observation_covariance': kf.observation_covariance.tolist(), 408 'initial_state_mean': kf.initial_state_mean.tolist(), 409 'initial_state_covariance': kf.initial_state_covariance.tolist()} 410 print("Learned Params:\n") 411 import pprint 412 pprint.pprint(params) 413 print("pykalman em time: %f" % (time.time()-start)) 414 415 #kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params) 416 417 num_obs=10000 418 for offset in range(50000,100000+num_obs,num_obs): 419 start=time.time() 420 kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, **params) 421 m = measurements[offset:offset+num_obs] 422 (smoothed_state_means, smoothed_state_covariances) = kf.smooth(m) 423 print("pykalman smooth time: %f" % (time.time()-start)) 424 up = numpy.mean([m[0] for m in smoothed_state_means]) 425 op = numpy.mean([m[1] for m in smoothed_state_means]) 426 print("packet_rtt pykalman final:", smoothed_state_means[-1][0]-smoothed_state_means[-1][1]) 427 print("packet_rtt pykalman mean:", up-op) 428 print("packet_rtt mean:", numpy.mean([s[0]-s[1] for s in m])) 429 430 431 five_iter = {'observation_offsets': [-54.53185823, -55.25219184], 432 'observation_covariance': [[ 1.15059170e+10, 4.36743765e+09], 433 [ 4.36743765e+09, 1.19410313e+10]], 434 'initial_state_mean': [ 12939012.5625 , 12934563.71875], 435 'transition_covariance': [[ 2.98594543e+09, 6.86355073e+07], 436 [ 6.86355073e+07, 3.21368699e+09]], 437 'initial_state_covariance': [[ 2.36836696e+09, 1.63195635e+09], 438 [ 1.63195635e+09, 1.12452233e+09]], 439 'transition_offsets': [ 343.69740217, 338.5042467 ], 440 'observation_matrices': [[ 1.42539895, -0.4255261 ], 441 [-0.46280375, 1.46295189]], 442 'transition_matrices': [[ 0.56151623, 0.4385931 ], 443 [ 0.47309189, 0.52673508]]} 444 ten_iter = {'initial_state_covariance': [[229936928.28125, 41172601.0], 445 [41172601.0, 7372383.46875]], 446 'initial_state_mean': [12939012.5625, 12934563.71875], 447 'observation_covariance': [[11958914107.88334, 4761048283.066559], 448 [4761048283.066557, 12388186543.42032]], 449 'observation_matrices': [[1.4258395826727792, -0.42598392357467674], 450 [-0.4647443890462455, 1.4648767294384015]], 451 'observation_offsets': [165.409715349344, 157.96206130876212], 452 'transition_covariance': [[2515594742.7187943, -401728959.41375697], 453 [-401728959.41375697, 2743831805.402682]], 454 'transition_matrices': [[0.521306461057975, 0.47879632652984583], 455 [0.5167881285851763, 0.483006520280469]], 456 'transition_offsets': [592.4419187566978, 583.2272403965366]} 457 #testKalman(ten_iter) 458 459 113 460 def getTCPTSPrecision(): 114 461 cursor = db.conn.cursor() 115 query="""SELECT tcpts_mean FROM meta ;"""462 query="""SELECT tcpts_mean FROM meta""" 116 463 cursor.execute(query) 117 464 row = cursor.fetchone() … … 170 517 #plt.savefig('paper/graphs/dists-vs-dist-of-diffs2.svg') 171 518 172 tsFilteredHistogram() 173 sys.exit(0) 174 175 176 from pykalman import KalmanFilter 177 #kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]]) 178 kf = KalmanFilter(transition_matrices = [[1, 0], [0, 1]], n_dim_obs=2, observation_matrices = [[1.0, 0], [0, 1.0]]) 179 180 #delta = 1e-5 181 #trans_cov = delta / (1 - delta) * np.eye(2) 182 183 #kf = KalmanFilter(n_dim_obs=2, n_dim_state=2, 184 # initial_state_mean=np.zeros(2), 185 # initial_state_covariance=np.ones((2, 2)), 186 # transition_matrices=np.eye(2), 187 # observation_matrices=obs_mat, 188 # observation_covariance=1.0, 189 # transition_covariance=trans_cov) 190 191 192 #measurements = numpy.asarray([[1,0], [0,0], [0,1]]) # 3 observations 193 measurements = numpy.asarray([(s['unusual_packet'],s['other_packet']) for s in (db.subseries('train','long')+db.subseries('test','long'))]) 194 kf = kf.em(measurements, n_iter=5) 195 #(filtered_state_means, filtered_state_covariances) = kf.filter(measurements) 196 #print("packet_rtt pykalman:", filtered_state_means[-1][0]-filtered_state_means[-1][1]) 197 #print("packet_rtt pykalman:", filtered_state_means[-1]) 198 199 (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements) 200 #up = numpy.mean([m[0] for m in smoothed_state_means]) 201 #op = numpy.mean([m[1] for m in smoothed_state_means]) 202 print("packet_rtt pykalman:", smoothed_state_means[-1], smoothed_state_means[-1][0]-smoothed_state_means[-1][1]) 203 #print("packet_rtt pykalman:", up, op, up-op) 519 #tsFilteredHistogram() 520 521 522 204 523 205 524 … … 267 586 classifiers.append(c[0]) 268 587 588 max_obs = 0 269 589 for classifier in classifiers: 270 590 query=""" … … 304 624 performance = [] 305 625 for row in cursor: 626 max_obs = max(max_obs, row[0]) 306 627 num_obs.append(row[0]) 307 628 performance.append(row[1]) … … 313 634 314 635 plt.legend([l[1] for l in legend], [l[0] for l in legend], scatterpoints=1, fontsize='xx-small') 636 plt.plot([0, max_obs], [5.0, 5.0], "k--") 315 637 plt.show() 316 638 -
trunk/bin/train
r11 r13 55 55 trainer = classifiers[classifier]['train'] 56 56 threshold = 5.0 # in percent 57 num_obs = 100057 num_obs = 7 58 58 max_obs = int(db.populationSize('train')/5) 59 59 result = None … … 75 75 classifiers[classifier]['train_results'].append(result) 76 76 77 if error < threshold :77 if error < threshold and num_obs > 100: 78 78 break 79 79 -
trunk/lib/nanownlib/__init__.py
r11 r13 176 176 my_ip = getLocalIP(target_ip, target_port) 177 177 my_iface = getIfaceForIP(my_ip) 178 return subprocess.Popen(['chrt', '-r', '99', ' ./bin/csamp', my_iface, my_ip,178 return subprocess.Popen(['chrt', '-r', '99', 'nanown-csamp', my_iface, my_ip, 179 179 target_ip, "%d" % target_port, output_file, '0']) 180 180 … … 256 256 suspect += 'R' # reordered received packets 257 257 258 packet_rtt = last_rcvd['observed'] - last_sent['observed']259 if packet_rtt < 0:260 sys.stderr.write("WARN: Negative packet_rtt. last_rcvd=%s,last_sent=%s\n" % (last_rcvd, last_sent))261 262 258 last_sent_ack = None 263 259 try: 264 last_sent_ack = min(((p[' observed'],p) for p in packets265 if p['sent']==0 and p['payload_len']+last_sent['tcpseq'] ==p['tcpack']))[1]260 last_sent_ack = min(((p['tcpack'],p['observed'],p) for p in packets 261 if p['sent']==0 and p['payload_len']+last_sent['tcpseq']>=p['tcpack']))[2] 266 262 267 263 except Exception as e: 268 264 sys.stderr.write("WARN: Could not find last_sent_ack.\n") 269 265 266 packet_rtt = last_rcvd['observed'] - last_sent['observed'] 270 267 tsval_rtt = None 271 268 if None not in (timestamp_precision, last_sent_ack): 272 269 tsval_rtt = int(round((last_rcvd['tsval'] - last_sent_ack['tsval'])*timestamp_precision)) 273 270 271 if packet_rtt < 0 or (tsval_rtt != None and tsval_rtt < 0): 272 #sys.stderr.write("WARN: Negative packet or tsval RTT. last_rcvd=%s,last_sent=%s\n" % (last_rcvd, last_sent)) 273 suspect += 'N' 274 274 275 return {'packet_rtt':packet_rtt, 275 276 'tsval_rtt':tsval_rtt, … … 279 280 280 281 281 # trimeanand mad for each dist of differences282 # septasummary and mad for each dist of differences 282 283 def evaluateTrim(db, unusual_case, strim, rtrim): 283 284 cursor = db.conn.cursor() … … 292 293 FROM (SELECT probes.sample s,packet_rtt FROM probes,trim_analysis WHERE sent_trimmed=:strim AND rcvd_trimmed=:rtrim AND trim_analysis.probe_id=probes.id AND probes.test_case=:unusual_case AND probes.type in ('train','test')) u 293 294 """ 294 295 #TODO: check for "N" in suspect field and return a flag 296 295 297 params = {"strim":strim,"rtrim":rtrim,"unusual_case":unusual_case} 296 298 cursor.execute(query, params) 297 299 differences = [row[0] for row in cursor] 298 300 299 return ubersummary(differences),mad(differences)301 return septasummary(differences),mad(differences) 300 302 301 303 … … 362 364 for strim in range(0,num_sent): 363 365 for rtrim in range(0,num_rcvd): 366 #print(strim,rtrim) 364 367 if strim == 0 and rtrim == 0: 365 368 continue # no point in doing 0,0 again -
trunk/lib/nanownlib/stats.py
r11 r13 166 166 #return statistics.mean((l1,l2,l3,m,r3,r2,r1)) 167 167 168 169 def septasummary(values, distance=25): 170 left2 = 50-distance 171 left3 = 50-(distance/2.0) 172 left1 = left2/2.0 173 right2 = 50+distance 174 right3 = 50+(distance/2.0) 175 right1 = (right2+100)/2.0 176 l1,l2,l3,m,r3,r2,r1 = numpy.percentile(values, (left1,left2,left3,50,right3,right2,right1)) 177 return (l1+l2+l3+m+r3+r2+r1)/7.0 178 168 179 169 180 def tsvalwmean(subseries): … … 254 265 ubersummaryTest = functools.partial(summaryTest, ubersummary) 255 266 quadsummaryTest = functools.partial(summaryTest, quadsummary) 267 septasummaryTest = functools.partial(summaryTest, septasummary) 256 268 257 269 def rmse(expected, measurements): … … 327 339 else: 328 340 return 0 341 342 343 from pykalman import KalmanFilter 344 def pyKalman4DTest(params, greater, samples): 345 kp = params['kparams'] 346 #kp['initial_state_mean']=[quadsummary([s['unusual_packet'] for s in samples]), 347 # quadsummary([s['other_packet'] for s in samples]), 348 # numpy.mean([s['unusual_tsval'] for s in samples]), 349 # numpy.mean([s['other_tsval'] for s in samples])] 350 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **kp) 351 smooth,covariance = kf.smooth([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) 352 for s in samples]) 353 m = numpy.mean(smooth) 354 if greater: 355 if m > params['threshold']: 356 return 1 357 else: 358 return 0 359 else: 360 if m < params['threshold']: 361 return 1 362 else: 363 return 0 364 -
trunk/lib/nanownlib/train.py
r11 r13 300 300 301 301 302 from pykalman import KalmanFilter 303 _pykalman4d_params = None 304 _pykalman4d_params = {'observation_covariance': [[11960180434.411114, 4760272534.795976, 8797551081.431936, 6908794128.927051], [4760272534.795962, 12383598172.428213, 5470747537.2599745, 11252625555.297853], [8797551081.431955, 5470747537.2601185, 1466222848395.7058, 72565713883.12643], [6908794128.927095, 11252625555.297981, 72565713883.12654, 1519760903943.507]], 'transition_offsets': [592.5708159274, 583.3804671015271, 414.4187239098291, 562.166786712371], 'observation_offsets': [165.2279084503762, 157.76807691937614, 168.4235495099334, 225.33433430227353], 'initial_state_covariance': [[33599047.5, -18251285.25, 3242535690.59375, -8560730487.84375], [-18251285.25, 9914252.3125, -1761372688.59375, 4650260880.1875], [3242535690.59375, -1761372688.59375, 312926663745.03125, -826168494791.7188], [-8560730487.84375, 4650260880.1875, -826168494791.7188, 2181195982530.4688]], 'initial_state_mean': [12939012.5625, 12934563.71875, 13134751.608, 13138990.9985], 'transition_covariance': [[2515479496.145993, -401423541.70620924, 1409951418.1627903, 255932902.74454522], [-401423541.706214, 2744353887.676857, 1162316.2019491254, 1857251491.3987627], [1409951418.1628358, 1162316.2020361447, 543279068599.8229, -39399311190.5746], [255932902.74459982, 1857251491.398838, -39399311190.574585, 537826124257.5266]], 'observation_matrices': [[1.4255288693095167, -0.4254638445329988, 0.0003406844036817347, -0.0005475021956726778], [-0.46467270827589857, 1.4654311778340343, -0.0003321330280128265, -0.0002853945703691352], [-0.2644570970067974, -0.33955835481495455, 1.7494161615202275, -0.15394117603733548], [-0.3419097544041847, -0.23992883666045373, -0.15587790880447727, 1.7292393175137022]], 'transition_matrices': [[0.52163952865412, 0.47872618354122665, -0.0004322286766109684, 0.00017293351811531466], [0.5167436693545113, 0.48319044922845933, 7.765428142114672e-05, -0.00021518950285326355], [0.2091705950622469, 0.41051399729482796, 0.19341113299389256, 0.19562916616052917], [0.368592004009912, 0.22263632461118732, 0.20756792378812872, 0.20977025833570906]]} 305 _pykalman4d_good_threshold = 2009.25853272 306 _pykalman4d_params = None 307 308 _pykalman4d_params = {'observation_covariance': [[32932883342.63772, 18054300398.442295, 27538911550.824535, 17152378956.778696], [18054300398.446983, 436546443436.5115, 37327644533.69647, 424485386677.31274], [27538911550.838238, 37327644533.706024, 3276324705772.982, 456017515263.88715], [17152378956.788027, 424485386677.317, 456017515263.88245, 3767844180658.1724]], 'observation_matrices': [[1.025773112769464, -0.028755990114063934, 0.0003540921897382532, 0.0025748564713126143], [-0.8595457826320256, 1.8607522167556567, -0.003520779053701517, 0.002309145982167138], [-0.5806427858959466, 0.22466075141448982, 1.6247192012813798, -0.27363797512617793], [-0.5853369461874607, 0.262177909212312, -0.28415108658843735, 1.6020343138710018]], 'initial_state_mean': [0.0, 0.0, 0.0, 0.0], 'observation_offsets': [549.4498515668686, 484.2106453284049, 648.556719142234, 380.10978090584763], 'transition_covariance': [[4147844406.7768326, -1308763245.5992138, 2920744388.523955, 860096280.797968], [-1308763245.5998695, 171190325905.83395, 3557618712.218984, 165332873663.83142], [2920744388.532502, 3557618712.2283373, 1054894349089.0673, -117551209299.73402], [860096280.805706, 165332873663.83963, -117551209299.73474, 1223605046475.7324]], 'transition_offsets': [1156.9264087977374, 1150.752680207601, 1312.2595286459816, 1267.4069537452415], 'initial_state_covariance': [[667999273207241.0, 669330484615232.1, 713726904326576.2, 731731206363217.4], [669330484615390.9, 670664348906228.8, 715149243295271.9, 733189424910272.2], [713726904326843.4, 715149243295370.6, 762584802695960.9, 781821582244358.5], [731731206363417.0, 733189424910299.0, 781821582244278.6, 801543624134758.0]], 'transition_matrices': [[0.9680677036616316, 0.03260717171917804, 0.0005279411071512641, -0.0012363486571871363], [0.9555219601128613, 0.03851351491891819, 0.00411268796118236, 0.0017357967358293536], [0.622254432930994, -0.2583795512595657, 0.31745705251401546, 0.32357126976364725], [0.6644076824932768, -0.33545285094373867, 0.3295778964272671, 0.34682391469482354]]} 309 _pykalman4d_good_threshold = -253.849393803 310 def trainPyKalman4D(db, unusual_case, greater, num_observations): 311 global _pykalman4d_params 312 global _pykalman4d_good_threshold 313 db.resetOffsets() 314 315 if _pykalman4d_params == None: 316 train = db.subseries('train',unusual_case, offset=0) 317 null = db.subseries('train_null',unusual_case, offset=0) 318 train_array = numpy.asarray([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) 319 for s in train]) 320 null_array = numpy.asarray([(s['unusual_packet'],s['other_packet'],s['unusual_tsval'],s['other_tsval']) 321 for s in null]) 322 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4) 323 #initial_state_mean=[quadsummary([s['unusual_packet'] for s in train]), 324 # quadsummary([s['other_packet'] for s in train]), 325 # numpy.mean([s['unusual_tsval'] for s in train]), 326 # numpy.mean([s['other_tsval'] for s in train])]) 327 328 kf = kf.em(train_array+null_array[0:50000], n_iter=10, 329 em_vars=('transition_matrices', 330 'observation_matrices', 331 'transition_offsets', 332 'observation_offsets', 333 'transition_covariance', 334 'observation_covariance', 335 'initial_state_covariance')) 336 _pykalman4d_params = {'transition_matrices': kf.transition_matrices.tolist(), 337 'observation_matrices': kf.observation_matrices.tolist(), 338 'transition_offsets': kf.transition_offsets.tolist(), 339 'observation_offsets': kf.observation_offsets.tolist(), 340 'transition_covariance': kf.transition_covariance.tolist(), 341 'observation_covariance': kf.observation_covariance.tolist(), 342 'initial_state_mean': kf.initial_state_mean.tolist(), 343 'initial_state_covariance': kf.initial_state_covariance.tolist()} 344 print(_pykalman4d_params) 345 346 kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **_pykalman4d_params) 347 smoothed,covariance = kf.smooth(train_array) 348 null_smoothed,covariance = kf.smooth(null_array) 349 350 kp = _pykalman4d_params.copy() 351 #kp['initial_state_mean']=[quadsummary([s['unusual_packet'] for s in train]), 352 # quadsummary([s['other_packet'] for s in train]), 353 # numpy.mean([s['unusual_tsval'] for s in train]), 354 # numpy.mean([s['other_tsval'] for s in train])] 355 #kf = KalmanFilter(n_dim_obs=4, n_dim_state=4, **kp) 356 #null_smoothed,covariance = kf.smooth(null_array) 357 358 _pykalman4d_good_threshold = (numpy.mean([m[0]-m[1] for m in smoothed])+numpy.mean([m[0]-m[1] for m in null_smoothed]))/2.0 359 print(_pykalman4d_good_threshold) 360 361 362 def trainAux(params, num_trials): 363 estimator = functools.partial(pyKalman4DTest, params, greater) 364 estimates = bootstrap3(estimator, db, 'train', unusual_case, num_observations, num_trials) 365 null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials) 366 367 bad_estimates = len([e for e in estimates if e != 1]) 368 bad_null_estimates = len([e for e in null_estimates if e != 0]) 369 370 false_negatives = 100.0*bad_estimates/num_trials 371 false_positives = 100.0*bad_null_estimates/num_trials 372 return false_positives,false_negatives 373 374 params = {'threshold':_pykalman4d_good_threshold, 'kparams':_pykalman4d_params} 375 376 wt = WorkerThreads(2, trainAux) 377 num_trials = 50 378 performance = [] 379 for t in range(-80,100,20): 380 thresh = _pykalman4d_good_threshold + abs(_pykalman4d_good_threshold)*(t/100.0) 381 params['threshold'] = thresh 382 wt.addJob(thresh, (params.copy(),num_trials)) 383 wt.wait() 384 while not wt.resultq.empty(): 385 job_id,errors = wt.resultq.get() 386 fp,fn = errors 387 #performance.append(((fp+fn)/2.0, job_id, fn, fp)) 388 performance.append((abs(fp-fn), job_id, fn, fp)) 389 performance.sort() 390 #pprint.pprint(performance) 391 best_threshold = performance[0][1] 392 #print("best_threshold:", best_threshold) 393 params['threshold']=best_threshold 394 395 wt.stop() 396 397 return {'trial_type':"train", 398 'num_observations':num_observations, 399 'num_trials':num_trials, 400 'params':json.dumps(params, sort_keys=True), 401 'false_positives':performance[0][3], 402 'false_negatives':performance[0][2]} 403 404 405 302 406 classifiers = {'boxtest':{'train':trainBoxTest, 'test':multiBoxTest, 'train_results':[]}, 303 407 'midsummary':{'train':functools.partial(trainSummary, midsummary), 'test':midsummaryTest, 'train_results':[]}, 304 'ubersummary':{'train':functools.partial(trainSummary, ubersummary), 'test':ubersummaryTest, 'train_results':[]},408 #'ubersummary':{'train':functools.partial(trainSummary, ubersummary), 'test':ubersummaryTest, 'train_results':[]}, 305 409 'quadsummary':{'train':functools.partial(trainSummary, quadsummary), 'test':quadsummaryTest, 'train_results':[]}, 306 'tsvalwmean':{'train':trainTsval, 'test':tsvalwmeanTest, 'train_results':[]}, 410 'septasummary':{'train':functools.partial(trainSummary, septasummary), 'test':septasummaryTest, 'train_results':[]}, 411 #'pykalman4d':{'train':trainPyKalman4D, 'test':pyKalman4DTest, 'train_results':[]}, 412 #'tsvalwmean':{'train':trainTsval, 'test':tsvalwmeanTest, 'train_results':[]}, 307 413 #'kalman':{'train':trainKalman, 'test':kalmanTest, 'train_results':[]}, 308 414 #'_trimean':{'train':None, 'test':trimeanTest, 'train_results':[]},
Note: See TracChangeset
for help on using the changeset viewer.