Changeset 6
- Timestamp: 07/09/15 12:27:04 (9 years ago)
- Location: trunk
- Files: 1 added, 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/bin/train
r4   r6
290  290
291  291
292       - def trainMidhinge(db, unusual_case, greater, subsample_size):
     292  + def trainMidhinge(db, unusual_case, greater, subseries_size):
293  293
294  294    def trainAux(distance, threshold, num_trials):
295       - estimator = functools.partial(midhingeTest, {'distance':distance,'threshold':threshold}, unusual_case,greater)
296       - estimates = bootstrap2(estimator, db, 'train', subsample_size, num_trials)
297       - null_estimates = bootstrap2(estimator, db, 'train_null', subsample_size, num_trials)
     295  + estimator = functools.partial(midhingeTest, {'distance':distance,'threshold':threshold}, greater)
     296  + estimates = bootstrap3(estimator, db, 'train', unusual_case, subseries_size, num_trials)
     297  + null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, subseries_size, num_trials)
298  298
299  299    bad_estimates = len([e for e in estimates if e != 1])
…    …
306  306    #determine expected delta based on differences
307  307    start = time.time()
308       - mean_diffs = list(samples2MeanDiffs(subsample(db, 'train'), 'packet_rtt', unusual_case))
     308  + mean_diffs = [s['unusual_case']-s['other_cases'] for s in subseries(db, 'train', unusual_case)]
309  309    threshold = trimean(mean_diffs)/2.0
310  310    print("initial threshold:", threshold)
…    …
313  313    print("trimean threshold:", trimean(mean_diffs)/2.0)
314  314
315       - mean_diffs = list(samples2MeanDiffs(subsample(db, 'train_null'), 'packet_rtt', unusual_case))
     315  + mean_diffs = [s['unusual_case']-s['other_cases'] for s in subseries(db, 'train_null', unusual_case)]
316  316    print(len(mean_diffs))
317  317    print("null mean:", statistics.mean(mean_diffs))
…    …
323  323
324  324    start = time.time()
325       - wt = WorkerThreads(1, trainAux)
326       -
327       - num_trials = 200
     325  + wt = WorkerThreads(2, trainAux)
     326  +
     327  + num_trials = 20
328  328    performance = []
329  329    #for distance in range(1,46,4):
…    …
335  335    fp,fn = errors
336  336    performance.append(((fp+fn)/2.0, job_id, fn, fp))
     337  + #for distance in range(25,46,4):
     338  + # job_id = distance
     339  + # fp,fn = trainAux(distance, threshold, num_trials)
     340  + # performance.append(((fp+fn)/2.0, job_id, fn, fp))
     341  +
337  342    performance.sort()
338  343    pprint.pprint(performance)
…    …
342  347
343  348
344       - num_trials = 200
     349  + num_trials = 20
345  350    start = time.time()
346  351    performance = []
…    …
359  364
360  365
361       - num_trials = 200
     366  + num_trials = 20
362  367    start = time.time()
363  368    performance = []
…    …
374  379    best_distance = performance[0][1]
375  380    print("best_distance:",best_distance)
376       -
377       - num_trials = 200
     381  +
     382  + num_trials = 20
378  383    start = time.time()
379  384    performance = []
…    …
394  399    return {'algorithm':"midhinge",
395  400    'params':params,
396       - 'sample_size':subsample_size,
     401  + 'sample_size':subseries_size,
397  402    'num_trials':num_trials,
398  403    'trial_type':"train",
…    …
421  426    print(":", end-start)
422  427
     428  + import cProfile
     429  +
423  430    start = time.time()
424       - results = trainMidhinge(db, unusual_case, greater, 6000)
425       - db.addClassifierResults(results)
     431  + #cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
     432  + results = trainMidhinge(db, unusual_case, greater, 100)
     433  + #db.addClassifierResults(results)
426  434    print("midhinge result:", results)
427  435    end = time.time()
428  436    print(":", end-start)
     437  +
     438  + sys.exit(0)
429  439
430  440    start = time.time()
-
trunk/lib/nanownlib/__init__.py
r5   r6
214  214    for p in packets:
215  215    key = (p['sent'],p['tcpseq'],p['tcpack'],p['payload_len'])
216       - #if (key not in seen)\
217       - #or p['sent']==1 and (seen[key]['observed'] < p['observed'])\
218       - #or p['sent']==0 and (seen[key]['observed'] > p['observed']):
219       - if (key not in seen) or (seen[key]['observed'] > p['observed']):
     216  + if (key not in seen)\
     217  + or p['sent']==1 and (seen[key]['observed'] < p['observed'])\
     218  + or p['sent']==0 and (seen[key]['observed'] > p['observed']):
     219  + #if (key not in seen) or (seen[key]['observed'] > p['observed']):
220  220    seen[key] = p
221  221
…    …
352  352    pprint.pprint(evaluations)
353  353
354       - delta_margin = 0.1
     354  + delta_margin = 0.15
355  355    best_strim = 0
356  356    best_rtrim = 0
…    …
439  439    remaining_tm = trimean([row['packet_rtt'] for row in cursor])
440  440
441       - return (tc, tm_map[tc]-remaining_tm)
     441  + ret_val = (tc, tm_map[tc]-remaining_tm)
     442  + print("unusual_case: %s, delta: %f" % ret_val)
     443  + return ret_val
442  444
443  445
-
trunk/lib/nanownlib/stats.py
r4   r6
227  227    yield (sid,[dict(r) for r in probes])
228  228
     229  +
     230  + def subseries(db, probe_type, unusual_case, size=None, offset=None, field='packet_rtt'):
     231  + cursor = db.conn.cursor()
     232  + cursor.execute("SELECT max(c) FROM (SELECT count(sample) c FROM probes WHERE type=? GROUP BY test_case)", (probe_type,))
     233  + population_size = cursor.fetchone()[0]
     234  +
     235  + if size == None or size > population_size:
     236  + size = population_size
     237  + if offset == None or offset >= population_size or offset < 0:
     238  + offset = numpy.random.random_integers(0,population_size-1)
     239  +
     240  + query="""
     241  + SELECT %(field)s AS unusual_case,
     242  + (SELECT avg(%(field)s) FROM probes,analysis
     243  + WHERE analysis.probe_id=probes.id AND probes.test_case!=:unusual_case AND probes.type=:probe_type AND sample=u.sample) AS other_cases
     244  + FROM (SELECT probes.sample,%(field)s FROM probes,analysis
     245  + WHERE analysis.probe_id=probes.id AND probes.test_case =:unusual_case AND probes.type=:probe_type) u
     246  + LIMIT :size OFFSET :offset
     247  + """ % {"field":field}
     248  +
     249  + params = {"probe_type":probe_type, "unusual_case":unusual_case, "offset":offset, "size":size}
     250  + cursor.execute(query, params)
     251  + for row in cursor:
     252  + size -= 1
     253  + yield dict(row)
     254  +
     255  + if size > 0:
     256  + params['offset'] = 0
     257  + params['size'] = size
     258  + cursor.execute(query, params)
     259  + for row in cursor:
     260  + yield dict(row)
     261  +
     262  +
229  263    # if test_cases=None, include all of them. Otherwise, include only the specified test cases.
230  264    def samples2Distributions(samples, field, test_cases=None):
…    …
270  304
271  305
     306  + def bootstrap3(estimator, db, probe_type, unusual_case, subseries_size, num_trials):
     307  + ret_val = []
     308  + for t in range(num_trials):
     309  + ret_val.append(estimator(subseries(db, probe_type, unusual_case, subseries_size)))
     310  +
     311  + return ret_val
     312  +
     313  +
272  314    # Returns the test case name that clearly has higher RTT; otherwise, returns None
273  315    def boxTest(params, test_cases, samples):
…    …
329  371    # Returns 1 if unusual_case is unusual in the expected direction
330  372    # 0 otherwise
331       - def midhingeTest(params, unusual_case,greater, samples):
332       - diffs = list(samples2MeanDiffs(samples, 'packet_rtt', unusual_case))
     373  + def midhingeTest(params, greater, samples):
     374  + diffs = [s['unusual_case']-s['other_cases'] for s in samples]
333  375
334  376    mh = midhinge(diffs, params['distance'])
Note: See TracChangeset for help on using the changeset viewer.