Context Navigation

source: trunk/bin/train @ 11

Last change on this file since 11 was 11, checked in by tim, 10 years ago
.
Property svn:executable set to *
File size: 6.7 KB

Rev	Line
[4]	1	#!/usr/bin/env python3
	2	#-- mode: Python;--
	3
	4	import sys
	5	import os
	6	import time
	7	import random
	8	import statistics
	9	import functools
	10	import argparse
	11	import pprint
	12	import json
	13
	14
	15	VERSION = "{DEVELOPMENT}"
	16	if VERSION == "{DEVELOPMENT}":
	17	script_dir = '.'
	18	try:
	19	script_dir = os.path.dirname(os.path.realpath(__file__))
	20	except:
	21	try:
	22	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	23	except:
	24	pass
	25	sys.path.append("%s/../lib" % script_dir)
	26
[11]	27
[4]	28	from nanownlib import *
[10]	29	from nanownlib.stats import *
[11]	30	from nanownlib.train import *
[10]	31	from nanownlib.parallel import WorkerThreads
[4]	32	import nanownlib.storage
	33
[9]	34
[10]	35
# Command-line interface: a positional session database plus optional
# switches that force the unusual case or force classifiers to be
# retrained / retested.
parser = argparse.ArgumentParser(description="")
# Disabled option, kept for reference:
#parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":1010000}',
#                    help='JSON representation of echo timing cases. Default: {"short":10000,"long":1010000}')
parser.add_argument(
    '--unusual-case',
    action='store',
    default=None,
    help='Specify the unusual case and whether it is greater than the other cases. Format: {case name},{1 or 0}',
)
parser.add_argument(
    '--retrain',
    action='append',
    default=[],
    help='Force a classifier to be retrained (and retested). May be specified multiple times.',
)
parser.add_argument(
    '--retest',
    action='append',
    default=[],
    help='Force a classifier to be retested. May be specified multiple times.',
)
parser.add_argument(
    'session_data',
    default=None,
    help='Database file storing session information',
)
options = parser.parse_args()

# Open the session database named on the command line.
db = nanownlib.storage.db(options.session_data)
[4]	47
	48
	49
def trainClassifier(db, unusual_case, greater, classifier, retrain=False):
    """Train `classifier` on growing sample sizes until its error is low enough.

    Starting from 1000, the number of observations is multiplied by 1.5 each
    round and capped at one fifth of the 'train' population size.  For each
    size a stored result is used when the database has one; otherwise the
    classifier's 'train' function is run and timed.  Every round's result is
    passed to db.addClassifierResult() and appended to
    classifiers[classifier]['train_results'].  The loop stops as soon as the
    mean of false_positives and false_negatives drops below 5 (percent), or
    when the cap has been reached.

    Args:
        db: storage object providing populationSize, fetchClassifierResult,
            addClassifierResult and deleteClassifierResults.
        unusual_case: passed through to the trainer.
        greater: passed through to the trainer.
        classifier: key into the global `classifiers` table.
        retrain: when true, stored 'train' results for this classifier are
            deleted first.

    Returns:
        The result of the last round, or None when the cap is not above the
        initial 1000 observations (no round is run in that case).
    """
    if retrain:
        print("Dropping stored training results...")
        db.deleteClassifierResults(classifier, 'train')

    trainer = classifiers[classifier]['train']
    threshold = 5.0  # in percent
    num_obs = 1000
    max_obs = int(db.populationSize('train') / 5)
    result = None
    while num_obs < max_obs:
        num_obs = min(int(num_obs * 1.5), max_obs)
        result = db.fetchClassifierResult(classifier, 'train', num_obs)
        if result is not None:
            train_time = "(stored)"
        else:
            start = time.time()
            result = trainer(db, unusual_case, greater, num_obs)
            result['classifier'] = classifier
            train_time = "%f" % (time.time() - start)

        error = statistics.mean([result['false_positives'], result['false_negatives']])
        # Plain '|' separators: the previous '\|' was an invalid escape
        # sequence and printed literal backslashes.
        print("number of observations: %d | error: %f | false_positives: %f | false_negatives: %f | train time: %s | params: %s"
              % (num_obs, error, result['false_positives'], result['false_negatives'], train_time, result['params']))
        # NOTE(review): this also re-adds results that were just fetched from
        # storage -- confirm addClassifierResult tolerates that.
        db.addClassifierResult(result)
        classifiers[classifier]['train_results'].append(result)

        if error < threshold:
            break

    return result
	81
	82
[10]	83
	84	def testClassifier(db, unusual_case, greater, classifier, retest=False):
	85	target_error = 5.0 # in percent
	86	num_trials = 1000
	87	max_obs = int(db.populationSize('test')/5)
	88
	89	tester = classifiers[classifier]['test']
	90
	91	def testAux(params, num_trials, num_observations):
	92	estimator = functools.partial(tester, params, greater)
	93	estimates = bootstrap3(estimator, db, 'test', unusual_case, num_observations, num_trials)
	94	null_estimates = bootstrap3(estimator, db, 'train_null', unusual_case, num_observations, num_trials)
	95
	96	bad_estimates = len([e for e in estimates if e != 1])
	97	bad_null_estimates = len([e for e in null_estimates if e != 0])
	98
	99	false_negatives = 100.0*bad_estimates/num_trials
	100	false_positives = 100.0*bad_null_estimates/num_trials
	101	print("testAux:", num_observations, false_positives, false_negatives, params)
	102	return false_positives,false_negatives
	103
	104
[11]	105	def getResult(classifier, params, num_obs, num_trials):
	106	jparams = json.dumps(params, sort_keys=True)
	107	result = db.fetchClassifierResult(classifier, 'test', num_obs, jparams)
	108	if result:
	109	fp = result['false_positives']
	110	fn = result['false_negatives']
	111	else:
	112	fp,fn = testAux(params, num_trials, num_obs)
	113	result = {'classifier':classifier,
	114	'trial_type':"test",
	115	'num_observations':num_obs,
	116	'num_trials':num_trials,
	117	'params':jparams,
	118	'false_positives':fp,
	119	'false_negatives':fn}
	120	db.addClassifierResult(result)
	121	return ((fp+fn)/2.0,result)
	122
[10]	123	if retest:
	124	print("Dropping stored test results...")
	125	db.deleteClassifierResults(classifier, 'test')
	126
	127
	128	test_results = []
	129	lte = math.log(target_error/100.0)
	130	for tr in classifiers[classifier]['train_results']:
	131	db.resetOffsets()
	132	params = json.loads(tr['params'])
	133	num_obs = tr['num_observations']
	134
	135	print("initial test")
[11]	136	error,result = getResult(classifier,params,num_obs,num_trials)
[10]	137	print("walking up")
	138	while (error > target_error) and (num_obs < max_obs):
	139	increase_factor = 1.5 * lte/math.log(error/100.0) # don't ask how I came up with this
	140	#print("increase_factor:", increase_factor)
	141	num_obs = min(int(increase_factor*num_obs), max_obs)
[11]	142	error,result = getResult(classifier,params,num_obs,num_trials)
[10]	143
	144	print("walking down")
	145	while (num_obs > 0):
[11]	146	current_best = (error,result)
[10]	147	num_obs = int(0.95*num_obs)
[11]	148	error,result = getResult(classifier,params,num_obs,num_trials)
[10]	149	if error > target_error:
	150	break
	151
[11]	152	return current_best
[10]	153
	154
# Determine which test case is the unusual one and whether its value is
# greater than the others: either taken from --unusual-case or found from
# the session data.
if options.unusual_case is not None:
    # Expected format: "{case name},{1 or 0}".  Split on the last comma so a
    # case name may itself contain commas.
    try:
        unusual_case, greater = options.unusual_case.rsplit(',', 1)
        greater = bool(int(greater))
    except ValueError:
        # parser.error() prints the usage message and exits.
        parser.error("--unusual-case must have the format: {case name},{1 or 0}")
else:
    start = time.time()
    unusual_case, unusual_diff = findUnusualTestCase(db)
    greater = (unusual_diff > 0)
    print("unusual_case:", unusual_case)
    print("unusual_diff:", unusual_diff)
    end = time.time()
    print(":", end-start)
[10]	166
[4]	167
# Train every classifier that has a training function, in name order.
# A classifier is retrained from scratch when named via --retrain.
for c in sorted(classifiers):
    if classifiers[c]['train'] is None:
        continue
    start = time.time()
    print("Training %s..." % c)
    result = trainClassifier(db, unusual_case, greater, c, c in options.retrain)
    print("%s result:" % c)
    pprint.pprint(result)
    print("completed in:", time.time()-start)

# NOTE(review): assumed to run once after all training (indentation was
# lost in this copy of the file).
db.clearCache()
[4]	179
# Test every classifier in name order and record its test error in the
# classifiers table.  Classifiers named via --retest or --retrain have
# their stored test results dropped first.
forced_retests = options.retest + options.retrain
for c in sorted(classifiers.keys()):
    start = time.time()
    print("Testing %s..." % c)
    must_retest = c in forced_retests
    error, result = testClassifier(db, unusual_case, greater, c, must_retest)
    print("%s result:" % c)
    pprint.pprint(result)
    classifiers[c]['test_error'] = error
    elapsed = time.time() - start
    print("completed in:", elapsed)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: