Context Navigation

sampler @ 21

Last change on this file since 21 was 20, checked in by tim, 10 years ago
major code refactoring, better organizing location of library functions
Property svn:executable set to `*`
File size: 7.4 KB

Rev	Line
[4]	1	#!/usr/bin/env python3
	2	#-- mode: Python;--
	3
	4	import sys
	5	import os
	6	import time
	7	import random
	8	import tempfile
	9	import argparse
	10	import socket
	11	import json
	12	try:
	13	import requests
	14	except:
	15	sys.stderr.write('ERROR: Could not import requests module. Ensure it is installed.\n')
	16	sys.stderr.write(' Under Debian, the package name is "python3-requests"\n.')
	17	sys.exit(1)
	18
	19	VERSION = "{DEVELOPMENT}"
	20	if VERSION == "{DEVELOPMENT}":
	21	script_dir = '.'
	22	try:
	23	script_dir = os.path.dirname(os.path.realpath(__file__))
	24	except:
	25	try:
	26	script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
	27	except:
	28	pass
	29	sys.path.append("%s/../lib" % script_dir)
	30
	31	from nanownlib import *
[20]	32	from nanownlib.platform import *
	33	from nanownlib.tcpts import *
[4]	34	import nanownlib.storage
	35
	36	parser = argparse.ArgumentParser(
	37	description="")
	38	parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":13000}',
	39	help='JSON representation of echo timing cases.')
[20]	40	parser.add_argument('--no-tcpts', action='store_true', help='Disable TCP timestamp profiling')
[9]	41	parser.add_argument('--no-control', action='store_true', help='Do not collect separate control data. Instead, synthesize it from test and train data.')
[4]	42	parser.add_argument('session_name', default=None,
	43	help='Name for the sampler session (used in output filenames)')
	44	parser.add_argument('sample_count', type=int, default=None,
	45	help='Number of samples to collect')
	46	parser.add_argument('host', default=None,
	47	help='IP address or host name of server')
	48	parser.add_argument('port', nargs='?', type=int, default=80,
	49	help='TCP port number of HTTP service (default: 80)')
	50	options = parser.parse_args()
	51
	52
[20]	53	num_samples = options.sample_count
	54	hostname = options.host
	55	port = options.port
	56	protocol = 'http'
[4]	57
	58	cases = json.loads(options.cases)
[20]	59	db_file = "%s.db" % options.session_name
	60	db = nanownlib.storage.db(db_file)
[4]	61
[20]	62
def extractReportedRuntime(headers, body):
    """Parse the wait time the server reports in its response body.

    headers is accepted but not consulted. Returns the base-10 integer that
    follows the "waited: " prefix, or None when body does not start with
    that prefix.
    """
    marker = "waited: "
    if not body.startswith(marker):
        return None

    reported_text = body.split(marker)[1]
    return int(reported_text, 10)
	66
	67
	68	def sendRequest(data=None):
	69	method = 'GET'
[20]	70	path = '/?t=' + data
[4]	71	url = "%s://%s:%d%s" % (protocol,hostname,port,path)
	72	headers = {}
	73	body = (b'')
	74	req = requests.Request('GET', url).prepare()
	75
	76	retry = True
	77	while retry:
	78	try:
	79	session = requests.Session()
[20]	80	response = session.send(req, allow_redirects=False)
	81	#print("sendRequest:", repr(response.raw._original_response.local_address))
[4]	82	reported = extractReportedRuntime(response.headers, response.text)
	83	retry = False
	84	except Exception as e:
	85	sys.stderr.write("ERROR: HTTP request problem: %s\n" % repr(e))
	86	time.sleep(1.0)
	87	sys.stderr.write("ERROR: retrying...\n")
[20]	88
	89
[4]	90	return {'userspace_rtt':response.elapsed.microseconds*1000,
	91	'reported':reported,
	92	'local_port':response.raw._original_response.local_address[1]}
	93
	94
	95	def fetch(probedata, data):
	96	# http://docs.python-requests.org/en/latest/api/#requests.Response
	97	result = sendRequest("%d" % data)
	98	result.update(probedata)
	99
	100	return result
	101
	102
	103	setCPUAffinity()
[20]	104	time.sleep(0.25) # Give our process a chance to migrate to a different CPU if necessary
	105	setPowersave(False) # XXX: test this to see if it helps
	106	setLowLatency(True) # XXX: test this to see if it helps
	107	tcpts_previous = setTCPTimestamps(True)
[4]	108
[20]	109
	110	#XXX: what about multiple A records?
	111	# perform this during a script generation step, measuring lowest latency server
	112	# and hard-coding the IP and host name separately. However, including all
	113	# server IPs in comments
	114	host_ip = socket.gethostbyname(hostname)
	115
	116
[4]	117	meta = {'tcpts_mean':None,'tcpts_stddev':None,'tcpts_slopes':None}
[5]	118	if not options.no_tcpts:
[4]	119	sys.stderr.write("INFO: Probing target for TCP timestamp precision...\n")
	120	sniffer_fp = tempfile.NamedTemporaryFile('w+t')
	121	sniffer = startSniffer(host_ip, port, sniffer_fp.name)
	122	time.sleep(1.0)
	123	ports = runTimestampProbes(host_ip, port, hostname, 12)
	124	time.sleep(1.0)
	125	stopSniffer(sniffer)
	126	sniffer_fp.seek(0)
	127	mean,stddev,slopes = computeTimestampPrecision(sniffer_fp, ports)
	128	meta = {'tcpts_mean':mean,'tcpts_stddev':stddev,'tcpts_slopes':json.dumps(slopes)}
	129
	130	if meta['tcpts_mean'] == None:
	131	sys.stderr.write("INFO: TCP timestamps not supported.\n")
	132	elif meta['tcpts_stddev'] == None:
	133	sys.stderr.write("INFO: Not enough TCP timestamp samples.\n")
	134	elif meta['tcpts_stddev']/meta['tcpts_mean'] > 0.05:
	135	sys.stderr.write("INFO: TCP timestamp precision is inconsistent.\n")
	136	sys.stderr.write("INFO: mean=%(tcpts_mean)f,stddev=%(tcpts_stddev)f,samples=%(tcpts_slopes)s\n"
	137	% meta)
	138	else:
	139	sys.stderr.write("INFO: Estimated TCP timestamp precision: %f (stddev: %f, %f%%)\n"
	140	% (meta['tcpts_mean'], meta['tcpts_stddev'], 100*meta['tcpts_stddev']/meta['tcpts_mean']))
	141
[20]	142	db.addMeta(meta)
[4]	143
	144
def findNextSampleID(db):
    """Return the next sample id to use for the probes table.

    db must expose a DB-API connection as db.conn. The result is one more
    than the largest value in probes.sample, or 0 when there is no such value.
    """
    cur = db.conn.cursor()
    cur.execute("SELECT max(sample) FROM probes")
    newest = cur.fetchone()

    # max() over an empty table yields a single NULL column.
    if newest is None or newest[0] is None:
        return 0
    return newest[0]+1
[9]	153
	154
def collectSamples(db, sample_type, count, sniffer):
    """Collect `count` samples of one type and store them in db.

    Each sample issues one fetch() per entry of the module-level `cases`, in
    random order. When sample_type ends with 'null', every case is sent the
    same value (the one that came first after shuffling) and the order is
    shuffled again. Results are written with db.addProbes() and committed
    after every sample. Afterwards the sniffer is stopped and its packet log
    is passed to associatePackets().

    Returns None. If the sniffer is not running after start(), an error is
    written to stderr and nothing is collected.
    """
    sniffer.start()
    if not sniffer.is_running():
        sys.stderr.write('ERROR: Sniffer did not start...\n')
        return

    sid = findNextSampleID(db)
    for _round in range(count):
        sample_order = list(cases.items())
        random.shuffle(sample_order)
        if sample_type.endswith('null'):
            # Keep each case name but give all of them the first case's value.
            sample_order[1:] = [(case_name, sample_order[0][1])
                                for case_name, _unused in sample_order[1:]]
            random.shuffle(sample_order)

        # One timestamp (in nanoseconds) shared by all probes of this sample.
        now = int(time.time()*1000000000)
        results = [
            fetch({'sample':sid, 'test_case':case_name,
                   'type':sample_type, 'tc_order':position, 'time_of_day':now},
                  case_value)
            for position, (case_name, case_value) in enumerate(sample_order)
        ]

        print(results)
        db.addProbes(results)
        db.conn.commit()
        sid += 1

    time.sleep(2.0) # Give sniffer a chance to collect remaining packets
    sniffer.stop()
    #print(sniffer.openPacketLog().read())
    began = time.time()
    associatePackets(sniffer.openPacketLog(), db)
    finished = time.time()
    print("associate time:", finished-began)
	190
[4]	191
	192
# Split the requested number of samples into control, train and test sets.
# Control samples take 2/5 of the total unless --no-control was given; a third
# of the remainder goes to training and whatever is left goes to testing.
if options.no_control:
    num_control = 0
else:
    num_control = int(num_samples*2/5)

num_train = int((num_samples-num_control)/3)
num_test = num_samples-num_train-num_control

# Control samples are stored under the 'train_null' type.
sample_types = [('train',num_train),
                ('train_null',num_control),
                ('test',num_test)]

# The host settings changed earlier in the script are restored in `finally`,
# so a failure or Ctrl-C during collection or analysis does not leave the
# machine with modified power, latency or TCP timestamp settings.
try:
    sniffer = snifferProcess(host_ip, port)
    for st,count in sample_types:
        collectSamples(db, st,count,sniffer)


    #start = time.time()
    #report_interval = 20
    #next_report = start+report_interval
    #    if (time.time() > next_report):
    #        reportProgress(db, sample_types, start)
    #        next_report += report_interval


    if options.no_control:
        print("TODO: implement control synthesizing!")

    start = time.time()
    num_probes = analyzeProbes(db)
    end = time.time()
    print("analyzed %d probes' packets in: %f" % (num_probes, end-start))
finally:
    setPowersave(True) # XXX: test this to see if it actually helps
    setLowLatency(False) # XXX: test this to see if it actually helps
    setTCPTimestamps(tcpts_previous)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/bin/sampler @ 21

Download in other formats: