source: trunk/bin/sampler @ 24

Last change on this file since 24 was 20, checked in by tim, 9 years ago

major code refactoring, better organizing location of library functions

  • Property svn:executable set to *
File size: 7.4 KB
#!/usr/bin/env python3
#-*- mode: Python;-*-

import sys
import os
import time
import random
import tempfile
import argparse
import socket
import json
try:
    import requests
except:
    sys.stderr.write('ERROR: Could not import requests module.  Ensure it is installed.\n')
    sys.stderr.write('       Under Debian, the package name is "python3-requests".\n')
    sys.exit(1)

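# When running from a source checkout (VERSION not substituted at install time),
# add ../lib relative to this script so nanownlib can be imported without installation.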
VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except:
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except:
            pass
    sys.path.append("%s/../lib" % script_dir)

from nanownlib import *
from nanownlib.platform import *
from nanownlib.tcpts import *
import nanownlib.storage

parser = argparse.ArgumentParser(
    description="Collect response timing samples from a remote HTTP echo service.")
parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":13000}',
                    help='JSON representation of echo timing cases.')
parser.add_argument('--no-tcpts', action='store_true', help='Disable TCP timestamp profiling')
parser.add_argument('--no-control', action='store_true', help='Do not collect separate control data.  Instead, synthesize it from test and train data.')
parser.add_argument('session_name', default=None,
                    help='Name for the sampler session (used in output filenames)')
parser.add_argument('sample_count', type=int, default=None,
                    help='Number of samples to collect')
parser.add_argument('host', default=None,
                    help='IP address or host name of server')
parser.add_argument('port', nargs='?', type=int, default=80,
                    help='TCP port number of HTTP service (default: 80)')
options = parser.parse_args()


num_samples = options.sample_count
hostname    = options.host
port        = options.port
protocol    = 'http'

cases = json.loads(options.cases)
db_file = "%s.db" % options.session_name
db = nanownlib.storage.db(db_file)


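# Parse the runtime the server claims it spent, reported in the response body
# as "waited: <integer>".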
def extractReportedRuntime(headers, body):
    if body.startswith("waited: "):
        return int(body.split("waited: ")[1], 10)


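# Send one HTTP GET carrying the case value in the 't' query parameter, retrying
# until it succeeds.  Returns the userspace-measured RTT (in nanoseconds), the
# server-reported runtime, and the local TCP port used for the connection.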
def sendRequest(data=None):
    method = 'GET'
    path = '/?t=' + data
    url = "%s://%s:%d%s" % (protocol,hostname,port,path)
    headers = {}
    body = (b'')
    req = requests.Request('GET', url).prepare()

    retry = True
    while retry:
        try:
            session = requests.Session()
            response = session.send(req, allow_redirects=False)
            #print("sendRequest:", repr(response.raw._original_response.local_address))
            reported = extractReportedRuntime(response.headers, response.text)
            retry = False
        except Exception as e:
            sys.stderr.write("ERROR: HTTP request problem: %s\n" % repr(e))
            time.sleep(1.0)
            sys.stderr.write("ERROR: retrying...\n")

    return {'userspace_rtt':response.elapsed.microseconds*1000,
            'reported':reported,
            'local_port':response.raw._original_response.local_address[1]}


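# Run a single probe: send a request for the given case value and merge the
# timing results with the supplied probe metadata.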
def fetch(probedata, data):
    #   http://docs.python-requests.org/en/latest/api/#requests.Response
    result = sendRequest("%d" % data)
    result.update(probedata)

    return result


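# Reduce measurement jitter before sampling: pin this process to one CPU, turn off
# CPU powersave, enable low-latency mode, and make sure TCP timestamps are enabled
# (the previous timestamp setting is restored at the end of the run).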
setCPUAffinity()
time.sleep(0.25) # Give our process a chance to migrate to a different CPU if necessary
setPowersave(False) # XXX: test this to see if it helps
setLowLatency(True) # XXX: test this to see if it helps
tcpts_previous = setTCPTimestamps(True)


#XXX: what about multiple A records?
#     perform this during a script generation step, measuring lowest latency server
#     and hard-coding the IP and host name separately.  However, including all
#     server IPs in comments
host_ip = socket.gethostbyname(hostname)


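# Estimate the precision of the target's TCP timestamp clock by sniffing a short
# burst of probe connections; the resulting mean, standard deviation, and slopes
# are recorded as session metadata.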
meta = {'tcpts_mean':None,'tcpts_stddev':None,'tcpts_slopes':None}
if not options.no_tcpts:
    sys.stderr.write("INFO: Probing target for TCP timestamp precision...\n")
    sniffer_fp = tempfile.NamedTemporaryFile('w+t')
    sniffer = startSniffer(host_ip, port, sniffer_fp.name)
    time.sleep(1.0)
    ports = runTimestampProbes(host_ip, port, hostname, 12)
    time.sleep(1.0)
    stopSniffer(sniffer)
    sniffer_fp.seek(0)
    mean,stddev,slopes = computeTimestampPrecision(sniffer_fp, ports)
    meta = {'tcpts_mean':mean,'tcpts_stddev':stddev,'tcpts_slopes':json.dumps(slopes)}

if meta['tcpts_mean'] == None:
    sys.stderr.write("INFO: TCP timestamps not supported.\n")
elif meta['tcpts_stddev'] == None:
    sys.stderr.write("INFO: Not enough TCP timestamp samples.\n")
elif meta['tcpts_stddev']/meta['tcpts_mean'] > 0.05:
    sys.stderr.write("INFO: TCP timestamp precision is inconsistent.\n")
    sys.stderr.write("INFO:     mean=%(tcpts_mean)f,stddev=%(tcpts_stddev)f,samples=%(tcpts_slopes)s\n"
                     % meta)
else:
    sys.stderr.write("INFO: Estimated TCP timestamp precision: %f (stddev: %f, %f%%)\n"
                     % (meta['tcpts_mean'], meta['tcpts_stddev'], 100*meta['tcpts_stddev']/meta['tcpts_mean']))

db.addMeta(meta)


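# Return the next unused sample ID: one past the highest sample number already
# recorded in the probes table, or 0 for a fresh database.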
def findNextSampleID(db):
    cursor = db.conn.cursor()
    cursor.execute("SELECT max(sample) FROM probes")
    row = cursor.fetchone()
    if row != None and row[0] != None:
        return row[0]+1

    return 0


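# Collect `count` samples of the given type.  Each sample issues one request per
# test case in random order; for '*_null' (control) samples, every case label is
# paired with the same randomly chosen value.  Results are stored in the database
# and sniffed packets are associated with the probes afterward.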
def collectSamples(db, sample_type, count, sniffer):
    sniffer.start()

    if not sniffer.is_running():
        sys.stderr.write('ERROR: Sniffer did not start...\n')
        return

    sid = findNextSampleID(db)
    for k in range(0,count):
        sample_order = list(cases.items())
        random.shuffle(sample_order)
        if sample_type.endswith('null'):
            for i in range(1,len(sample_order)):
                sample_order[i] = (sample_order[i][0],sample_order[0][1])
            random.shuffle(sample_order)

        results = []
        now = int(time.time()*1000000000)
        for i in range(len(sample_order)):
            results.append(fetch({'sample':sid, 'test_case':sample_order[i][0],
                                  'type':sample_type, 'tc_order':i, 'time_of_day':now},
                                 sample_order[i][1]))

        print(results)
        db.addProbes(results)
        db.conn.commit()
        sid += 1

    time.sleep(2.0) # Give sniffer a chance to collect remaining packets
    sniffer.stop()
    #print(sniffer.openPacketLog().read())
    start = time.time()
    associatePackets(sniffer.openPacketLog(), db)
    end = time.time()
    print("associate time:", end-start)


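# Unless --no-control was given, reserve 2/5 of the requested samples as control
# ('train_null') data; of the remainder, 1/3 are training samples and the rest
# are test samples.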
if options.no_control:
    num_control = 0
else:
    num_control = int(num_samples*2/5)

num_train = int((num_samples-num_control)/3)
num_test = num_samples-num_train-num_control

sample_types = [('train',num_train),
                ('train_null',num_control),
                ('test',num_test)]

sniffer = snifferProcess(host_ip, port)
for st,count in sample_types:
    collectSamples(db, st,count,sniffer)


#start = time.time()
#report_interval = 20
#next_report = start+report_interval
#        if (time.time() > next_report):
#            reportProgress(db, sample_types, start)
#            next_report += report_interval


if options.no_control:
    print("TODO: implement control synthesizing!")

start = time.time()
num_probes = analyzeProbes(db)
end = time.time()
print("analyzed %d probes' packets in: %f" % (num_probes, end-start))


setPowersave(True) # XXX: test this to see if it actually helps
setLowLatency(False) # XXX: test this to see if it actually helps
setTCPTimestamps(tcpts_previous)