#!/usr/bin/env python3
#-*- mode: Python;-*-

import sys
import os
import time
import random
import statistics
import functools
import argparse
import threading
import queue
import pprint
import json

VERSION = "{DEVELOPMENT}"

if VERSION == "{DEVELOPMENT}":
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            pass
    sys.path.append("%s/../lib" % script_dir)

from nanownlib import *
import nanownlib.storage
from nanownlib.stats import (boxTest, multiBoxTest, subsample, bootstrap,
                             bootstrap2, trimean, midhinge, midhingeTest,
                             samples2Distributions, samples2MeanDiffs)

parser = argparse.ArgumentParser(
    description="Train and evaluate classifiers on collected timing samples.")
#parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":1010000}',
#                    help='JSON representation of echo timing cases. Default: {"short":10000,"long":1010000}')
parser.add_argument('session_data', default=None,
                    help='Database file storing session information')
options = parser.parse_args()


class WorkerThreads(object):
    """Simple thread pool: jobs go into workq, (job_id, result) pairs come out of resultq."""
    workq = None
    resultq = None
    target = None

    def __init__(self, num_workers, target):
        self.workq = queue.Queue()
        self.resultq = queue.Queue()
        self.target = target

        self.workers = []
        for i in range(num_workers):
            t = threading.Thread(target=self._worker)
            t.daemon = True
            t.start()
            self.workers.append(t)

    def _worker(self):
        while True:
            item = self.workq.get()
            if item is None:  # None is the sentinel telling a worker to exit
                self.workq.task_done()
                break

            job_id, args = item
            self.resultq.put((job_id, self.target(*args)))
            self.workq.task_done()

    def addJob(self, job_id, args):
        self.workq.put((job_id, args))

    def wait(self):
        self.workq.join()

    def stop(self):
        for i in range(len(self.workers)):
            self.workq.put(None)
        for w in self.workers:
            w.join()
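
# Illustrative sketch (not executed) of how WorkerThreads is used by the
# training routines below; 'my_target' and its arguments are hypothetical
# placeholders:
#
#     wt = WorkerThreads(2, my_target)
#     for job_id in range(10):
#         wt.addJob(job_id, (arg1, arg2))    # args tuple must match my_target's signature
#     wt.wait()                              # block until the work queue drains
#     while not wt.resultq.empty():
#         job_id, result = wt.resultq.get()  # results arrive in completion order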
def trainBoxTest(db, test_cases, longest, subsample_size):
    def trainAux(low, high, num_trials):
        estimator = functools.partial(boxTest, {'low':low, 'high':high})
        estimates = bootstrap(estimator, db, 'train', test_cases, subsample_size, num_trials)
        null_estimates = bootstrap(estimator, db, 'train_null', test_cases, subsample_size, num_trials)

        #XXX: need to have a configurable policy on what we're looking for.
        #     which is longest or which is shortest?
        bad_estimates = len([e for e in estimates if e != longest])
        bad_null_estimates = len([e for e in null_estimates if e is not None])

        false_negatives = 100.0*bad_estimates/num_trials
        false_positives = 100.0*bad_null_estimates/num_trials
        return false_positives, false_negatives

    start = time.time()
    wt = WorkerThreads(2, trainAux)

    # Stage 1: coarse scan over candidate 'low' percentiles at a fixed width
    width = 2.0
    performance = []
    percentiles = list(range(0, 50, 2))
    for low in percentiles:
        wt.addJob(low, (low, low+width, 200))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    print(time.time()-start)

    # Stage 2: scan widths across the five best lows from stage 1
    lows = [p[1] for p in performance[0:5]]
    widths = [w/10.0 for w in range(0, 105, 5)]
    performance = []
    for width in widths:
        false_positives = []
        false_negatives = []
        for low in lows:
            wt.addJob(low, (low, low+width, 150))
        wt.wait()
        while not wt.resultq.empty():
            job_id, errors = wt.resultq.get()
            fp, fn = errors
            false_negatives.append(fn)
            false_positives.append(fp)

        #print(width, false_negatives)
        #print(width, false_positives)
        performance.append(((statistics.mean(false_positives)+statistics.mean(false_negatives))/2.0,
                            width, statistics.mean(false_negatives), statistics.mean(false_positives)))
    performance.sort()
    pprint.pprint(performance)
    good_width = performance[0][1]
    print("good_width:", good_width)

    # Stage 3: refine 'low' to +/-1 around the stage 1 candidates
    lc = {}
    for low in lows:
        if low-1 > 0:
            lc[low-1] = None
        lc[low] = None
        lc[low+1] = None
    lows = list(lc.keys())

    performance = []
    for low in lows:
        wt.addJob(low, (low, low+good_width, 300))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    best_low = performance[0][1]
    print("best_low:", best_low)

    # Stage 4: refine the width around good_width in 0.1 steps
    widths = [good_width-0.4, good_width-0.3, good_width-0.2, good_width-0.1,
              good_width, good_width+0.1, good_width+0.2, good_width+0.3, good_width+0.4]
    performance = []
    for width in widths:
        wt.addJob(width, (best_low, best_low+width, 200))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    best_width = performance[0][1]
    print("best_width:", best_width)
    print("final_performance:", performance[0][0])

    return {"low":best_low, "high":best_low+best_width}
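
# trainBoxTest2 repeats the same four-stage greedy search as trainBoxTest, but
# drives multiBoxTest against a single "unusual case" instead of a set of test
# cases. Each candidate (low, high) box is scored by the mean of its false
# positive and false negative percentages over bootstrapped subsamples: a good
# estimator should return 1 on 'train' data and 0 on 'train_null' data.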
def trainBoxTest2(db, unusual_case, greater, subsample_size):
    def trainAux(low, high, num_trials):
        estimator = functools.partial(multiBoxTest, {'low':low, 'high':high}, unusual_case, greater)
        estimates = bootstrap2(estimator, db, 'train', subsample_size, num_trials)
        null_estimates = bootstrap2(estimator, db, 'train_null', subsample_size, num_trials)

        bad_estimates = len([e for e in estimates if e != 1])
        bad_null_estimates = len([e for e in null_estimates if e != 0])

        false_negatives = 100.0*bad_estimates/num_trials
        false_positives = 100.0*bad_null_estimates/num_trials
        return false_positives, false_negatives

    start = time.time()
    wt = WorkerThreads(2, trainAux)

    # Stage 1: coarse scan over candidate 'low' percentiles at a fixed width
    num_trials = 200
    width = 2.0
    performance = []
    percentiles = list(range(0, 50, 2))
    for low in percentiles:
        wt.addJob(low, (low, low+width, num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    print(time.time()-start)

    # Stage 2: scan widths across the five best lows from stage 1
    num_trials = 150
    lows = [p[1] for p in performance[0:5]]
    widths = [w/10.0 for w in range(0, 105, 5)]
    performance = []
    for width in widths:
        false_positives = []
        false_negatives = []
        for low in lows:
            wt.addJob(low, (low, low+width, num_trials))
        wt.wait()
        while not wt.resultq.empty():
            job_id, errors = wt.resultq.get()
            fp, fn = errors
            false_negatives.append(fn)
            false_positives.append(fp)

        #print(width, false_negatives)
        #print(width, false_positives)
        performance.append(((statistics.mean(false_positives)+statistics.mean(false_negatives))/2.0,
                            width, statistics.mean(false_negatives), statistics.mean(false_positives)))
    performance.sort()
    pprint.pprint(performance)
    good_width = performance[0][1]
    print("good_width:", good_width)

    # Stage 3: refine 'low' to +/-1 around the stage 1 candidates
    lc = {}
    for low in lows:
        if low-1 >= 0:
            lc[low-1] = None
        lc[low] = None
        lc[low+1] = None
    lows = list(lc.keys())
    print("candidate lows:")
    pprint.pprint(lows)

    num_trials = 300
    performance = []
    for low in lows:
        wt.addJob(low, (low, low+good_width, num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    best_low = performance[0][1]
    print("best_low:", best_low)

    # Stage 4: refine the width around good_width in 0.1 steps
    num_trials = 200
    widths = [good_width-0.4, good_width-0.3, good_width-0.2, good_width-0.1,
              good_width, good_width+0.1, good_width+0.2, good_width+0.3, good_width+0.4]
    performance = []
    for width in widths:
        wt.addJob(width, (best_low, best_low+width, num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    best_width = performance[0][1]
    print("best_width:", best_width)
    print("final_performance:", performance[0][0])

    params = json.dumps({"low":best_low, "high":best_low+best_width})
    return {'algorithm':"boxtest",
            'params':params,
            'sample_size':subsample_size,
            'num_trials':num_trials,
            'trial_type':"train",
            'false_positives':performance[0][3],
            'false_negatives':performance[0][2]}
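
# trainMidhinge seeds its decision threshold from robust location estimates of
# the observed mean differences. For quartiles Q1, Q2 (the median), and Q3:
#
#     midhinge = (Q1 + Q3) / 2
#     trimean  = (Q1 + 2*Q2 + Q3) / 4
#
# The initial threshold is half the trimean of the 'train' mean differences;
# the (distance, threshold) pair is then tuned with the same coarse-then-fine
# search pattern used by the box test trainers above.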
def trainMidhinge(db, unusual_case, greater, subsample_size):
    def trainAux(distance, threshold, num_trials):
        estimator = functools.partial(midhingeTest, {'distance':distance, 'threshold':threshold},
                                      unusual_case, greater)
        estimates = bootstrap2(estimator, db, 'train', subsample_size, num_trials)
        null_estimates = bootstrap2(estimator, db, 'train_null', subsample_size, num_trials)

        bad_estimates = len([e for e in estimates if e != 1])
        bad_null_estimates = len([e for e in null_estimates if e != 0])

        false_negatives = 100.0*bad_estimates/num_trials
        false_positives = 100.0*bad_null_estimates/num_trials
        return false_positives, false_negatives

    # Determine the expected delta based on the observed mean differences
    start = time.time()
    mean_diffs = list(samples2MeanDiffs(subsample(db, 'train'), 'packet_rtt', unusual_case))
    threshold = trimean(mean_diffs)/2.0
    print("initial threshold:", threshold)
    print("median threshold:", statistics.median(mean_diffs)/2.0)
    print("midhinge threshold:", midhinge(mean_diffs)/2.0)
    print("trimean threshold:", trimean(mean_diffs)/2.0)

    mean_diffs = list(samples2MeanDiffs(subsample(db, 'train_null'), 'packet_rtt', unusual_case))
    print("null count:", len(mean_diffs))
    print("null mean:", statistics.mean(mean_diffs))
    print("null median:", statistics.median(mean_diffs))
    print("null midhinge:", midhinge(mean_diffs))
    print("null trimean:", trimean(mean_diffs))
    print(time.time()-start)

    start = time.time()
    wt = WorkerThreads(1, trainAux)

    # Stage 1: coarse scan over candidate distances at the initial threshold
    num_trials = 200
    performance = []
    #for distance in range(1,46,4):
    for distance in range(25, 46, 4):
        wt.addJob(distance, (distance, threshold, num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    print(time.time()-start)

    good_distance = performance[0][1]
    print("good_distance:", good_distance)

    # Stage 2: coarse scan of thresholds (80%..120% of the seed) at good_distance
    num_trials = 200
    start = time.time()
    performance = []
    for t in range(80, 125, 5):
        wt.addJob(threshold*(t/100.0), (good_distance, threshold*(t/100.0), num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    print(time.time()-start)
    good_threshold = performance[0][1]
    print("good_threshold:", good_threshold)

    # Stage 3: refine distance to +/-4 around good_distance
    num_trials = 200
    start = time.time()
    performance = []
    for d in range(-4, 5):
        wt.addJob(good_distance+d, (good_distance+d, good_threshold, num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    print(time.time()-start)
    best_distance = performance[0][1]
    print("best_distance:", best_distance)

    # Stage 4: refine threshold to 95%..105% of good_threshold
    num_trials = 200
    start = time.time()
    performance = []
    for t in range(95, 106):
        wt.addJob(good_threshold*(t/100.0), (best_distance, good_threshold*(t/100.0), num_trials))
    wt.wait()
    while not wt.resultq.empty():
        job_id, errors = wt.resultq.get()
        fp, fn = errors
        performance.append(((fp+fn)/2.0, job_id, fn, fp))
    performance.sort()
    pprint.pprint(performance)
    print(time.time()-start)
    best_threshold = performance[0][1]
    print("best_threshold:", best_threshold)

    params = json.dumps({'distance':best_distance, 'threshold':best_threshold})
    return {'algorithm':"midhinge",
            'params':params,
            'sample_size':subsample_size,
            'num_trials':num_trials,
            'trial_type':"train",
            'false_positives':performance[0][3],
            'false_negatives':performance[0][2]}


#classifiers = {'boxtest':{'train':trainBoxTest2, 'test':multiBoxTest},
#               'midhinge':{'train':trainMidhinge, 'test':midhinge}}

db = nanownlib.storage.db(options.session_data)
#cursor = db.cursor()
#cursor.execute("SELECT min(sample) min, max(sample) max FROM probes")
#train_start,test_end = cursor.fetchone()
#train_end = int(test_end-train_start)
#test_start = train_end+1
#subsample_size = min(10000,(train_end-train_start+1)/4)

start = time.time()
unusual_case, unusual_diff = findUnusualTestCase(db)
greater = (unusual_diff > 0)
print("unusual_case:", unusual_case)
print("unusual_diff:", unusual_diff)
end = time.time()
print("elapsed:", end-start)

start = time.time()
results = trainMidhinge(db, unusual_case, greater, 6000)
db.addClassifierResults(results)
print("midhinge result:", results)
end = time.time()
print("elapsed:", end-start)

start = time.time()
results = trainBoxTest2(db, unusual_case, greater, 6000)
db.addClassifierResults(results)
print("multi box test result:", results)
end = time.time()
print("elapsed:", end-start)

#start = time.time()
#print("box test params:", trainBoxTest(db, test_cases, 'long', 100))
#end = time.time()
#print(":", end-start)
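
# Example invocation (the script and database names here are hypothetical; the
# positional argument is the session database produced during sample collection):
#
#     ./train.py session.db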