#!/usr/bin/env python3
#-*- mode: Python;-*-

import sys
import os
import time
import random
import statistics
import functools
import argparse
import threading
import queue
import pprint
import json


# VERSION is compared against the literal placeholder below; while it is still
# the placeholder, the library directory next to this script is added to the
# import path.  (Presumably the placeholder is substituted at packaging time --
# TODO confirm against the build scripts.)
VERSION = "{DEVELOPMENT}"
if VERSION == "{DEVELOPMENT}":
    # Best-effort discovery of the directory containing this script; falls
    # back to the current directory when neither lookup succeeds.
    script_dir = '.'
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        try:
            script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        except Exception:
            pass
    sys.path.append("%s/../lib" % script_dir)

from nanownlib import *
import nanownlib.storage
from nanownlib.stats import boxTest,multiBoxTest,subsample,bootstrap,bootstrap2,trimean,midhinge,midhingeTest,samples2Distributions,samples2MeanDiffs

# Command-line interface: one required positional argument naming the
# session database file.  Parsing happens at import time.
parser = argparse.ArgumentParser(description="")
#parser.add_argument('-c', dest='cases', type=str, default='{"short":10000,"long":1010000}',
#                    help='JSON representation of echo timing cases. Default: {"short":10000,"long":1010000}')
parser.add_argument(
    'session_data',
    default=None,
    help='Database file storing session information',
)
options = parser.parse_args()


class WorkerThreads(object):
    """Fixed-size pool of daemon threads that run ``target`` on queued jobs.

    Jobs are submitted with :meth:`addJob` as ``(job_id, args)``; each worker
    calls ``target(*args)`` and puts ``(job_id, result)`` on ``resultq``.
    Callers use :meth:`wait` to block until all submitted jobs have been
    processed and then drain ``resultq`` themselves.

    NOTE: if ``target`` raises, the exception propagates out of the worker
    loop, so that worker thread ends without calling ``task_done()`` for the
    job and a subsequent :meth:`wait` will block.
    """
    # Queue of pending (job_id, args) items; None is the shutdown sentinel.
    workq = None
    # Queue of finished (job_id, result) tuples.
    resultq = None
    # Callable invoked as target(*args) for every job.
    target = None

    def __init__(self, num_workers, target):
        """Start ``num_workers`` daemon threads that execute ``target``."""
        self.workq = queue.Queue()
        self.resultq = queue.Queue()
        self.target = target

        self.workers = []
        for _ in range(num_workers):
            t = threading.Thread(target=self._worker)
            t.daemon = True
            t.start()
            self.workers.append(t)

    def _worker(self):
        """Worker loop: process jobs until the ``None`` sentinel is received."""
        while True:
            item = self.workq.get()
            if item is None:
                # Acknowledge the sentinel so that workq.join() stays balanced.
                self.workq.task_done()
                break

            job_id, args = item
            self.resultq.put((job_id, self.target(*args)))
            self.workq.task_done()

    def addJob(self, job_id, args):
        """Queue one job; ``args`` is the argument tuple passed to ``target``."""
        self.workq.put((job_id, args))

    def wait(self):
        """Block until every queued item has been marked done by a worker."""
        self.workq.join()

    def stop(self):
        """Send one shutdown sentinel per worker and join all worker threads."""
        # Was ``len(workers)``, an undefined name that raised NameError.
        for _ in self.workers:
            self.workq.put(None)
        for w in self.workers:
            w.join()

            
def trainBoxTest(db, unusual_case, greater, subseries_size):
    """Search for ``multiBoxTest`` box parameters ('low'/'high') by bootstrap.

    Candidate boxes are scored in four passes using a two-thread
    ``WorkerThreads`` pool: a coarse scan over ``low``, a scan over box
    widths, a re-scan of the best ``low`` values and a final width refinement.
    Intermediate rankings are printed along the way.

    Returns a dict with the keys 'algorithm', 'params' (JSON string with
    'low' and 'high'), 'sample_size', 'num_trials', 'trial_type',
    'false_positives' and 'false_negatives'.

    NOTE(review): ``bootstrap3`` is not among the names explicitly imported
    from ``nanownlib.stats``; presumably it is supplied by
    ``from nanownlib import *`` -- confirm.  The worker pool created here is
    not stopped before returning.
    """

    def trainAux(low,high,num_trials):
        """Return (false positive %, false negative %) for one candidate box."""
        box = {'low':low, 'high':high}
        estimator = functools.partial(multiBoxTest, box, greater)
        positives = bootstrap3(estimator, db, 'train', unusual_case, subseries_size, num_trials)
        nulls = bootstrap3(estimator, db, 'train_null', unusual_case, subseries_size, num_trials)

        # An estimate of 1 is the expected answer on 'train' data and 0 the
        # expected answer on 'train_null' data; anything else is an error.
        missed = sum(1 for e in positives if e != 1)
        false_alarms = sum(1 for e in nulls if e != 0)
        return 100.0*false_alarms/num_trials, 100.0*missed/num_trials

    def runJobs(jobs):
        """Submit (job_id, args) pairs, wait, and return [(job_id, fp, fn)]."""
        for job_id, args in jobs:
            wt.addJob(job_id, args)
        wt.wait()
        outcomes = []
        while not wt.resultq.empty():
            job_id, (fp, fn) = wt.resultq.get()
            outcomes.append((job_id, fp, fn))
        return outcomes

    def rankByMeanError(outcomes):
        """Sort outcomes as (mean error, job_id, fn, fp), best first."""
        return sorted(((fp+fn)/2.0, job_id, fn, fp) for job_id, fp, fn in outcomes)

    start = time.time()
    wt = WorkerThreads(2, trainAux)

    # Pass 1: coarse scan of the lower bound with a fixed box width.
    num_trials = 200
    width = 1.0
    performance = rankByMeanError(
        runJobs([(low, (low,low+width,num_trials)) for low in range(0,50)]))
    pprint.pprint(performance)
    print(time.time()-start)

    # Pass 2: for the five best lows, pick the width whose mean false
    # positive and false negative rates are closest to each other.
    num_trials = 200
    lows = [entry[1] for entry in performance[:5]]
    performance = []
    for tenths in range(5,65,5):
        candidate = tenths/10.0
        outcomes = runJobs([(low, (low,low+candidate,num_trials)) for low in lows])
        mean_fp = statistics.mean([fp for _, fp, _ in outcomes])
        mean_fn = statistics.mean([fn for _, _, fn in outcomes])
        performance.append((abs(mean_fp-mean_fn), candidate, mean_fn, mean_fp))
    performance.sort()
    pprint.pprint(performance)
    good_width = performance[0][1]
    print("good_width:",good_width)

    # Pass 3: re-rank the candidate lows using the chosen width.
    num_trials = 500
    performance = rankByMeanError(
        runJobs([(low, (low,low+good_width,num_trials)) for low in lows]))
    pprint.pprint(performance)
    best_low = performance[0][1]
    print("best_low:", best_low)

    # Pass 4: refine the width around good_width, keeping positive widths only.
    num_trials = 500
    widths = [good_width+(x/10.0) for x in range(-6,7) if good_width+(x/10.0) > 0.0]
    performance = rankByMeanError(
        runJobs([(w, (best_low,best_low+w,num_trials)) for w in widths]))
    pprint.pprint(performance)
    best_width = performance[0][1]
    print("best_width:",best_width)
    print("final_performance:", performance[0][0])

    winner = performance[0]
    return {'algorithm':"boxtest",
            'params':json.dumps({"low":best_low,"high":best_low+best_width}),
            'sample_size':subseries_size,
            'num_trials':num_trials,
            'trial_type':"train",
            'false_positives':winner[3],
            'false_negatives':winner[2]}


def trainMidhinge(db, unusual_case, greater, subseries_size):
    """Search for ``midhingeTest`` parameters ('distance'/'threshold') by bootstrap.

    An initial threshold is taken as half the trimean of the per-subseries
    differences between 'unusual_case' and 'other_cases'.  Distance and
    threshold are then tuned alternately in four passes on a two-thread
    ``WorkerThreads`` pool, printing the value chosen after each pass.

    Returns a dict with the keys 'algorithm', 'params' (JSON string with
    'distance' and 'threshold'), 'sample_size', 'num_trials', 'trial_type',
    'false_positives' and 'false_negatives'.

    NOTE(review): ``bootstrap3`` is not among the names explicitly imported
    from ``nanownlib.stats``; presumably it is supplied by
    ``from nanownlib import *`` -- confirm.  The worker pool created here is
    not stopped before returning.
    """

    def trainAux(distance, threshold, num_trials):
        """Return (false positive %, false negative %) for one parameter pair."""
        settings = {'distance':distance,'threshold':threshold}
        estimator = functools.partial(midhingeTest, settings, greater)
        positives = bootstrap3(estimator, db, 'train', unusual_case, subseries_size, num_trials)
        nulls = bootstrap3(estimator, db, 'train_null', unusual_case, subseries_size, num_trials)

        # An estimate of 1 is the expected answer on 'train' data and 0 the
        # expected answer on 'train_null' data; anything else is an error.
        missed = sum(1 for e in positives if e != 1)
        false_alarms = sum(1 for e in nulls if e != 0)
        return 100.0*false_alarms/num_trials, 100.0*missed/num_trials

    def runJobs(jobs):
        """Submit (job_id, args) pairs, wait, and rank the results.

        Returns a sorted list of (mean error, job_id, fn, fp), best first.
        """
        for job_id, args in jobs:
            wt.addJob(job_id, args)
        wt.wait()
        ranked = []
        while not wt.resultq.empty():
            job_id, (fp, fn) = wt.resultq.get()
            ranked.append(((fp+fn)/2.0, job_id, fn, fp))
        ranked.sort()
        return ranked

    #determine expected delta based on differences
    mean_diffs = [s['unusual_case']-s['other_cases'] for s in db.subseries('train', unusual_case)]
    threshold = trimean(mean_diffs)/2.0
    print("init_threshold:", threshold)

    wt = WorkerThreads(2, trainAux)

    # Pass 1: scan distances with the initial threshold.
    num_trials = 500
    performance = runJobs([(distance, (distance,threshold,num_trials))
                           for distance in range(1,50)])
    good_distance = performance[0][1]
    print("good_distance:",good_distance)

    # Pass 2: scan thresholds from 50% to 150% of the initial one in 4% steps.
    num_trials = 500
    scaled = [threshold*(t/100.0) for t in range(50,154,4)]
    performance = runJobs([(candidate, (good_distance,candidate,num_trials))
                           for candidate in scaled])
    good_threshold = performance[0][1]
    print("good_threshold:", good_threshold)

    # Pass 3: refine the distance within +/-4 of good_distance (values > -1).
    num_trials = 500
    nearby = [good_distance+s for s in range(-4,5) if good_distance+s > -1]
    performance = runJobs([(d, (d,good_threshold,num_trials)) for d in nearby])
    best_distance = performance[0][1]
    print("best_distance:",best_distance)

    # Pass 4: refine the threshold from 95% to 105% of good_threshold.
    num_trials = 500
    scaled = [good_threshold*(t/100.0) for t in range(95,106)]
    performance = runJobs([(candidate, (best_distance,candidate,num_trials))
                           for candidate in scaled])
    best_threshold = performance[0][1]
    print("best_threshold:", best_threshold)

    winner = performance[0]
    return {'algorithm':"midhinge",
            'params':json.dumps({'distance':best_distance,'threshold':best_threshold}),
            'sample_size':subseries_size,
            'num_trials':num_trials,
            'trial_type':"train",
            'false_positives':winner[3],
            'false_negatives':winner[2]}


#classifiers = {'boxtest':{'train':trainBoxTest2, 'test':multiBoxTest},
#               'midhinge':{'train':trainMidhinge, 'test':midhinge}}


# Open the session database named on the command line.
db = nanownlib.storage.db(options.session_data)
#cursor = db.cursor()
#cursor.execute("SELECT min(sample) min, max(sample) max FROM probes")
#train_start,test_end = cursor.fetchone()
#train_end = int(test_end-train_start)
#test_start = train_end+1
#subsample_size = min(10000,(train_end-train_start+1)/4)

# Identify the unusual test case and the sign of its difference, and report
# how long the lookup (including the two prints) took.
start = time.time()
unusual_case, unusual_diff = findUnusualTestCase(db)
greater = unusual_diff > 0
print("unusual_case:", unusual_case)
print("unusual_diff:", unusual_diff)
end = time.time()
print(":", end - start)

import cProfile


# Train the midhinge classifier at several sample sizes, printing the result
# and the elapsed time for each.
for size in (500, 1000, 2000, 4000, 5000, 6000):
    start = time.time()
    #cProfile.run('results = trainMidhinge(db, unusual_case, greater, 100)')
    results = trainMidhinge(db, unusual_case, greater, size)
    #db.addClassifierResults(results)
    print("midhinge result:")
    pprint.pprint(results)
    print(":", time.time() - start)

# The script exits here, so the box-test training below is never reached.
sys.exit(0)

start = time.time()
results = trainBoxTest(db, unusual_case, greater, 6000)
#db.addClassifierResults(results)
print("multi box test result:")
pprint.pprint(results)
print(":", time.time() - start)