'''
A collection of tools to assist in analyzing encrypted blobs of data

Copyright (C) 2011-2012 Virtual Security Research, LLC
Author: Timothy D. Morgan, Jason A. Donenfeld

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License, version 3,
 as published by the Free Software Foundation.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''

import sys
import base64
import binascii
import fractions
import operator
import functools
import itertools
from . import buffertools


# urllib.parse's functions are not well suited for encoding/decoding
# bytes or managing encoded case 
def _percentEncode(binary, plus=False, upper=True):
    fmt = "%%%.2X"
    if upper:
        fmt = "%%%.2x"

    ret_val = b''
    for c in binary:
        if c not in b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789':
            ret_val += (fmt % c).encode('ascii')
        elif plus and (c == 20):
            ret_val += b'+'
        else:
            ret_val += c
    
    return ret_val


def _percentDecode(binary, plus=False):
    ret_val = b''
    if plus:
        binary = binary.replace(b'+', b' ')
    if binary == b'':
        return b''
    chunks = binary.split(b'%')
    if binary[0] == 0x25:
        chunks = chunks[1:]

    for chunk in chunks:
        if len(chunk) < 2:
            return None
        try:
            ret_val += bytes([int(chunk[0:2], 16)]) + chunk[2:]
        except:
            print(repr(chunk))
            return None
            
    return ret_val


# abstract class
class DataEncoding(object):
    charset = frozenset(b'')
    extraneous_chars = b''
    dialect = None
    name = None
    priority = None

    def __init__(self, dialect=''):
        self.dialect = dialect

    def isExample(self, blob):
        sblob = frozenset(blob)
        if self.charset != None and not sblob.issubset(self.charset):
            return False
        return self.extraTests(blob)
    
    def extraTests(self, blob):
        """May return True, False, or None, for is an example, isn't an
        example, or unknown, respectively. 

        """
        return True

    def decode(self, blob):
        return None

    def encode(self, blob):
        return None


class base64Encoding(DataEncoding):
    name = 'base64'
    def __init__(self, dialect='rfc3548'):
        super(base64Encoding, self).__init__(dialect)
        if dialect.startswith('rfc3548'):
            self.c62 = b'+'
            self.c63 = b'/'
            self.pad = b'='
        elif dialect.startswith('filename'):
            self.c62 = b'+'
            self.c63 = b'-'
            self.pad = b'='
        elif dialect.startswith('url1'):
            self.c62 = b'-'
            self.c63 = b'_'
            self.pad = b'='
        elif dialect.startswith('url2'):
            self.c62 = b'-'
            self.c63 = b'_'
            self.pad = b'.'
        elif dialect.startswith('url3'):
            self.c62 = b'_'
            self.c63 = b'-'
            self.pad = b'.'
        elif dialect.startswith('url4'):
            self.c62 = b'-'
            self.c63 = b'_'
            self.pad = b'!'
        elif dialect.startswith('url5'):
            self.c62 = b'+'
            self.c63 = b'/'
            self.pad = b'$'
        elif dialect.startswith('otkurl'):
            self.c62 = b'-'
            self.c63 = b'_'
            self.pad = b'*'
        elif dialect.startswith('xmlnmtoken'):
            self.c62 = b'.'
            self.c63 = b'-'
            self.pad = b'='
        elif dialect.startswith('xmlname'):
            self.c62 = b'_'
            self.c63 = b':'
            self.pad = b'='
        
        if 'newline' in dialect:
            self.extraneous_chars = b'\r\n'

        self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                                 +b'abcdefghijklmnopqrstuvwxyz0123456789'
                                 +self.c62+self.c63+self.pad+self.extraneous_chars)

    def _guessPadLength(self, nopad_len):
        length = ((4 - nopad_len % 4) % 4)
        if length != 3:
            return length
        return None

    def extraTests(self, blob):
        for c in self.extraneous_chars:
            blob = blob.replace(bytes([c]), b'')

        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess == None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the 
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)

    def decode(self, blob):
        for c in self.extraneous_chars:
            blob = blob.replace(bytes(c), b'')

        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base64 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen == None:
                raise Exception("Invalid length for unpadded base64 string.")

            blob = blob+(self.pad*padlen)

        if not self.dialect.startswith('rfc3548'):
            table = bytes.maketrans(self.c62+self.c63+self.pad, b'+/=')
            blob = blob.translate(table)

        return base64.standard_b64decode(blob)


    def encode(self, blob):
        ret_val = base64.standard_b64encode(blob)

        if not self.dialect.startswith('rfc3548'):
            table = bytes.maketrans(b'+/=', self.c62+self.c63+self.pad)
            ret_val = ret_val.translate(table)

        if ret_val != None and self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        return ret_val


class base32Encoding(DataEncoding):
    name = 'base32'
    def __init__(self, dialect='rfc3548upper'):
        super(base32Encoding, self).__init__(dialect)
        if dialect.startswith('rfc3548upper'):
            self.pad = b'='
            self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'+self.pad)

        elif dialect.startswith('rfc3548lower'):
            self.pad = b'='
            self.charset = frozenset(b'abcdefghijklmnopqrstuvwxyz234567'+self.pad)

    def _guessPadLength(self, nopad_len):
        pad_lengths = {0:0, 7:1, 5:3, 4:4, 2:6}
        return pad_lengths.get(nopad_len%8, None)  

    def extraTests(self, blob):
        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess == None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the 
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)


    def decode(self, blob):
        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base32 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen == None:
                raise Exception("Invalid length for unpadded base64 string.")

            blob = blob+(self.pad*padlen)

        return base64.b32decode(blob.upper())


    def encode(self, blob):
        ret_val = base64.b32encode(blob)

        if ret_val != None and self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        if 'lower' in self.dialect:
            ret_val = ret_val.lower()
        else:
            ret_val = ret_val.upper()

        return ret_val


class hexEncoding(DataEncoding):
    name = 'hex'
    def __init__(self, dialect='mixed'):
        super(hexEncoding, self).__init__(dialect)
        if 'mixed' in dialect:
            self.charset = frozenset(b'ABCDEFabcdef0123456789')
        elif 'upper' in dialect:
            self.charset = frozenset(b'ABCDEF0123456789')            
        elif 'lower' in dialect:
            self.charset = frozenset(b'abcdef0123456789')


    def extraTests(self, blob):
        return (len(blob) % 2 == 0)

    def decode(self, blob):
        return binascii.a2b_hex(blob)

    def encode(self, blob):
        if 'upper' in self.dialect:
            return binascii.b2a_hex(blob).upper()
        if 'lower' in self.dialect:
            return binascii.b2a_hex(blob).lower()
        else:
            return binascii.b2a_hex(blob)


class percentEncoding(DataEncoding):
    name = 'percent'
    def __init__(self, dialect='mixed'):
        super(percentEncoding, self).__init__(dialect)
        self.charset = None
        if 'mixed' in dialect:
            self.hexchars = frozenset(b'ABCDEFabcdef0123456789')
        elif 'upper' in dialect:
            self.hexchars = frozenset(b'ABCDEF0123456789')            
        elif 'lower' in dialect:
            self.hexchars = frozenset(b'abcdef0123456789')

    def extraTests(self, blob):
        chunks = blob.split(b'%')
        if len(chunks) < 2:
            return None
        for c in chunks[1:]:
            if len(c) < 2:
                return False
            if (c[0] not in self.hexchars) or (c[1] not in self.hexchars):
                return False
        return True

    def decode(self, blob):
        plus = False
        if 'plus' in self.dialect:
            plus = True
        return _percentDecode(blob, plus=plus)

    def encode(self, blob):
        upper = True
        plus = False
        if 'plus' in self.dialect:
            plus = True
        if 'lower' in self.dialect:
            upper = False

        return _percentEncode(blob, plus=plus, upper=upper)


priorities = [
    (hexEncoding, 'upper', 100),
    (hexEncoding, 'lower', 101),
    (hexEncoding, 'mixed', 102),
    (base32Encoding, 'rfc3548upper', 150),
    (base32Encoding, 'rfc3548lower', 151),
    (base32Encoding, 'rfc3548upper-nopad', 160),
    (base32Encoding, 'rfc3548lower-nopad', 161),
    (base64Encoding, 'rfc3548', 200),
    (base64Encoding, 'rfc3548-nopad', 201),
    (base64Encoding, 'rfc3548-newline', 202),
    (base64Encoding, 'filename', 210),
    (base64Encoding, 'filename-nopad', 211),
    (base64Encoding, 'url1', 230),
    (base64Encoding, 'url1-nopad', 231),
    (base64Encoding, 'otkurl', 235),
    (base64Encoding, 'otkurl-nopad', 236),
    (base64Encoding, 'url2', 240),
    (base64Encoding, 'url2-nopad', 241),
    (base64Encoding, 'url3', 250),
    (base64Encoding, 'url3-nopad', 251),
    (base64Encoding, 'url4', 260),
    (base64Encoding, 'url4-nopad', 261),
    (base64Encoding, 'url5', 265),
    (base64Encoding, 'url5-nopad', 266),
    (base64Encoding, 'xmlnmtoken', 270),
    (base64Encoding, 'xmlnmtoken-nopad', 271),
    (base64Encoding, 'xmlname', 280),
    (base64Encoding, 'xmlname-nopad', 281),
    (percentEncoding, 'upper-plus', 400),
    (percentEncoding, 'upper', 401),
    (percentEncoding, 'lower-plus', 410),
    (percentEncoding, 'lower', 411),
    (percentEncoding, 'mixed-plus', 420),
    (percentEncoding, 'mixed', 421),
    ]

encodings = {}
for enc,d,p in priorities:
    e = enc(d)
    e.priority = p
    encodings["%s/%s" % (enc.name, d)] = e

def supportedEncodings():
    e = list(encodings.keys())
    e.sort()
    return e


def possibleEncodings(blob):
    likely = set()
    possible = set()
    for name,encoding in encodings.items():
        result = encoding.isExample(blob)
        if result == True:
            likely.add(name)
        elif result == None:
            possible.add(name)
    return likely,possible


def encodingIntersection(blobs):
    ret_val = set(encodings.keys())
    p = set(encodings.keys())
    for b in blobs:
        likely,possible = possibleEncodings(b)
        ret_val &= likely | possible
        p &= possible
    return ret_val - p


def bestEncoding(encs):
    priority = 999999999
    best = None
    for e in encs:
        if encodings[e].priority < priority:
            best = e
            priority = encodings[e].priority
    return best


def decode(encoding, blob):
    return encodings[encoding].decode(blob)

def encode(encoding, blob):
    return encodings[encoding].encode(blob)

def decodeAll(encoding, blobs):
    return [encodings[encoding].decode(b) for b in blobs]

def encodeAll(encoding, blobs):
    return [encodings[encoding].encode(b) for b in blobs]

def decodeChain(decoding_chain, blob):
    for decoding in decoding_chain:
        blob = decode(decoding, blob)
    return blob

def encodeChain(encoding_chain, blob):
    for encoding in encoding_chain:
        blob = encode(encoding, blob)
    return blob

def getLengths(s):
    lengths = set()
    for bin in s:
        lengths.add(len(bin))
    lengths = list(lengths)
    lengths.sort()
    return lengths


def maxBlockSize(blob_lengths):
    divisor = 0
    for bl in blob_lengths:
        divisor = fractions.gcd(divisor, bl)

    return divisor


allTrue = functools.partial(functools.reduce, (lambda x,y: x and y))

def checkCommonBlocksizes(lengths):
    common_block_sizes = (8,16,20)
    ret_val = []
    for cbs in common_block_sizes:
        gcdIsCBS = (lambda x: fractions.gcd(x,cbs)==cbs)
        if allTrue(map(gcdIsCBS, lengths)):
            ret_val.append(cbs)
    return ret_val


def int2binary(x, bits=8):
        """
        Integer to binary
        Count is number of bits
        """
        return "".join(map(lambda y:str((x>>y)&1), range(bits-1, -1, -1)))


#XXX: move this to buffertools
def smartPermutateBlobs(blobs, block_size=8):
    """
    Intelligently permutates through blocks in blobs.
    If the same blob shows up in the same place for
    every blob, the resultant permutations will have
    this property as well.
    blobs should be an array containing blobs
    block_size should be an integer block_size or an
    array of block sizes.
    """

    if len(blobs) == 0:
        return

    if not isinstance(block_size, (int, long)):
        for size in block_size:
             for blob in smartPermutateBlobs(blobs, size):
                 yield blob
        return

    # First we find the indexes of the chunks that are different
    different = set()
    for combo in itertools.combinations(blobs, 2):
        different |= set(buffertools.blockWiseDiff(block_size, combo[0], combo[1]))
    
    # Next we form a set containing the chunks that are different
    different_chunks = []
    for blob in blobs:
        different_chunks.extend([blob[i * block_size:(i + 1) * block_size] for i in different])
    # Remove duplicates
    different_chunks = set(different_chunks)
    
    # We want to know which chunks are the same, too
    chunk_len = len(blobs[0]) / block_size
    same = set(range(0, chunk_len)) - different

    # Now let's mix and match the differnet blocks, for all possible lengths
    for i in range(1, chunk_len + 1):
        for mix in itertools.permutations(different_chunks, i):
            # We add back in the part that stays the same
            for j in same:
                mix.insert(j, blobs[0][j * block_size:(j + 1) * block_size])
            mix = "".join(mix)
            if mix in blobs:
                continue
            yield mix