Context Navigation

source: trunk/lib/bletchley/blobtools.py @ 91

Last change on this file since 91 was 82, checked in by tim, 9 years ago
minor fixes
File size: 18.0 KB

Rev	Line
[1]	1	'''
	2	A collection of tools to assist in analyzing encrypted blobs of data
	3
[66]	4	Copyright (C) 2011-2013 Virtual Security Research, LLC
[1]	5	Author: Timothy D. Morgan, Jason A. Donenfeld
	6
	7	This program is free software: you can redistribute it and/or modify
	8	it under the terms of the GNU Lesser General Public License, version 3,
	9	as published by the Free Software Foundation.
	10
	11	This program is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
	15
	16	You should have received a copy of the GNU General Public License
	17	along with this program. If not, see <http://www.gnu.org/licenses/>.
	18	'''
	19
import base64
import binascii
import fractions
import functools
import math
import operator
import sys
import traceback

from . import buffertools
[1]	28
[40]	29
	30	# urllib.parse's functions are not well suited for encoding/decoding
	31	# bytes or managing encoded case
	32	def _percentEncode(binary, plus=False, upper=True):
[82]	33	fmt = "%%%.2x"
[40]	34	if upper:
[82]	35	fmt = "%%%.2X"
[40]	36
[45]	37	ret_val = bytearray(b'')
[40]	38	for c in binary:
	39	if c not in b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789':
[45]	40	ret_val.extend((fmt % c).encode('ascii'))
[40]	41	elif plus and (c == 20):
[45]	42	ret_val.extend(b'+')
[40]	43	else:
[45]	44	ret_val.append(c)
[40]	45
	46	return ret_val
	47
	48
	49	def _percentDecode(binary, plus=False):
	50	if plus:
	51	binary = binary.replace(b'+', b' ')
	52	if binary == b'':
	53	return b''
	54	chunks = binary.split(b'%')
	55
[44]	56	ret_val = chunks[0]
	57	for chunk in chunks[1:]:
[40]	58	if len(chunk) < 2:
	59	return None
	60	try:
	61	ret_val += bytes([int(chunk[0:2], 16)]) + chunk[2:]
	62	except:
[44]	63	#traceback.print_exc()
	64	#print(repr(chunk), repr(binary))
[40]	65	return None
	66
	67	return ret_val
	68
	69
[1]	70	# abstract class
	71	class DataEncoding(object):
[40]	72	charset = frozenset(b'')
	73	extraneous_chars = b''
[1]	74	dialect = None
	75	name = None
	76	priority = None
	77
	78	def __init__(self, dialect=''):
	79	self.dialect = dialect
	80
	81	def isExample(self, blob):
	82	sblob = frozenset(blob)
[20]	83	if self.charset != None and not sblob.issubset(self.charset):
	84	return False
	85	return self.extraTests(blob)
[1]	86
	87	def extraTests(self, blob):
[20]	88	"""May return True, False, or None, for is an example, isn't an
	89	example, or unknown, respectively.
	90
	91	"""
[1]	92	return True
	93
	94	def decode(self, blob):
	95	return None
	96
	97	def encode(self, blob):
	98	return None
	99
	100
	101	class base64Encoding(DataEncoding):
	102	name = 'base64'
	103	def __init__(self, dialect='rfc3548'):
	104	super(base64Encoding, self).__init__(dialect)
	105	if dialect.startswith('rfc3548'):
[40]	106	self.c62 = b'+'
	107	self.c63 = b'/'
	108	self.pad = b'='
[1]	109	elif dialect.startswith('filename'):
[40]	110	self.c62 = b'+'
	111	self.c63 = b'-'
	112	self.pad = b'='
[1]	113	elif dialect.startswith('url1'):
[40]	114	self.c62 = b'-'
	115	self.c63 = b'_'
	116	self.pad = b'='
[1]	117	elif dialect.startswith('url2'):
[40]	118	self.c62 = b'-'
	119	self.c63 = b'_'
	120	self.pad = b'.'
[1]	121	elif dialect.startswith('url3'):
[40]	122	self.c62 = b'_'
	123	self.c63 = b'-'
	124	self.pad = b'.'
[1]	125	elif dialect.startswith('url4'):
[40]	126	self.c62 = b'-'
	127	self.c63 = b'_'
	128	self.pad = b'!'
[1]	129	elif dialect.startswith('url5'):
[40]	130	self.c62 = b'+'
	131	self.c63 = b'/'
	132	self.pad = b'$'
[53]	133	elif dialect.startswith('url6'):
	134	self.c62 = b'*'
	135	self.c63 = b'/'
	136	self.pad = b'='
[1]	137	elif dialect.startswith('otkurl'):
[40]	138	self.c62 = b'-'
	139	self.c63 = b'_'
	140	self.pad = b'*'
[1]	141	elif dialect.startswith('xmlnmtoken'):
[40]	142	self.c62 = b'.'
	143	self.c63 = b'-'
	144	self.pad = b'='
[1]	145	elif dialect.startswith('xmlname'):
[40]	146	self.c62 = b'_'
	147	self.c63 = b':'
	148	self.pad = b'='
[1]	149
[34]	150	if 'newline' in dialect:
[40]	151	self.extraneous_chars = b'\r\n'
[34]	152
[40]	153	self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	154	+b'abcdefghijklmnopqrstuvwxyz0123456789'
[34]	155	+self.c62+self.c63+self.pad+self.extraneous_chars)
[1]	156
	157	def _guessPadLength(self, nopad_len):
	158	length = ((4 - nopad_len % 4) % 4)
	159	if length != 3:
	160	return length
	161	return None
	162
	163	def extraTests(self, blob):
[34]	164	for c in self.extraneous_chars:
[40]	165	blob = blob.replace(bytes([c]), b'')
[34]	166
[66]	167	if self.dialect.endswith('intpad'):
	168	if blob[-1] not in b'012':
	169	return False
	170	nopad = blob[:-1]
	171	padlen = blob[-1] - 48 # see the ascii table
	172	else:
	173	nopad = blob.rstrip(self.pad)
	174	padlen = len(blob) - len(nopad)
	175
	176	# what the pad length ought to be
[1]	177	padlen_guess = self._guessPadLength(len(nopad))
	178	if padlen_guess == None:
	179	return False
	180
	181	# we don't accept bad pads, only missing pads
	182	if self.dialect.endswith('nopad'):
	183	return self.pad not in blob
	184
	185	# pad must not appear in the middle of the
	186	# string and must be the correct length at the end
[66]	187	return (self.pad not in nopad) and (padlen == padlen_guess)
[1]	188
	189	def decode(self, blob):
[34]	190	for c in self.extraneous_chars:
[40]	191	blob = blob.replace(bytes(c), b'')
[34]	192
[66]	193	if self.dialect.endswith('intpad'):
	194	padlen = blob[-1] - 48 # see the ascii table
	195	padlen_guess = self._guessPadLength(len(blob[:-1]))
	196	if padlen != padlen_guess:
	197	raise Exception("Invalid length for int-padded base64 string. (%d != %d)"
	198	% (padlen, padlen_guess))
	199
	200	blob = blob[:-1] + (self.pad*padlen)
	201
[1]	202	if self.dialect.endswith('nopad'):
	203	if self.pad in blob:
	204	raise Exception("Unpadded base64 string contains pad character")
	205
	206	padlen = self._guessPadLength(len(blob))
	207	if padlen == None:
	208	raise Exception("Invalid length for unpadded base64 string.")
	209
	210	blob = blob+(self.pad*padlen)
	211
	212	if not self.dialect.startswith('rfc3548'):
[42]	213	table = bytes.maketrans(self.c62+self.c63+self.pad, b'+/=')
[1]	214	blob = blob.translate(table)
	215
	216	return base64.standard_b64decode(blob)
	217
	218
	219	def encode(self, blob):
	220	ret_val = base64.standard_b64encode(blob)
	221
	222	if not self.dialect.startswith('rfc3548'):
[42]	223	table = bytes.maketrans(b'+/=', self.c62+self.c63+self.pad)
[1]	224	ret_val = ret_val.translate(table)
	225
	226	if ret_val != None and self.dialect.endswith('nopad'):
	227	ret_val = ret_val.rstrip(self.pad)
	228
[66]	229	if ret_val != None and self.dialect.endswith('intpad'):
	230	stripped = ret_val.rstrip(self.pad)
	231	ret_val = stripped + ("%d" % (len(ret_val) - len(stripped))).encode('utf-8')
	232
[1]	233	return ret_val
	234
	235
	236	class base32Encoding(DataEncoding):
	237	name = 'base32'
	238	def __init__(self, dialect='rfc3548upper'):
	239	super(base32Encoding, self).__init__(dialect)
	240	if dialect.startswith('rfc3548upper'):
[40]	241	self.pad = b'='
	242	self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'+self.pad)
[1]	243
	244	elif dialect.startswith('rfc3548lower'):
[40]	245	self.pad = b'='
	246	self.charset = frozenset(b'abcdefghijklmnopqrstuvwxyz234567'+self.pad)
[1]	247
	248	def _guessPadLength(self, nopad_len):
	249	pad_lengths = {0:0, 7:1, 5:3, 4:4, 2:6}
	250	return pad_lengths.get(nopad_len%8, None)
	251
	252	def extraTests(self, blob):
	253	nopad = blob.rstrip(self.pad)
	254	padlen_guess = self._guessPadLength(len(nopad))
	255	if padlen_guess == None:
	256	return False
	257
	258	# we don't accept bad pads, only missing pads
	259	if self.dialect.endswith('nopad'):
	260	return self.pad not in blob
	261
	262	# pad must not appear in the middle of the
	263	# string and must be the correct length at the end
	264	return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)
	265
	266
	267	def decode(self, blob):
	268	if self.dialect.endswith('nopad'):
	269	if self.pad in blob:
[40]	270	raise Exception("Unpadded base32 string contains pad character")
[1]	271
	272	padlen = self._guessPadLength(len(blob))
	273	if padlen == None:
	274	raise Exception("Invalid length for unpadded base64 string.")
	275
	276	blob = blob+(self.pad*padlen)
	277
	278	return base64.b32decode(blob.upper())
	279
	280
	281	def encode(self, blob):
	282	ret_val = base64.b32encode(blob)
	283
	284	if ret_val != None and self.dialect.endswith('nopad'):
	285	ret_val = ret_val.rstrip(self.pad)
	286
	287	if 'lower' in self.dialect:
	288	ret_val = ret_val.lower()
	289	else:
	290	ret_val = ret_val.upper()
	291
	292	return ret_val
	293
	294
	295	class hexEncoding(DataEncoding):
	296	name = 'hex'
	297	def __init__(self, dialect='mixed'):
	298	super(hexEncoding, self).__init__(dialect)
	299	if 'mixed' in dialect:
[40]	300	self.charset = frozenset(b'ABCDEFabcdef0123456789')
[1]	301	elif 'upper' in dialect:
[40]	302	self.charset = frozenset(b'ABCDEF0123456789')
[1]	303	elif 'lower' in dialect:
[40]	304	self.charset = frozenset(b'abcdef0123456789')
[1]	305
	306
	307	def extraTests(self, blob):
	308	return (len(blob) % 2 == 0)
	309
	310	def decode(self, blob):
	311	return binascii.a2b_hex(blob)
	312
	313	def encode(self, blob):
	314	if 'upper' in self.dialect:
	315	return binascii.b2a_hex(blob).upper()
	316	if 'lower' in self.dialect:
	317	return binascii.b2a_hex(blob).lower()
	318	else:
	319	return binascii.b2a_hex(blob)
	320
	321
	322	class percentEncoding(DataEncoding):
	323	name = 'percent'
	324	def __init__(self, dialect='mixed'):
	325	super(percentEncoding, self).__init__(dialect)
	326	self.charset = None
	327	if 'mixed' in dialect:
[40]	328	self.hexchars = frozenset(b'ABCDEFabcdef0123456789')
[1]	329	elif 'upper' in dialect:
[40]	330	self.hexchars = frozenset(b'ABCDEF0123456789')
[1]	331	elif 'lower' in dialect:
[40]	332	self.hexchars = frozenset(b'abcdef0123456789')
[1]	333
	334	def extraTests(self, blob):
[40]	335	chunks = blob.split(b'%')
[1]	336	if len(chunks) < 2:
[20]	337	return None
[1]	338	for c in chunks[1:]:
	339	if len(c) < 2:
	340	return False
	341	if (c[0] not in self.hexchars) or (c[1] not in self.hexchars):
	342	return False
	343	return True
	344
	345	def decode(self, blob):
[40]	346	plus = False
[1]	347	if 'plus' in self.dialect:
[40]	348	plus = True
	349	return _percentDecode(blob, plus=plus)
[1]	350
	351	def encode(self, blob):
[40]	352	upper = True
	353	plus = False
[1]	354	if 'plus' in self.dialect:
[40]	355	plus = True
	356	if 'lower' in self.dialect:
	357	upper = False
[1]	358
[40]	359	return _percentEncode(blob, plus=plus, upper=upper)
[1]	360
[66]	361	# XXX: need a better way to organize these with the possible combinations of dialects, padding, etc
	362	# for instance, can we have rfc3548-newline-nopad ?
# Each entry is (encoding class, dialect, priority).  bestEncoding()
# prefers the candidate with the numerically smallest priority.
priorities = [
    # hex
    (hexEncoding, 'upper', 100),
    (hexEncoding, 'lower', 101),
    (hexEncoding, 'mixed', 102),

    # base32
    (base32Encoding, 'rfc3548upper', 150),
    (base32Encoding, 'rfc3548lower', 151),
    (base32Encoding, 'rfc3548upper-nopad', 160),
    (base32Encoding, 'rfc3548lower-nopad', 161),

    # base64
    (base64Encoding, 'rfc3548', 200),
    (base64Encoding, 'rfc3548-nopad', 201),
    (base64Encoding, 'rfc3548-newline', 202),
    (base64Encoding, 'rfc3548-intpad', 203),
    (base64Encoding, 'filename', 210),
    (base64Encoding, 'filename-nopad', 211),
    (base64Encoding, 'filename-intpad', 212),
    (base64Encoding, 'url1', 230),
    (base64Encoding, 'url1-nopad', 231),
    (base64Encoding, 'url1-intpad', 232),
    (base64Encoding, 'otkurl', 235),
    (base64Encoding, 'otkurl-nopad', 236),
    (base64Encoding, 'otkurl-intpad', 237),
    (base64Encoding, 'url2', 240),
    (base64Encoding, 'url2-nopad', 241),
    (base64Encoding, 'url2-intpad', 242),
    (base64Encoding, 'url3', 250),
    (base64Encoding, 'url3-nopad', 251),
    (base64Encoding, 'url3-intpad', 252),
    (base64Encoding, 'url4', 260),
    (base64Encoding, 'url4-nopad', 261),
    (base64Encoding, 'url4-intpad', 262),
    (base64Encoding, 'url5', 265),
    (base64Encoding, 'url5-nopad', 266),
    # NOTE(review): 267 is used by both url5-intpad and url6 -- confirm
    # whether the tie is intended.
    (base64Encoding, 'url5-intpad', 267),
    (base64Encoding, 'url6', 267),
    (base64Encoding, 'url6-nopad', 268),
    (base64Encoding, 'url6-intpad', 269),
    (base64Encoding, 'xmlnmtoken', 270),
    (base64Encoding, 'xmlnmtoken-nopad', 271),
    (base64Encoding, 'xmlnmtoken-intpad', 272),
    (base64Encoding, 'xmlname', 280),
    (base64Encoding, 'xmlname-nopad', 281),
    (base64Encoding, 'xmlname-intpad', 282),

    # percent
    (percentEncoding, 'upper-plus', 400),
    (percentEncoding, 'upper', 401),
    (percentEncoding, 'lower-plus', 410),
    (percentEncoding, 'lower', 411),
    (percentEncoding, 'mixed-plus', 420),
    (percentEncoding, 'mixed', 421),
]

# Registry of ready-to-use encoder instances, keyed by "<name>/<dialect>".
encodings = {}
for enc, d, p in priorities:
    e = enc(d)
    e.priority = p
    encodings[enc.name + "/" + d] = e
	418
[67]	419
def supportedEncodings():
    """Return the names of all registered encodings as a sorted list."""
    return sorted(encodings.keys())
[1]	424
[28]	425
def possibleEncodings(blob):
    """Classify every registered encoding against blob.

    Returns a tuple (likely, possible) of sets of encoding names:
    'likely' holds the encodings whose isExample() returned True and
    'possible' those for which it returned None (unknown).
    """
    likely, possible = set(), set()
    for name, encoding in encodings.items():
        verdict = encoding.isExample(blob)
        if verdict == True:
            bucket = likely
        elif verdict is None:
            bucket = possible
        else:
            continue
        bucket.add(name)
    return likely, possible
[1]	436
	437
	438	def encodingIntersection(blobs):
	439	ret_val = set(encodings.keys())
[20]	440	p = set(encodings.keys())
[1]	441	for b in blobs:
[20]	442	likely,possible = possibleEncodings(b)
	443	ret_val &= likely \| possible
	444	p &= possible
	445	return ret_val - p
[1]	446
	447
	448	def bestEncoding(encs):
	449	priority = 999999999
	450	best = None
	451	for e in encs:
	452	if encodings[e].priority < priority:
	453	best = e
	454	priority = encodings[e].priority
	455	return best
	456
	457
	458	def decode(encoding, blob):
[67]	459	"""Given an encoding name and a blob, decodes the blob and returns it.
	460
	461	encoding -- A string representation of the encoding and dialect.
	462	For a list of valid encoding names, run:
	463	bletchley-analyze -e ?
	464
	465	blob -- A bytes or bytearray object to be decoded. If a string
	466	is provided instead, it will be converted to a bytes
	467	object using 'utf-8'.
	468
	469	Returns a bytes object containing the decoded representation of
	470	blob. Will throw various types of exceptions if a problem is
	471	encountered.
	472	"""
	473	if isinstance(blob, str):
	474	blob = blob.encode('utf-8')
[1]	475	return encodings[encoding].decode(blob)
	476
	477	def encode(encoding, blob):
[67]	478	"""Given an encoding name and a blob, encodes the blob and returns it.
	479
	480	encoding -- A string representation of the encoding and dialect.
	481	For a list of valid encoding names, run:
	482	bletchley-analyze -e ?
	483
	484	blob -- A bytes or bytearray object to be encoded.
	485
	486	Returns a bytes object containing the encoded representation of
	487	blob. Will throw various types of exceptions if a problem is
	488	encountered."""
[1]	489	return encodings[encoding].encode(blob)
	490
[67]	491
def decodeAll(encoding, blobs):
    """Decode each element of blobs with the named encoding and return
    the results as a list, in the same order."""
    return list(map(lambda b: encodings[encoding].decode(b), blobs))
[1]	494
[67]	495
def encodeAll(encoding, blobs):
    """Encode each element of blobs with the named encoding and return
    the results as a list, in the same order."""
    return list(map(lambda b: encodings[encoding].encode(b), blobs))
[1]	498
[67]	499
def decodeChain(decoding_chain, blob):
    """Apply a series of decodings to blob, one after the other.

    The names in decoding_chain are applied left to right.  For
    instance, with a decoding_chain of
      ['percent/lower', 'base64/rfc3548']
    blob is first decoded as 'percent/lower' and the result of that is
    then decoded as 'base64/rfc3548'.

    decoding_chain -- A sequence (list,tuple,...) of string
                      representations of the encoding and dialect.  For a
                      list of valid encoding names, run:
                        bletchley-analyze -e ?

    blob           -- A bytes or bytearray object to be decoded.  If a
                      string is provided instead, it will be converted
                      to a bytes object using 'utf-8'.

    Returns a bytes object containing the decoded representation of
    blob.  Will throw various types of exceptions if a problem is
    encountered.
    """
    return functools.reduce(lambda data, step: decode(step, data),
                            decoding_chain, blob)
	524
[67]	525
def encodeChain(encoding_chain, blob):
    """Apply a series of encodings to blob, one after the other.

    The names in encoding_chain are applied left to right.  For
    instance, with an encoding_chain of
      ['base64/rfc3548', 'percent/lower',]
    blob is first encoded as 'base64/rfc3548' and the result of that is
    then encoded as 'percent/lower'.

    encoding_chain -- A sequence (list,tuple,...) of string
                      representations of the encoding and dialect.  For a
                      list of valid encoding names, run:
                        bletchley-analyze -e ?

    blob           -- A bytes or bytearray object to be encoded.

    Returns a bytes object containing the encoded representation of
    blob.  Will throw various types of exceptions if a problem is
    encountered.
    """
    return functools.reduce(lambda data, step: encode(step, data),
                            encoding_chain, blob)
	548
[67]	549
def getLengths(s):
    """Return the distinct lengths of the items in s as a list sorted
    in ascending order."""
    return sorted({len(item) for item in s})
	557
	558
	559	def maxBlockSize(blob_lengths):
	560	divisor = 0
	561	for bl in blob_lengths:
	562	divisor = fractions.gcd(divisor, bl)
	563
	564	return divisor
	565
	566
# Folds a sequence with 'and'.  Kept as a module-level helper; note that
# it raises TypeError when handed an empty sequence.
allTrue = functools.partial(functools.reduce, (lambda x,y: x and y))

def checkCommonBlocksizes(lengths, common_block_sizes=(8,16,20)):
    """Return the common block sizes that evenly divide every length.

    lengths            -- An iterable of integer lengths.

    common_block_sizes -- The candidate block sizes to test (positive
                          integers).  Defaults to (8,16,20).

    Returns a list of the candidates that divide all of the lengths, in
    the order given.  An empty 'lengths' provides no evidence for any
    block size, so an empty list is returned.
    """
    # Materialize once: 'lengths' is scanned once per candidate and may
    # be a one-shot iterator.
    lengths = list(lengths)
    if not lengths:
        return []

    # gcd(x, cbs) == cbs is the same as cbs dividing x, so no gcd
    # implementation is needed (fractions.gcd is gone since Python 3.9).
    return [cbs for cbs in common_block_sizes
            if all(x % cbs == 0 for x in lengths)]
	577
	578
	579	def int2binary(x, bits=8):
	580	"""
	581	Integer to binary
	582	Count is number of bits
	583	"""
	584	return "".join(map(lambda y:str((x>>y)&1), range(bits-1, -1, -1)))

Note: See TracBrowser for help on using the repository browser.

Download in other formats: