Context Navigation

source: trunk/lib/bletchley/blobtools.py @ 42

Last change on this file since 42 was 42, checked in by tmorgan, 11 years ago
fixed bug in 2->3 transition
File size: 15.3 KB

Rev	Line
[1]	1	'''
	2	A collection of tools to assist in analyzing encrypted blobs of data
	3
	4	Copyright (C) 2011-2012 Virtual Security Research, LLC
	5	Author: Timothy D. Morgan, Jason A. Donenfeld
	6
	7	This program is free software: you can redistribute it and/or modify
	8	it under the terms of the GNU Lesser General Public License, version 3,
	9	as published by the Free Software Foundation.
	10
	11	This program is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
	15
	16	You should have received a copy of the GNU General Public License
	17	along with this program. If not, see <http://www.gnu.org/licenses/>.
	18	'''
	19
	20	import sys
	21	import base64
	22	import binascii
	23	import fractions
	24	import operator
	25	import functools
	26	import itertools
[40]	27	from . import buffertools
[1]	28
[40]	29
	30	# urllib.parse's functions are not well suited for encoding/decoding
	31	# bytes or managing encoded case
	32	def _percentEncode(binary, plus=False, upper=True):
	33	fmt = "%%%.2X"
	34	if upper:
	35	fmt = "%%%.2x"
	36
	37	ret_val = b''
	38	for c in binary:
	39	if c not in b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789':
	40	ret_val += (fmt % c).encode('ascii')
	41	elif plus and (c == 20):
	42	ret_val += b'+'
	43	else:
	44	ret_val += c
	45
	46	return ret_val
	47
	48
	49	def _percentDecode(binary, plus=False):
	50	ret_val = b''
	51	if plus:
	52	binary = binary.replace(b'+', b' ')
	53	if binary == b'':
	54	return b''
	55	chunks = binary.split(b'%')
	56	if binary[0] == 0x25:
	57	chunks = chunks[1:]
	58
	59	for chunk in chunks:
	60	if len(chunk) < 2:
	61	return None
	62	try:
	63	ret_val += bytes([int(chunk[0:2], 16)]) + chunk[2:]
	64	except:
	65	print(repr(chunk))
	66	return None
	67
	68	return ret_val
	69
	70
[1]	71	# abstract class
	72	class DataEncoding(object):
[40]	73	charset = frozenset(b'')
	74	extraneous_chars = b''
[1]	75	dialect = None
	76	name = None
	77	priority = None
	78
	79	def __init__(self, dialect=''):
	80	self.dialect = dialect
	81
	82	def isExample(self, blob):
	83	sblob = frozenset(blob)
[20]	84	if self.charset != None and not sblob.issubset(self.charset):
	85	return False
	86	return self.extraTests(blob)
[1]	87
	88	def extraTests(self, blob):
[20]	89	"""May return True, False, or None, for is an example, isn't an
	90	example, or unknown, respectively.
	91
	92	"""
[1]	93	return True
	94
	95	def decode(self, blob):
	96	return None
	97
	98	def encode(self, blob):
	99	return None
	100
	101
	102	class base64Encoding(DataEncoding):
	103	name = 'base64'
	104	def __init__(self, dialect='rfc3548'):
	105	super(base64Encoding, self).__init__(dialect)
	106	if dialect.startswith('rfc3548'):
[40]	107	self.c62 = b'+'
	108	self.c63 = b'/'
	109	self.pad = b'='
[1]	110	elif dialect.startswith('filename'):
[40]	111	self.c62 = b'+'
	112	self.c63 = b'-'
	113	self.pad = b'='
[1]	114	elif dialect.startswith('url1'):
[40]	115	self.c62 = b'-'
	116	self.c63 = b'_'
	117	self.pad = b'='
[1]	118	elif dialect.startswith('url2'):
[40]	119	self.c62 = b'-'
	120	self.c63 = b'_'
	121	self.pad = b'.'
[1]	122	elif dialect.startswith('url3'):
[40]	123	self.c62 = b'_'
	124	self.c63 = b'-'
	125	self.pad = b'.'
[1]	126	elif dialect.startswith('url4'):
[40]	127	self.c62 = b'-'
	128	self.c63 = b'_'
	129	self.pad = b'!'
[1]	130	elif dialect.startswith('url5'):
[40]	131	self.c62 = b'+'
	132	self.c63 = b'/'
	133	self.pad = b'$'
[1]	134	elif dialect.startswith('otkurl'):
[40]	135	self.c62 = b'-'
	136	self.c63 = b'_'
	137	self.pad = b'*'
[1]	138	elif dialect.startswith('xmlnmtoken'):
[40]	139	self.c62 = b'.'
	140	self.c63 = b'-'
	141	self.pad = b'='
[1]	142	elif dialect.startswith('xmlname'):
[40]	143	self.c62 = b'_'
	144	self.c63 = b':'
	145	self.pad = b'='
[1]	146
[34]	147	if 'newline' in dialect:
[40]	148	self.extraneous_chars = b'\r\n'
[34]	149
[40]	150	self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	151	+b'abcdefghijklmnopqrstuvwxyz0123456789'
[34]	152	+self.c62+self.c63+self.pad+self.extraneous_chars)
[1]	153
	154	def _guessPadLength(self, nopad_len):
	155	length = ((4 - nopad_len % 4) % 4)
	156	if length != 3:
	157	return length
	158	return None
	159
	160	def extraTests(self, blob):
[34]	161	for c in self.extraneous_chars:
[40]	162	blob = blob.replace(bytes([c]), b'')
[34]	163
[1]	164	nopad = blob.rstrip(self.pad)
	165	padlen_guess = self._guessPadLength(len(nopad))
	166	if padlen_guess == None:
	167	return False
	168
	169	# we don't accept bad pads, only missing pads
	170	if self.dialect.endswith('nopad'):
	171	return self.pad not in blob
	172
	173	# pad must not appear in the middle of the
	174	# string and must be the correct length at the end
	175	return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)
	176
	177	def decode(self, blob):
[34]	178	for c in self.extraneous_chars:
[40]	179	blob = blob.replace(bytes(c), b'')
[34]	180
[1]	181	if self.dialect.endswith('nopad'):
	182	if self.pad in blob:
	183	raise Exception("Unpadded base64 string contains pad character")
	184
	185	padlen = self._guessPadLength(len(blob))
	186	if padlen == None:
	187	raise Exception("Invalid length for unpadded base64 string.")
	188
	189	blob = blob+(self.pad*padlen)
	190
	191	if not self.dialect.startswith('rfc3548'):
[42]	192	table = bytes.maketrans(self.c62+self.c63+self.pad, b'+/=')
[1]	193	blob = blob.translate(table)
	194
	195	return base64.standard_b64decode(blob)
	196
	197
	198	def encode(self, blob):
	199	ret_val = base64.standard_b64encode(blob)
	200
	201	if not self.dialect.startswith('rfc3548'):
[42]	202	table = bytes.maketrans(b'+/=', self.c62+self.c63+self.pad)
[1]	203	ret_val = ret_val.translate(table)
	204
	205	if ret_val != None and self.dialect.endswith('nopad'):
	206	ret_val = ret_val.rstrip(self.pad)
	207
	208	return ret_val
	209
	210
	211	class base32Encoding(DataEncoding):
	212	name = 'base32'
	213	def __init__(self, dialect='rfc3548upper'):
	214	super(base32Encoding, self).__init__(dialect)
	215	if dialect.startswith('rfc3548upper'):
[40]	216	self.pad = b'='
	217	self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'+self.pad)
[1]	218
	219	elif dialect.startswith('rfc3548lower'):
[40]	220	self.pad = b'='
	221	self.charset = frozenset(b'abcdefghijklmnopqrstuvwxyz234567'+self.pad)
[1]	222
	223	def _guessPadLength(self, nopad_len):
	224	pad_lengths = {0:0, 7:1, 5:3, 4:4, 2:6}
	225	return pad_lengths.get(nopad_len%8, None)
	226
	227	def extraTests(self, blob):
	228	nopad = blob.rstrip(self.pad)
	229	padlen_guess = self._guessPadLength(len(nopad))
	230	if padlen_guess == None:
	231	return False
	232
	233	# we don't accept bad pads, only missing pads
	234	if self.dialect.endswith('nopad'):
	235	return self.pad not in blob
	236
	237	# pad must not appear in the middle of the
	238	# string and must be the correct length at the end
	239	return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)
	240
	241
	242	def decode(self, blob):
	243	if self.dialect.endswith('nopad'):
	244	if self.pad in blob:
[40]	245	raise Exception("Unpadded base32 string contains pad character")
[1]	246
	247	padlen = self._guessPadLength(len(blob))
	248	if padlen == None:
	249	raise Exception("Invalid length for unpadded base64 string.")
	250
	251	blob = blob+(self.pad*padlen)
	252
	253	return base64.b32decode(blob.upper())
	254
	255
	256	def encode(self, blob):
	257	ret_val = base64.b32encode(blob)
	258
	259	if ret_val != None and self.dialect.endswith('nopad'):
	260	ret_val = ret_val.rstrip(self.pad)
	261
	262	if 'lower' in self.dialect:
	263	ret_val = ret_val.lower()
	264	else:
	265	ret_val = ret_val.upper()
	266
	267	return ret_val
	268
	269
	270	class hexEncoding(DataEncoding):
	271	name = 'hex'
	272	def __init__(self, dialect='mixed'):
	273	super(hexEncoding, self).__init__(dialect)
	274	if 'mixed' in dialect:
[40]	275	self.charset = frozenset(b'ABCDEFabcdef0123456789')
[1]	276	elif 'upper' in dialect:
[40]	277	self.charset = frozenset(b'ABCDEF0123456789')
[1]	278	elif 'lower' in dialect:
[40]	279	self.charset = frozenset(b'abcdef0123456789')
[1]	280
	281
	282	def extraTests(self, blob):
	283	return (len(blob) % 2 == 0)
	284
	285	def decode(self, blob):
	286	return binascii.a2b_hex(blob)
	287
	288	def encode(self, blob):
	289	if 'upper' in self.dialect:
	290	return binascii.b2a_hex(blob).upper()
	291	if 'lower' in self.dialect:
	292	return binascii.b2a_hex(blob).lower()
	293	else:
	294	return binascii.b2a_hex(blob)
	295
	296
	297	class percentEncoding(DataEncoding):
	298	name = 'percent'
	299	def __init__(self, dialect='mixed'):
	300	super(percentEncoding, self).__init__(dialect)
	301	self.charset = None
	302	if 'mixed' in dialect:
[40]	303	self.hexchars = frozenset(b'ABCDEFabcdef0123456789')
[1]	304	elif 'upper' in dialect:
[40]	305	self.hexchars = frozenset(b'ABCDEF0123456789')
[1]	306	elif 'lower' in dialect:
[40]	307	self.hexchars = frozenset(b'abcdef0123456789')
[1]	308
	309	def extraTests(self, blob):
[40]	310	chunks = blob.split(b'%')
[1]	311	if len(chunks) < 2:
[20]	312	return None
[1]	313	for c in chunks[1:]:
	314	if len(c) < 2:
	315	return False
	316	if (c[0] not in self.hexchars) or (c[1] not in self.hexchars):
	317	return False
	318	return True
	319
	320	def decode(self, blob):
[40]	321	plus = False
[1]	322	if 'plus' in self.dialect:
[40]	323	plus = True
	324	return _percentDecode(blob, plus=plus)
[1]	325
	326	def encode(self, blob):
[40]	327	upper = True
	328	plus = False
[1]	329	if 'plus' in self.dialect:
[40]	330	plus = True
	331	if 'lower' in self.dialect:
	332	upper = False
[1]	333
[40]	334	return _percentEncode(blob, plus=plus, upper=upper)
[1]	335
[40]	336
# (encoding class, dialect, priority) -- a lower priority value is preferred
# by bestEncoding() when several encodings match.
priorities = [
    (hexEncoding,     'upper',              100),
    (hexEncoding,     'lower',              101),
    (hexEncoding,     'mixed',              102),
    (base32Encoding,  'rfc3548upper',       150),
    (base32Encoding,  'rfc3548lower',       151),
    (base32Encoding,  'rfc3548upper-nopad', 160),
    (base32Encoding,  'rfc3548lower-nopad', 161),
    (base64Encoding,  'rfc3548',            200),
    (base64Encoding,  'rfc3548-nopad',      201),
    (base64Encoding,  'rfc3548-newline',    202),
    (base64Encoding,  'filename',           210),
    (base64Encoding,  'filename-nopad',     211),
    (base64Encoding,  'url1',               230),
    (base64Encoding,  'url1-nopad',         231),
    (base64Encoding,  'otkurl',             235),
    (base64Encoding,  'otkurl-nopad',       236),
    (base64Encoding,  'url2',               240),
    (base64Encoding,  'url2-nopad',         241),
    (base64Encoding,  'url3',               250),
    (base64Encoding,  'url3-nopad',         251),
    (base64Encoding,  'url4',               260),
    (base64Encoding,  'url4-nopad',         261),
    (base64Encoding,  'url5',               265),
    (base64Encoding,  'url5-nopad',         266),
    (base64Encoding,  'xmlnmtoken',         270),
    (base64Encoding,  'xmlnmtoken-nopad',   271),
    (base64Encoding,  'xmlname',            280),
    (base64Encoding,  'xmlname-nopad',      281),
    (percentEncoding, 'upper-plus',         400),
    (percentEncoding, 'upper',              401),
    (percentEncoding, 'lower-plus',         410),
    (percentEncoding, 'lower',              411),
    (percentEncoding, 'mixed-plus',         420),
    (percentEncoding, 'mixed',              421),
]

# Registry of ready-to-use codec instances, keyed by "<name>/<dialect>".
encodings = {}
for enc, d, p in priorities:
    e = enc(d)
    e.priority = p
    encodings["/".join((enc.name, d))] = e
	379
def supportedEncodings():
    """Return the names of all registered encodings as a sorted list."""
    return sorted(encodings)
[1]	384
[28]	385
def possibleEncodings(blob):
    """Classify every registered encoding against `blob`.

    Returns a pair of sets of encoding names: those whose isExample()
    returned True, and those for which it returned None (unknown).
    """
    likely, possible = set(), set()
    for label, codec in encodings.items():
        verdict = codec.isExample(blob)
        if verdict is None:
            possible.add(label)
        elif verdict is True:
            likely.add(label)
    return likely, possible
[1]	396
	397
	398	def encodingIntersection(blobs):
	399	ret_val = set(encodings.keys())
[20]	400	p = set(encodings.keys())
[1]	401	for b in blobs:
[20]	402	likely,possible = possibleEncodings(b)
	403	ret_val &= likely \| possible
	404	p &= possible
	405	return ret_val - p
[1]	406
	407
	408	def bestEncoding(encs):
	409	priority = 999999999
	410	best = None
	411	for e in encs:
	412	if encodings[e].priority < priority:
	413	best = e
	414	priority = encodings[e].priority
	415	return best
	416
	417
	418	def decode(encoding, blob):
	419	return encodings[encoding].decode(blob)
	420
	421	def encode(encoding, blob):
	422	return encodings[encoding].encode(blob)
	423
	424	def decodeAll(encoding, blobs):
[40]	425	return [encodings[encoding].decode(b) for b in blobs]
[1]	426
	427	def encodeAll(encoding, blobs):
[40]	428	return [encodings[encoding].encode(b) for b in blobs]
[1]	429
	430	def decodeChain(decoding_chain, blob):
	431	for decoding in decoding_chain:
	432	blob = decode(decoding, blob)
	433	return blob
	434
	435	def encodeChain(encoding_chain, blob):
	436	for encoding in encoding_chain:
	437	blob = encode(encoding, blob)
	438	return blob
	439
	440	def getLengths(s):
	441	lengths = set()
	442	for bin in s:
	443	lengths.add(len(bin))
	444	lengths = list(lengths)
	445	lengths.sort()
	446	return lengths
	447
	448
	449	def maxBlockSize(blob_lengths):
	450	divisor = 0
	451	for bl in blob_lengths:
	452	divisor = fractions.gcd(divisor, bl)
	453
	454	return divisor
	455
	456
# Folds an iterable with `and`.  Kept for backward compatibility; note that
# functools.reduce raises TypeError when the iterable is empty.
allTrue = functools.partial(functools.reduce, (lambda x,y: x and y))


def checkCommonBlocksizes(lengths):
    """Return the common block sizes (8, 16, 20) that evenly divide every
    value in `lengths`, in ascending order.

    An empty `lengths` rules nothing out, so all candidates are returned.
    """
    common_block_sizes = (8, 16, 20)
    # Materialise once so that a generator can be tested against every
    # candidate size.
    lengths = list(lengths)
    return [cbs for cbs in common_block_sizes
            if all(length % cbs == 0 for length in lengths)]
	467
	468
	469	def int2binary(x, bits=8):
	470	"""
	471	Integer to binary
	472	Count is number of bits
	473	"""
	474	return "".join(map(lambda y:str((x>>y)&1), range(bits-1, -1, -1)))
	475
	476
[20]	477	#XXX: move this to buffertools
def smartPermutateBlobs(blobs, block_size=8):
    """
    Intelligently permutates through blocks in blobs.
    If the same block shows up in the same place for
    every blob, the resultant permutations will have
    this property as well.

    blobs should be a sequence of blobs (bytes or str, all of one type)
    block_size should be an integer block size or an
    iterable of block sizes.

    This is a generator; it yields each new blob that is not already
    present in `blobs`.
    """

    if not blobs:
        return

    if not isinstance(block_size, int):
        for size in block_size:
            yield from smartPermutateBlobs(blobs, size)
        return

    # First we find the indexes of the chunks that are different
    # (blockWiseDiff's results are used as block indexes below --
    # presumably it returns the indexes of the differing blocks)
    different = set()
    for first, second in itertools.combinations(blobs, 2):
        different |= set(buffertools.blockWiseDiff(block_size, first, second))

    # Next we collect the distinct chunks found at those indexes; sorting
    # makes the order of the generated permutations reproducible
    different_chunks = sorted({blob[i * block_size:(i + 1) * block_size]
                               for blob in blobs for i in different})

    # We want to know which chunks are the same, too.  Integer division:
    # a partial trailing block is ignored.
    chunk_len = len(blobs[0]) // block_size
    # Ascending order so that each insert below lands at its final index
    same = sorted(set(range(chunk_len)) - different)

    # Empty value of the blobs' own type, used to join the blocks
    joiner = blobs[0][:0]

    # Now let's mix and match the different blocks, for all possible lengths
    for i in range(1, chunk_len + 1):
        for combo in itertools.permutations(different_chunks, i):
            # permutations() yields tuples; we need something mutable
            mix = list(combo)
            # We add back in the part that stays the same
            for j in same:
                mix.insert(j, blobs[0][j * block_size:(j + 1) * block_size])
            candidate = joiner.join(mix)
            if candidate in blobs:
                continue
            yield candidate

Note: See TracBrowser for help on using the repository browser.

Download in other formats: