Context Navigation

source: releases/0.0.1/lib/bletchley/blobtools.py @ 83

Last change on this file since 83 was 53, checked in by tmorgan, 11 years ago
new custom base64 dialect
File size: 15.5 KB

Line
1	'''
2	A collection of tools to assist in analyzing encrypted blobs of data
3
4	Copyright (C) 2011-2012 Virtual Security Research, LLC
5	Author: Timothy D. Morgan, Jason A. Donenfeld
6
7	This program is free software: you can redistribute it and/or modify
8	it under the terms of the GNU Lesser General Public License, version 3,
9	as published by the Free Software Foundation.
10
11	This program is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program. If not, see <http://www.gnu.org/licenses/>.
18	'''
19
import base64
import binascii
import fractions
import functools
import itertools
import math
import operator
import sys
import traceback

from . import buffertools
29
30
31	# urllib.parse's functions are not well suited for encoding/decoding
32	# bytes or managing encoded case
33	def _percentEncode(binary, plus=False, upper=True):
34	fmt = "%%%.2X"
35	if upper:
36	fmt = "%%%.2x"
37
38	ret_val = bytearray(b'')
39	for c in binary:
40	if c not in b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789':
41	ret_val.extend((fmt % c).encode('ascii'))
42	elif plus and (c == 20):
43	ret_val.extend(b'+')
44	else:
45	ret_val.append(c)
46
47	return ret_val
48
49
50	def _percentDecode(binary, plus=False):
51	if plus:
52	binary = binary.replace(b'+', b' ')
53	if binary == b'':
54	return b''
55	chunks = binary.split(b'%')
56
57	ret_val = chunks[0]
58	for chunk in chunks[1:]:
59	if len(chunk) < 2:
60	return None
61	try:
62	ret_val += bytes([int(chunk[0:2], 16)]) + chunk[2:]
63	except:
64	#traceback.print_exc()
65	#print(repr(chunk), repr(binary))
66	return None
67
68	return ret_val
69
70
71	# abstract class
class DataEncoding(object):
    """Base class shared by all encodings; not meant to be used directly.

    Subclasses set `name`, fill in `charset` (or set it to None to skip the
    character-set check) and override extraTests/decode/encode.
    """
    # byte values permitted in an encoded blob
    charset = frozenset(b'')
    # bytes a subclass may choose to ignore inside a blob
    extraneous_chars = b''
    dialect = None
    name = None
    priority = None

    def __init__(self, dialect=''):
        self.dialect = dialect

    def isExample(self, blob):
        """Return False if blob uses bytes outside charset, otherwise
        whatever extraTests reports for it.
        """
        seen = frozenset(blob)
        if self.charset is not None:
            if seen.difference(self.charset):
                return False
        return self.extraTests(blob)

    def extraTests(self, blob):
        """May return True, False, or None, for is an example, isn't an
        example, or unknown, respectively.

        """
        return True

    def decode(self, blob):
        """Placeholder; the base class decodes nothing and returns None."""
        return None

    def encode(self, blob):
        """Placeholder; the base class encodes nothing and returns None."""
        return None
100
101
class base64Encoding(DataEncoding):
    """Base64 in a number of dialects.

    A dialect name starts with one of the prefixes in _DIALECTS, which
    selects the 62nd and 63rd alphabet characters and the pad character.
    A name containing 'newline' tolerates CR/LF inside the data, and a name
    ending in 'nopad' means the trailing padding is omitted.
    """
    name = 'base64'

    # (dialect prefix, 62nd char, 63rd char, pad char); first match wins
    _DIALECTS = (
        ('rfc3548',    b'+', b'/', b'='),
        ('filename',   b'+', b'-', b'='),
        ('url1',       b'-', b'_', b'='),
        ('url2',       b'-', b'_', b'.'),
        ('url3',       b'_', b'-', b'.'),
        ('url4',       b'-', b'_', b'!'),
        ('url5',       b'+', b'/', b'$'),
        ('url6',       b'*', b'/', b'='),
        ('otkurl',     b'-', b'_', b'*'),
        ('xmlnmtoken', b'.', b'-', b'='),
        ('xmlname',    b'_', b':', b'='),
    )

    def __init__(self, dialect='rfc3548'):
        """Configure the alphabet for `dialect`.

        Raises ValueError if the dialect matches no known prefix.
        """
        super(base64Encoding, self).__init__(dialect)
        for prefix, c62, c63, pad in self._DIALECTS:
            if dialect.startswith(prefix):
                self.c62, self.c63, self.pad = c62, c63, pad
                break
        else:
            raise ValueError("Unknown base64 dialect: %r" % (dialect,))

        if 'newline' in dialect:
            self.extraneous_chars = b'\r\n'

        self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                                 + b'abcdefghijklmnopqrstuvwxyz0123456789'
                                 + self.c62 + self.c63 + self.pad
                                 + self.extraneous_chars)

    def _stripExtraneous(self, blob):
        """Return blob with every byte of extraneous_chars removed."""
        return blob.translate(None, self.extraneous_chars)

    def _guessPadLength(self, nopad_len):
        """Return the number of pad characters needed after `nopad_len`
        data characters, or None if that length cannot be valid base64.
        """
        length = ((4 - nopad_len % 4) % 4)
        if length != 3:
            return length
        return None

    def extraTests(self, blob):
        """Check length and padding once extraneous characters are removed."""
        blob = self._stripExtraneous(blob)

        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess is None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad) + padlen_guess)

    def decode(self, blob):
        """Decode blob to bytes.

        Raises Exception for a 'nopad' dialect when blob contains the pad
        character or has an impossible length; base64.standard_b64decode
        may raise its own errors for otherwise malformed data.
        """
        # Strip first: the pad-length guess below relies on the true count
        # of data characters.
        blob = self._stripExtraneous(blob)

        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base64 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen is None:
                raise Exception("Invalid length for unpadded base64 string.")

            blob = blob + (self.pad * padlen)

        if not self.dialect.startswith('rfc3548'):
            # map the dialect's special characters onto the standard alphabet
            table = bytes.maketrans(self.c62 + self.c63 + self.pad, b'+/=')
            blob = blob.translate(table)

        return base64.standard_b64decode(blob)

    def encode(self, blob):
        """Encode blob in this dialect, dropping padding for 'nopad'."""
        ret_val = base64.standard_b64encode(blob)

        if not self.dialect.startswith('rfc3548'):
            table = bytes.maketrans(b'+/=', self.c62 + self.c63 + self.pad)
            ret_val = ret_val.translate(table)

        if self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        return ret_val
213
214
class base32Encoding(DataEncoding):
    """RFC 3548 base32 with an upper- or lower-case alphabet.

    A dialect name ending in 'nopad' means the trailing '=' padding is
    omitted.
    """
    name = 'base32'

    def __init__(self, dialect='rfc3548upper'):
        super(base32Encoding, self).__init__(dialect)
        # Both supported alphabets pad with '='; setting it unconditionally
        # keeps the other methods usable whatever the dialect string is.
        self.pad = b'='
        if dialect.startswith('rfc3548upper'):
            self.charset = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' + self.pad)
        elif dialect.startswith('rfc3548lower'):
            self.charset = frozenset(b'abcdefghijklmnopqrstuvwxyz234567' + self.pad)

    def _guessPadLength(self, nopad_len):
        """Return the number of pad characters needed after `nopad_len`
        data characters, or None if that length cannot be valid base32.
        """
        # data characters in the final 8-character group -> pad characters
        pad_lengths = {0: 0, 7: 1, 5: 3, 4: 4, 2: 6}
        return pad_lengths.get(nopad_len % 8, None)

    def extraTests(self, blob):
        """Check that blob's length and padding are consistent."""
        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess is None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad) + padlen_guess)

    def decode(self, blob):
        """Decode blob to bytes.

        Raises Exception for a 'nopad' dialect when blob contains the pad
        character or has an impossible length; base64.b32decode may raise
        its own errors for otherwise malformed data.
        """
        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base32 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen is None:
                raise Exception("Invalid length for unpadded base32 string.")

            blob = blob + (self.pad * padlen)

        # b32decode expects the upper-case alphabet
        return base64.b32decode(blob.upper())

    def encode(self, blob):
        """Encode blob, dropping padding for 'nopad' and applying the
        dialect's letter case.
        """
        ret_val = base64.b32encode(blob)

        if self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        if 'lower' in self.dialect:
            ret_val = ret_val.lower()
        else:
            ret_val = ret_val.upper()

        return ret_val
272
273
class hexEncoding(DataEncoding):
    """Hexadecimal encoding; the dialect ('mixed', 'upper' or 'lower')
    selects which letter case is accepted and produced.
    """
    name = 'hex'

    def __init__(self, dialect='mixed'):
        super(hexEncoding, self).__init__(dialect)
        digits = b'0123456789'
        if 'mixed' in dialect:
            self.charset = frozenset(digits + b'abcdef' + b'ABCDEF')
        elif 'upper' in dialect:
            self.charset = frozenset(digits + b'ABCDEF')
        elif 'lower' in dialect:
            self.charset = frozenset(digits + b'abcdef')

    def extraTests(self, blob):
        """A hex blob must contain an even number of digits."""
        return not len(blob) % 2

    def decode(self, blob):
        """Convert hex digits back to the bytes they represent."""
        return binascii.unhexlify(blob)

    def encode(self, blob):
        """Convert bytes to hex digits in the dialect's letter case."""
        digits = binascii.hexlify(blob)
        if 'upper' in self.dialect:
            return digits.upper()
        if 'lower' in self.dialect:
            return digits.lower()
        return digits
299
300
class percentEncoding(DataEncoding):
    """Percent (URL) encoding.

    The dialect selects the case of the hex digits ('mixed', 'upper' or
    'lower'); a dialect containing 'plus' maps spaces to and from '+'.
    """
    name = 'percent'

    def __init__(self, dialect='mixed'):
        super(percentEncoding, self).__init__(dialect)
        # any byte may appear literally, so the charset check is disabled
        self.charset = None
        digits = b'0123456789'
        if 'mixed' in dialect:
            self.hexchars = frozenset(digits + b'abcdef' + b'ABCDEF')
        elif 'upper' in dialect:
            self.hexchars = frozenset(digits + b'ABCDEF')
        elif 'lower' in dialect:
            self.hexchars = frozenset(digits + b'abcdef')

    def extraTests(self, blob):
        """Return None when blob holds no '%' at all, False when any '%' is
        not followed by two hex digits of this dialect, and True otherwise.
        """
        escapes = blob.split(b'%')[1:]
        if not escapes:
            return None
        for escape in escapes:
            if len(escape) < 2:
                return False
            if not (escape[0] in self.hexchars and escape[1] in self.hexchars):
                return False
        return True

    def decode(self, blob):
        """Decode blob, treating '+' as a space for 'plus' dialects."""
        return _percentDecode(blob, plus=('plus' in self.dialect))

    def encode(self, blob):
        """Encode blob with this dialect's hex case and '+' handling."""
        use_plus = 'plus' in self.dialect
        use_upper = 'lower' not in self.dialect
        return _percentEncode(blob, plus=use_plus, upper=use_upper)
339
340
# (encoding class, dialect, priority) -- bestEncoding() prefers the entry
# with the lowest priority number.
priorities = [
    (hexEncoding, 'upper', 100),
    (hexEncoding, 'lower', 101),
    (hexEncoding, 'mixed', 102),
    (base32Encoding, 'rfc3548upper', 150),
    (base32Encoding, 'rfc3548lower', 151),
    (base32Encoding, 'rfc3548upper-nopad', 160),
    (base32Encoding, 'rfc3548lower-nopad', 161),
    (base64Encoding, 'rfc3548', 200),
    (base64Encoding, 'rfc3548-nopad', 201),
    (base64Encoding, 'rfc3548-newline', 202),
    (base64Encoding, 'filename', 210),
    (base64Encoding, 'filename-nopad', 211),
    (base64Encoding, 'url1', 230),
    (base64Encoding, 'url1-nopad', 231),
    (base64Encoding, 'otkurl', 235),
    (base64Encoding, 'otkurl-nopad', 236),
    (base64Encoding, 'url2', 240),
    (base64Encoding, 'url2-nopad', 241),
    (base64Encoding, 'url3', 250),
    (base64Encoding, 'url3-nopad', 251),
    (base64Encoding, 'url4', 260),
    (base64Encoding, 'url4-nopad', 261),
    (base64Encoding, 'url5', 265),
    (base64Encoding, 'url5-nopad', 266),
    (base64Encoding, 'url6', 267),
    (base64Encoding, 'url6-nopad', 268),
    (base64Encoding, 'xmlnmtoken', 270),
    (base64Encoding, 'xmlnmtoken-nopad', 271),
    (base64Encoding, 'xmlname', 280),
    (base64Encoding, 'xmlname-nopad', 281),
    (percentEncoding, 'upper-plus', 400),
    (percentEncoding, 'upper', 401),
    (percentEncoding, 'lower-plus', 410),
    (percentEncoding, 'lower', 411),
    (percentEncoding, 'mixed-plus', 420),
    (percentEncoding, 'mixed', 421),
]

# Registry of ready-to-use encoder instances keyed by "<name>/<dialect>".
encodings = {}
for codec_class, dialect_name, rank in priorities:
    codec = codec_class(dialect_name)
    codec.priority = rank
    encodings["%s/%s" % (codec_class.name, dialect_name)] = codec
385
def supportedEncodings():
    """Return the names of all registered encodings as a sorted list."""
    return sorted(encodings)
390
391
def possibleEncodings(blob):
    """Classify every registered encoding against blob.

    Returns a (likely, possible) pair of sets of encoding names: `likely`
    holds those whose isExample() reported True, `possible` those that
    reported None (unknown).
    """
    likely, possible = set(), set()
    for name, encoding in encodings.items():
        verdict = encoding.isExample(blob)
        if verdict is None:
            possible.add(name)
        elif verdict == True:
            likely.add(name)
    return likely, possible
402
403
def encodingIntersection(blobs):
    """Return the names of encodings that are likely or possible for every
    blob, leaving out those that were never better than 'possible'.
    """
    candidates = set(encodings)
    never_likely = set(encodings)
    for blob in blobs:
        likely, possible = possibleEncodings(blob)
        candidates.intersection_update(likely.union(possible))
        never_likely.intersection_update(possible)
    return candidates.difference(never_likely)
412
413
def bestEncoding(encs):
    """Return the name in encs whose encoding has the lowest priority
    number, or None when encs is empty or nothing ranks below 999999999.
    """
    ceiling = 999999999
    winner = min(encs, key=lambda e: encodings[e].priority, default=None)
    if winner is None:
        return None
    if encodings[winner].priority < ceiling:
        return winner
    return None
422
423
def decode(encoding, blob):
    """Decode blob with the registered encoding named `encoding`."""
    codec = encodings[encoding]
    return codec.decode(blob)
426
def encode(encoding, blob):
    """Encode blob with the registered encoding named `encoding`."""
    codec = encodings[encoding]
    return codec.encode(blob)
429
def decodeAll(encoding, blobs):
    """Decode each blob with the named encoding; returns a list of results."""
    decoded = []
    for blob in blobs:
        decoded.append(encodings[encoding].decode(blob))
    return decoded
432
def encodeAll(encoding, blobs):
    """Encode each blob with the named encoding; returns a list of results."""
    encoded = []
    for blob in blobs:
        encoded.append(encodings[encoding].encode(blob))
    return encoded
435
def decodeChain(decoding_chain, blob):
    """Apply each named decoding in decoding_chain to blob, in order."""
    return functools.reduce(
        lambda data, step: decode(step, data), decoding_chain, blob)
440
def encodeChain(encoding_chain, blob):
    """Apply each named encoding in encoding_chain to blob, in order."""
    return functools.reduce(
        lambda data, step: encode(step, data), encoding_chain, blob)
445
def getLengths(s):
    """Return the distinct lengths of the items in s, in ascending order."""
    return sorted({len(item) for item in s})
453
454
def maxBlockSize(blob_lengths):
    """Return the greatest common divisor of all blob_lengths, i.e. the
    largest block size that evenly divides every length.

    Returns 0 when blob_lengths is empty.
    """
    divisor = 0
    for bl in blob_lengths:
        # math.gcd replaces fractions.gcd, which was removed in Python 3.9;
        # gcd(0, x) == x, so 0 is a neutral starting value.
        divisor = math.gcd(divisor, bl)

    return divisor
461
462
# allTrue(iterable) folds the items together with `and`: the result is the
# first falsy item, or the last item when every item is truthy.
allTrue = functools.partial(functools.reduce, lambda acc, item: acc and item)
464
def checkCommonBlocksizes(lengths):
    """Return those common block sizes (8, 16 and 20) that evenly divide
    every value in lengths, in ascending order.

    With no lengths at all, every size qualifies vacuously.
    """
    common_block_sizes = (8, 16, 20)
    # materialise once so a one-shot iterator can be checked for each size
    lengths = list(lengths)
    # gcd(x, cbs) == cbs holds exactly when cbs divides x, so a plain
    # remainder test is used in place of the removed fractions.gcd
    return [cbs for cbs in common_block_sizes
            if all(length % cbs == 0 for length in lengths)]
473
474
def int2binary(x, bits=8):
    """
    Render the low `bits` bits of integer x as a string of '0' and '1'
    characters, most significant bit first.
    """
    digits = []
    for shift in reversed(range(bits)):
        digits.append(str((x >> shift) & 1))
    return "".join(digits)
481
482
483	#XXX: move this to buffertools
def smartPermutateBlobs(blobs, block_size=8):
    """
    Intelligently permutates through blocks in blobs.
    If the same block shows up in the same place for
    every blob, the resultant permutations will have
    this property as well.

    blobs should be a sequence containing blobs (all bytes or all str)
    block_size should be an integer block size or an
    iterable of block sizes.

    This is a generator; it yields each new permutation that is not
    already one of the input blobs.
    """

    if not blobs:
        return

    # `long` no longer exists in Python 3; int covers every integer.
    if not isinstance(block_size, int):
        for size in block_size:
            yield from smartPermutateBlobs(blobs, size)
        return

    # First we find the indexes of the chunks that are different
    # NOTE(review): assumes buffertools.blockWiseDiff returns the indexes of
    # the blocks in which its two buffers differ -- it is not visible here.
    different = set()
    for first, second in itertools.combinations(blobs, 2):
        different |= set(buffertools.blockWiseDiff(block_size, first, second))

    # Next we form a set containing the chunks that are different
    # (a set, so duplicates are dropped)
    different_chunks = set()
    for blob in blobs:
        different_chunks.update(
            blob[i * block_size:(i + 1) * block_size] for i in different)
    # sorted so the permutations come out in a reproducible order
    different_chunks = sorted(different_chunks)

    # We want to know which chunks are the same, too.  Floor division keeps
    # chunk_len an int so it can be passed to range().
    chunk_len = len(blobs[0]) // block_size
    # ascending order so each insert below lands at its original index
    same = sorted(set(range(chunk_len)) - different)

    # empty value of the blobs' own type, used to join chunks of bytes or str
    joiner = blobs[0][:0]

    # Now let's mix and match the different blocks, for all possible lengths
    for i in range(1, chunk_len + 1):
        for combo in itertools.permutations(different_chunks, i):
            # permutations() yields tuples; a list is needed for insert()
            mix = list(combo)
            # We add back in the part that stays the same
            for j in same:
                mix.insert(j, blobs[0][j * block_size:(j + 1) * block_size])
            mix = joiner.join(mix)
            if mix in blobs:
                continue
            yield mix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: