Context Navigation

source: trunk/lib/bletchley/blobtools.py @ 28

Last change on this file since 28 was 28, checked in by tmorgan, 12 years ago
more documentation added supported encodings listing
File size: 13.9 KB

Line
1	'''
2	A collection of tools to assist in analyzing encrypted blobs of data
3
4	Copyright (C) 2011-2012 Virtual Security Research, LLC
5	Author: Timothy D. Morgan, Jason A. Donenfeld
6
7	This program is free software: you can redistribute it and/or modify
8	it under the terms of the GNU Lesser General Public License, version 3,
9	as published by the Free Software Foundation.
10
11	This program is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program. If not, see <http://www.gnu.org/licenses/>.
18	'''
19
20	import sys
21	import string
22	import base64
23	import binascii
24	import urllib
25	import fractions
26	import operator
27	import functools
28	import itertools
29	import buffertools
30
31	# abstract class
class DataEncoding(object):
    """Abstract base for a textual encoding of binary blobs.

    Subclasses provide ``name``, a ``charset`` of permitted characters
    (or None to skip the character check) and override ``extraTests``,
    ``decode`` and ``encode``.
    """

    # Characters an encoded blob may contain; the base class allows none.
    charset = frozenset('')
    dialect = None
    name = None
    priority = None

    def __init__(self, dialect=''):
        """Remember which dialect of the encoding this instance handles."""
        self.dialect = dialect

    def isExample(self, blob):
        """Check whether blob looks like an example of this encoding.

        Returns False when a charset is defined and blob contains a
        character outside it; otherwise returns whatever ``extraTests``
        returns (True, False, or None for "unknown").
        """
        seen = frozenset(blob)
        charset_ok = self.charset is None or seen.issubset(self.charset)
        return self.extraTests(blob) if charset_ok else False

    def extraTests(self, blob):
        """May return True, False, or None, for is an example, isn't an
        example, or unknown, respectively.

        The base implementation accepts everything.
        """
        return True

    def decode(self, blob):
        """Decode blob; the base class has no decoding and returns None."""
        return None

    def encode(self, blob):
        """Encode blob; the base class has no encoding and returns None."""
        return None
59
60
class base64Encoding(DataEncoding):
    """Base64 codec supporting several alphabet and padding dialects.

    A dialect is selected by the prefix of the dialect string; each one
    defines the characters used for values 62 and 63 and the pad
    character.  A dialect string ending in 'nopad' denotes the same
    alphabet with the trailing padding omitted.
    """
    name = 'base64'

    # (dialect prefix, (char for 62, char for 63, pad char))
    _ALPHABETS = (
        ('rfc3548',    ('+', '/', '=')),
        ('filename',   ('+', '-', '=')),
        ('url1',       ('-', '_', '=')),
        ('url2',       ('-', '_', '.')),
        ('url3',       ('_', '-', '.')),
        ('url4',       ('-', '_', '!')),
        ('url5',       ('+', '/', '$')),
        ('otkurl',     ('-', '_', '*')),
        ('xmlnmtoken', ('.', '-', '=')),
        ('xmlname',    ('_', ':', '=')),
    )

    def __init__(self, dialect='rfc3548'):
        super(base64Encoding, self).__init__(dialect)
        for prefix, (c62, c63, pad) in self._ALPHABETS:
            if dialect.startswith(prefix):
                self.c62, self.c63, self.pad = c62, c63, pad
                break

        # For an unrecognised dialect c62/c63/pad were never assigned, so
        # building the charset raises AttributeError.
        self.charset = frozenset(string.ascii_uppercase
                                 + string.ascii_lowercase
                                 + string.digits
                                 + self.c62 + self.c63 + self.pad)

    def _guessPadLength(self, nopad_len):
        """Return the number of pad characters needed after nopad_len
        unpadded characters, or None when that length is impossible."""
        missing = -nopad_len % 4
        return None if missing == 3 else missing

    def extraTests(self, blob):
        """Validate padding: length must be plausible, 'nopad' dialects
        must contain no pad at all, and padded dialects must end with
        exactly the expected number of pad characters."""
        stripped = blob.rstrip(self.pad)
        expected_pad = self._guessPadLength(len(stripped))
        if expected_pad is None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the
        # string and must be the correct length at the end
        if self.pad in stripped:
            return False
        return len(blob) == len(stripped) + expected_pad

    def decode(self, blob):
        """Decode blob from this dialect.

        For 'nopad' dialects the missing padding is restored first; an
        Exception is raised if the blob contains a pad character or has
        an impossible length.
        """
        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base64 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen is None:
                raise Exception("Invalid length for unpadded base64 string.")

            blob += self.pad * padlen

        if not self.dialect.startswith('rfc3548'):
            # Map the dialect's special characters back to the standard ones.
            to_standard = string.maketrans(self.c62 + self.c63 + self.pad, '+/=')
            blob = blob.translate(to_standard)

        return base64.standard_b64decode(blob)

    def encode(self, blob):
        """Encode blob into this dialect, dropping padding for 'nopad'."""
        encoded = base64.standard_b64encode(blob)

        if not self.dialect.startswith('rfc3548'):
            # Swap the standard special characters for the dialect's own.
            from_standard = string.maketrans('+/=', self.c62 + self.c63 + self.pad)
            encoded = encoded.translate(from_standard)

        if self.dialect.endswith('nopad'):
            encoded = encoded.rstrip(self.pad)

        return encoded
159
160
class base32Encoding(DataEncoding):
    """Base32 codec for the RFC 3548 alphabet in upper or lower case.

    Dialects starting with 'rfc3548upper' use the upper-case alphabet and
    those starting with 'rfc3548lower' the lower-case one.  A dialect
    string ending in 'nopad' denotes the variant without trailing '='
    padding.
    """
    name = 'base32'

    def __init__(self, dialect='rfc3548upper'):
        super(base32Encoding, self).__init__(dialect)
        if dialect.startswith('rfc3548upper'):
            self.pad = '='
            self.charset = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'+self.pad)

        elif dialect.startswith('rfc3548lower'):
            self.pad = '='
            self.charset = frozenset('abcdefghijklmnopqrstuvwxyz234567'+self.pad)

    def _guessPadLength(self, nopad_len):
        """Return the pad length that must follow nopad_len unpadded
        characters, or None if no valid base32 string has that length."""
        # (unpadded length mod 8) -> number of pad characters
        pad_lengths = {0:0, 7:1, 5:3, 4:4, 2:6}
        return pad_lengths.get(nopad_len%8, None)

    def extraTests(self, blob):
        """Validate padding: length must be plausible, 'nopad' dialects
        must contain no pad at all, and padded dialects must end with
        exactly the expected number of pad characters."""
        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess is None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)

    def decode(self, blob):
        """Decode blob from this dialect.

        For 'nopad' dialects the missing padding is restored first; an
        Exception is raised if the blob contains a pad character or has
        an impossible length.
        """
        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base32 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen is None:
                raise Exception("Invalid length for unpadded base32 string.")

            blob = blob+(self.pad*padlen)

        # b32decode is given upper-case input regardless of dialect.
        return base64.b32decode(blob.upper())

    def encode(self, blob):
        """Encode blob into this dialect: padding is dropped for 'nopad'
        dialects and the case follows the dialect ('lower' or upper)."""
        ret_val = base64.b32encode(blob)

        if self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        if 'lower' in self.dialect:
            ret_val = ret_val.lower()
        else:
            ret_val = ret_val.upper()

        return ret_val
218
219
class hexEncoding(DataEncoding):
    """Hexadecimal codec with 'mixed', 'upper' and 'lower' case dialects."""
    name = 'hex'

    # (dialect keyword, permitted digits); the first keyword found wins.
    _DIGITS = (
        ('mixed', 'ABCDEFabcdef0123456789'),
        ('upper', 'ABCDEF0123456789'),
        ('lower', 'abcdef0123456789'),
    )

    def __init__(self, dialect='mixed'):
        super(hexEncoding, self).__init__(dialect)
        for keyword, digits in self._DIGITS:
            if keyword in dialect:
                self.charset = frozenset(digits)
                break

    def extraTests(self, blob):
        """A hex string must contain an even number of digits."""
        return not len(blob) % 2

    def decode(self, blob):
        """Convert the hex string blob to its binary form."""
        return binascii.a2b_hex(blob)

    def encode(self, blob):
        """Hex-encode blob, forcing the case demanded by the dialect; the
        'mixed' dialect keeps whatever case b2a_hex produces."""
        encoded = binascii.b2a_hex(blob)
        if 'upper' in self.dialect:
            return encoded.upper()
        if 'lower' in self.dialect:
            return encoded.lower()
        return encoded
245
246
class percentEncoding(DataEncoding):
    """Percent (URL) encoding codec.

    The dialect names the case of the hex digits accepted in escapes
    ('mixed', 'upper' or 'lower').  Dialects containing 'plus' use the
    quote_plus/unquote_plus functions, i.e. '+' stands for a space; the
    other dialects use plain quote/unquote.
    """
    name = 'percent'

    def __init__(self, dialect='mixed'):
        super(percentEncoding, self).__init__(dialect)
        # Any character may appear in a percent-encoded string, so the
        # generic charset check is disabled.
        self.charset = None
        if 'mixed' in dialect:
            self.hexchars = frozenset('ABCDEFabcdef0123456789')
        elif 'upper' in dialect:
            self.hexchars = frozenset('ABCDEF0123456789')
        elif 'lower' in dialect:
            self.hexchars = frozenset('abcdef0123456789')

    def extraTests(self, blob):
        """Return None (unknown) when blob contains no '%', False when any
        '%' is not followed by two hex digits of this dialect, else True."""
        chunks = blob.split('%')
        if len(chunks) < 2:
            return None
        # Every chunk after the first one started right after a '%'.
        for c in chunks[1:]:
            if len(c) < 2:
                return False
            if (c[0] not in self.hexchars) or (c[1] not in self.hexchars):
                return False
        return True

    def decode(self, blob):
        """Percent-decode blob; 'plus' dialects also turn '+' into a space."""
        # The original test was inverted: 'plus' dialects called unquote().
        if 'plus' in self.dialect:
            return urllib.unquote_plus(blob)
        else:
            return urllib.unquote(blob)

    # XXX: should technically produce quoted digits in same upper/lower case
    def encode(self, blob):
        """Percent-encode blob; 'plus' dialects encode a space as '+'."""
        # The original test was inverted: 'plus' dialects called quote().
        if 'plus' in self.dialect:
            return urllib.quote_plus(blob)
        else:
            return urllib.quote(blob)
282
283
# Every supported (encoding class, dialect, priority) combination.
# bestEncoding() prefers the entry with the lowest priority number.
priorities = [
    # hexadecimal
    (hexEncoding, 'upper', 100),
    (hexEncoding, 'lower', 101),
    (hexEncoding, 'mixed', 102),
    # base32
    (base32Encoding, 'rfc3548upper', 150),
    (base32Encoding, 'rfc3548lower', 151),
    (base32Encoding, 'rfc3548upper-nopad', 160),
    (base32Encoding, 'rfc3548lower-nopad', 161),
    # base64
    (base64Encoding, 'rfc3548', 200),
    (base64Encoding, 'rfc3548-nopad', 201),
    (base64Encoding, 'filename', 210),
    (base64Encoding, 'filename-nopad', 211),
    (base64Encoding, 'url1', 230),
    (base64Encoding, 'url1-nopad', 231),
    (base64Encoding, 'otkurl', 235),
    (base64Encoding, 'otkurl-nopad', 236),
    (base64Encoding, 'url2', 240),
    (base64Encoding, 'url2-nopad', 241),
    (base64Encoding, 'url3', 250),
    (base64Encoding, 'url3-nopad', 251),
    (base64Encoding, 'url4', 260),
    (base64Encoding, 'url4-nopad', 261),
    (base64Encoding, 'url5', 265),
    (base64Encoding, 'url5-nopad', 266),
    (base64Encoding, 'xmlnmtoken', 270),
    (base64Encoding, 'xmlnmtoken-nopad', 271),
    (base64Encoding, 'xmlname', 280),
    (base64Encoding, 'xmlname-nopad', 281),
    # percent (URL) encoding
    (percentEncoding, 'upper-plus', 400),
    (percentEncoding, 'upper', 401),
    (percentEncoding, 'lower-plus', 410),
    (percentEncoding, 'lower', 411),
    (percentEncoding, 'mixed-plus', 420),
    (percentEncoding, 'mixed', 421),
]

# Registry of ready-to-use encoder instances keyed by "<name>/<dialect>".
encodings = {}
for enc, d, p in priorities:
    e = enc(d)
    e.priority = p
    encodings["%s/%s" % (enc.name, d)] = e
325
def supportedEncodings():
    """Return the names of all registered encodings as a sorted list."""
    return sorted(encodings.keys())
330
331
def possibleEncodings(blob):
    """Classify every registered encoding against blob.

    Returns a pair of sets of encoding names: those whose isExample()
    returned True ("likely") and those for which it returned None
    ("possible", i.e. unknown).  Encodings that returned False appear
    in neither set.
    """
    likely, possible = set(), set()
    for name, encoding in encodings.items():
        verdict = encoding.isExample(blob)
        if verdict is None:
            possible.add(name)
        elif verdict is True:
            likely.add(name)
    return likely, possible
342
343
def encodingIntersection(blobs):
    """Return the names of encodings that fit every blob in blobs.

    An encoding is kept when it is likely or possible for all blobs,
    except when it is merely "possible" (unknown) for every one of them.
    """
    candidates = set(encodings.keys())
    always_unknown = set(encodings.keys())
    for b in blobs:
        likely, possible = possibleEncodings(b)
        candidates.intersection_update(likely.union(possible))
        always_unknown.intersection_update(possible)
    return candidates.difference(always_unknown)
352
353
def bestEncoding(encs):
    """Return the name in encs whose registered encoding has the lowest
    priority number, or None when encs is empty (or no priority is
    below 999999999).  The first name wins on ties."""
    best, lowest = None, 999999999
    for candidate in encs:
        rank = encodings[candidate].priority
        if rank < lowest:
            best, lowest = candidate, rank
    return best
362
363
def decode(encoding, blob):
    """Decode blob with the registered encoding named encoding."""
    codec = encodings[encoding]
    return codec.decode(blob)
366
def encode(encoding, blob):
    """Encode blob with the registered encoding named encoding."""
    codec = encodings[encoding]
    return codec.encode(blob)
369
def decodeAll(encoding, blobs):
    """Decode each blob in blobs with the registered encoding named
    encoding and return the results as a list."""
    decoder = encodings[encoding].decode
    return [decoder(b) for b in blobs]
372
def encodeAll(encoding, blobs):
    """Encode each blob in blobs with the registered encoding named
    encoding and return the results as a list."""
    encoder = encodings[encoding].encode
    return [encoder(b) for b in blobs]
375
def decodeChain(decoding_chain, blob):
    """Apply each decoding named in decoding_chain to blob, in order,
    feeding the output of one step into the next; return the result."""
    result = blob
    for step in decoding_chain:
        result = decode(step, result)
    return result
380
def encodeChain(encoding_chain, blob):
    """Apply each encoding named in encoding_chain to blob, in order,
    feeding the output of one step into the next; return the result."""
    result = blob
    for step in encoding_chain:
        result = encode(step, result)
    return result
385
def getLengths(s):
    """Return the distinct lengths of the items in s, sorted ascending."""
    return sorted(set(map(len, s)))
393
394
def maxBlockSize(blob_lengths):
    """Return the greatest common divisor of all lengths in blob_lengths.

    Returns 0 when blob_lengths is empty.
    """
    divisor = 0
    for bl in blob_lengths:
        # Euclid's algorithm, written out because fractions.gcd was
        # removed in Python 3.9; this is the same loop it implemented.
        a, b = divisor, bl
        while b:
            a, b = b, a % b
        divisor = a

    return divisor
401
402
# Folds a non-empty sequence with "and".  functools.reduce is used because
# the bare reduce builtin only exists on Python 2.
allTrue = functools.partial(functools.reduce, (lambda x,y: x and y))

def checkCommonBlocksizes(lengths):
    """Return the common block sizes (8, 16, 20) that divide every length.

    lengths is an iterable of integer lengths.  When lengths is empty
    every common block size is returned.
    """
    common_block_sizes = (8,16,20)
    lengths = list(lengths)  # iterated once per candidate size
    ret_val = []
    for cbs in common_block_sizes:
        # gcd(x, cbs) == cbs is the same test as "cbs divides x"
        if all(x % cbs == 0 for x in lengths):
            ret_val.append(cbs)
    return ret_val
413
414
def int2binary(x, bits=8):
    """
    Integer to binary

    Returns the lowest `bits` bits of x as a string of '0'/'1'
    characters, most significant bit first.
    """
    return "".join(str((x >> shift) & 1) for shift in reversed(range(bits)))
421
422
423	#XXX: move this to buffertools
def smartPermutateBlobs(blobs, block_size=8):
    """
    Intelligently permutates through blocks in blobs.
    If the same blob shows up in the same place for
    every blob, the resultant permutations will have
    this property as well.
    blobs should be an array containing blobs
    block_size should be an integer block_size or an
    array of block sizes.

    This is a generator: it yields each new blob built from the
    differing blocks, skipping any that is already present in blobs.
    When block_size is a sequence, the permutations for each size are
    yielded in turn.
    """

    if len(blobs) == 0:
        return

    # An integer block size is not iterable; anything iterable is
    # treated as a collection of block sizes.
    try:
        sizes = iter(block_size)
    except TypeError:
        sizes = None
    if sizes is not None:
        for size in sizes:
            for blob in smartPermutateBlobs(blobs, size):
                yield blob
        return

    # First we find the indexes of the chunks that are different
    # (assumes buffertools.blockWiseDiff returns block indexes -- it is
    # defined outside this file; the slicing below relies on that)
    different = set()
    for first, second in itertools.combinations(blobs, 2):
        different |= set(buffertools.blockWiseDiff(block_size, first, second))

    # Next we form a set containing the chunks that are different
    different_chunks = set()
    for blob in blobs:
        different_chunks.update(blob[i * block_size:(i + 1) * block_size]
                                for i in different)
    # Sorted so that the order of the yielded blobs is reproducible
    different_chunks = sorted(different_chunks)

    # We want to know which chunks are the same, too.  They are kept in
    # ascending order so each insert below lands on its real position.
    chunk_len = len(blobs[0]) // block_size
    same = sorted(set(range(0, chunk_len)) - different)

    known = set(blobs)
    joiner = blobs[0][:0]  # empty value of the same type as the blobs

    # Now let's mix and match the different blocks, for all possible lengths
    for i in range(1, chunk_len + 1):
        for perm in itertools.permutations(different_chunks, i):
            # permutations() yields tuples; a list is needed for insert()
            mix = list(perm)
            # We add back in the part that stays the same
            for j in same:
                mix.insert(j, blobs[0][j * block_size:(j + 1) * block_size])
            mix = joiner.join(mix)
            if mix in known:
                continue
            yield mix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: