Context Navigation

source: lib/bletchley/blobtools.py @ 6

Last change on this file since 6 was 1, checked in by tmorgan, 12 years ago
moved to dedicated repository
File size: 13.4 KB

Line
1	'''
2	A collection of tools to assist in analyzing encrypted blobs of data
3
4	Copyright (C) 2011-2012 Virtual Security Research, LLC
5	Author: Timothy D. Morgan, Jason A. Donenfeld
6
7	This program is free software: you can redistribute it and/or modify
8	it under the terms of the GNU Lesser General Public License, version 3,
9	as published by the Free Software Foundation.
10
11	This program is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program. If not, see <http://www.gnu.org/licenses/>.
18	'''
19
20	import sys
21	import string
22	import base64
23	import binascii
24	import urllib
25	import fractions
26	import operator
27	import functools
28	import itertools
29	import buffertools
30
31	# abstract class
32	class DataEncoding(object):
33	charset = frozenset('')
34	dialect = None
35	name = None
36	priority = None
37
38	def __init__(self, dialect=''):
39	self.dialect = dialect
40
41	def isExample(self, blob):
42	sblob = frozenset(blob)
43	return ((self.charset == None or sblob.issubset(self.charset)) and self.extraTests(blob))
44
45	def extraTests(self, blob):
46	return True
47
48	def decode(self, blob):
49	return None
50
51	def encode(self, blob):
52	return None
53
54
55	class base64Encoding(DataEncoding):
56	name = 'base64'
57	def __init__(self, dialect='rfc3548'):
58	super(base64Encoding, self).__init__(dialect)
59	if dialect.startswith('rfc3548'):
60	self.c62 = '+'
61	self.c63 = '/'
62	self.pad = '='
63	elif dialect.startswith('filename'):
64	self.c62 = '+'
65	self.c63 = '-'
66	self.pad = '='
67	elif dialect.startswith('url1'):
68	self.c62 = '-'
69	self.c63 = '_'
70	self.pad = '='
71	elif dialect.startswith('url2'):
72	self.c62 = '-'
73	self.c63 = '_'
74	self.pad = '.'
75	elif dialect.startswith('url3'):
76	self.c62 = '_'
77	self.c63 = '-'
78	self.pad = '.'
79	elif dialect.startswith('url4'):
80	self.c62 = '-'
81	self.c63 = '_'
82	self.pad = '!'
83	elif dialect.startswith('url5'):
84	self.c62 = '+'
85	self.c63 = '/'
86	self.pad = '$'
87	elif dialect.startswith('otkurl'):
88	self.c62 = '-'
89	self.c63 = '_'
90	self.pad = '*'
91	elif dialect.startswith('xmlnmtoken'):
92	self.c62 = '.'
93	self.c63 = '-'
94	self.pad = '='
95	elif dialect.startswith('xmlname'):
96	self.c62 = '_'
97	self.c63 = ':'
98	self.pad = '='
99
100	self.charset = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
101	+'abcdefghijklmnopqrstuvwxyz0123456789'
102	+self.c62+self.c63+self.pad)
103
104	def _guessPadLength(self, nopad_len):
105	length = ((4 - nopad_len % 4) % 4)
106	if length != 3:
107	return length
108	return None
109
110	def extraTests(self, blob):
111	nopad = blob.rstrip(self.pad)
112	padlen_guess = self._guessPadLength(len(nopad))
113	if padlen_guess == None:
114	return False
115
116	# we don't accept bad pads, only missing pads
117	if self.dialect.endswith('nopad'):
118	return self.pad not in blob
119
120	# pad must not appear in the middle of the
121	# string and must be the correct length at the end
122	return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)
123
124	def decode(self, blob):
125	if self.dialect.endswith('nopad'):
126	if self.pad in blob:
127	raise Exception("Unpadded base64 string contains pad character")
128
129	padlen = self._guessPadLength(len(blob))
130	if padlen == None:
131	raise Exception("Invalid length for unpadded base64 string.")
132
133	blob = blob+(self.pad*padlen)
134
135	if not self.dialect.startswith('rfc3548'):
136	table = string.maketrans(self.c62+self.c63+self.pad, '+/=')
137	blob = blob.translate(table)
138
139	return base64.standard_b64decode(blob)
140
141
142	def encode(self, blob):
143	ret_val = base64.standard_b64encode(blob)
144
145	if not self.dialect.startswith('rfc3548'):
146	table = string.maketrans('+/=', self.c62+self.c63+self.pad)
147	ret_val = ret_val.translate(table)
148
149	if ret_val != None and self.dialect.endswith('nopad'):
150	ret_val = ret_val.rstrip(self.pad)
151
152	return ret_val
153
154
155	class base32Encoding(DataEncoding):
156	name = 'base32'
157	def __init__(self, dialect='rfc3548upper'):
158	super(base32Encoding, self).__init__(dialect)
159	if dialect.startswith('rfc3548upper'):
160	self.pad = '='
161	self.charset = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'+self.pad)
162
163	elif dialect.startswith('rfc3548lower'):
164	self.pad = '='
165	self.charset = frozenset('abcdefghijklmnopqrstuvwxyz234567'+self.pad)
166
167	def _guessPadLength(self, nopad_len):
168	pad_lengths = {0:0, 7:1, 5:3, 4:4, 2:6}
169	return pad_lengths.get(nopad_len%8, None)
170
171	def extraTests(self, blob):
172	nopad = blob.rstrip(self.pad)
173	padlen_guess = self._guessPadLength(len(nopad))
174	if padlen_guess == None:
175	return False
176
177	# we don't accept bad pads, only missing pads
178	if self.dialect.endswith('nopad'):
179	return self.pad not in blob
180
181	# pad must not appear in the middle of the
182	# string and must be the correct length at the end
183	return (self.pad not in nopad) and (len(blob) == len(nopad)+padlen_guess)
184
185
186	def decode(self, blob):
187	if self.dialect.endswith('nopad'):
188	if self.pad in blob:
189	raise Exception("Unpadded base64 string contains pad character")
190
191	padlen = self._guessPadLength(len(blob))
192	if padlen == None:
193	raise Exception("Invalid length for unpadded base64 string.")
194
195	blob = blob+(self.pad*padlen)
196
197	return base64.b32decode(blob.upper())
198
199
200	def encode(self, blob):
201	ret_val = base64.b32encode(blob)
202
203	if ret_val != None and self.dialect.endswith('nopad'):
204	ret_val = ret_val.rstrip(self.pad)
205
206	if 'lower' in self.dialect:
207	ret_val = ret_val.lower()
208	else:
209	ret_val = ret_val.upper()
210
211	return ret_val
212
213
214	class hexEncoding(DataEncoding):
215	name = 'hex'
216	def __init__(self, dialect='mixed'):
217	super(hexEncoding, self).__init__(dialect)
218	if 'mixed' in dialect:
219	self.charset = frozenset('ABCDEFabcdef0123456789')
220	elif 'upper' in dialect:
221	self.charset = frozenset('ABCDEF0123456789')
222	elif 'lower' in dialect:
223	self.charset = frozenset('abcdef0123456789')
224
225
226	def extraTests(self, blob):
227	return (len(blob) % 2 == 0)
228
229	def decode(self, blob):
230	return binascii.a2b_hex(blob)
231
232	def encode(self, blob):
233	if 'upper' in self.dialect:
234	return binascii.b2a_hex(blob).upper()
235	if 'lower' in self.dialect:
236	return binascii.b2a_hex(blob).lower()
237	else:
238	return binascii.b2a_hex(blob)
239
240
241	class percentEncoding(DataEncoding):
242	name = 'percent'
243	def __init__(self, dialect='mixed'):
244	super(percentEncoding, self).__init__(dialect)
245	self.charset = None
246	if 'mixed' in dialect:
247	self.hexchars = frozenset('ABCDEFabcdef0123456789')
248	elif 'upper' in dialect:
249	self.hexchars = frozenset('ABCDEF0123456789')
250	elif 'lower' in dialect:
251	self.hexchars = frozenset('abcdef0123456789')
252
253	def extraTests(self, blob):
254	chunks = blob.split('%')
255	if len(chunks) < 2:
256	return False
257	for c in chunks[1:]:
258	if len(c) < 2:
259	return False
260	if (c[0] not in self.hexchars) or (c[1] not in self.hexchars):
261	return False
262	return True
263
264	def decode(self, blob):
265	if 'plus' in self.dialect:
266	return urllib.unquote(blob)
267	else:
268	return urllib.unquote_plus(blob)
269
270	# XXX: should technically produce quoted digits in same upper/lower case
271	def encode(self, blob):
272	if 'plus' in self.dialect:
273	return urllib.quote(blob)
274	else:
275	return urllib.quote_plus(blob)
276
277
278	priorities = [
279	(hexEncoding, 'upper', 100),
280	(hexEncoding, 'lower', 101),
281	(hexEncoding, 'mixed', 102),
282	(base32Encoding, 'rfc3548upper', 150),
283	(base32Encoding, 'rfc3548lower', 151),
284	(base32Encoding, 'rfc3548upper-nopad', 160),
285	(base32Encoding, 'rfc3548lower-nopad', 161),
286	(base64Encoding, 'rfc3548', 200),
287	(base64Encoding, 'rfc3548-nopad', 201),
288	(base64Encoding, 'filename', 210),
289	(base64Encoding, 'filename-nopad', 211),
290	(base64Encoding, 'url1', 230),
291	(base64Encoding, 'url1-nopad', 231),
292	(base64Encoding, 'otkurl', 235),
293	(base64Encoding, 'otkurl-nopad', 236),
294	(base64Encoding, 'url2', 240),
295	(base64Encoding, 'url2-nopad', 241),
296	(base64Encoding, 'url3', 250),
297	(base64Encoding, 'url3-nopad', 251),
298	(base64Encoding, 'url4', 260),
299	(base64Encoding, 'url4-nopad', 261),
300	(base64Encoding, 'url5', 265),
301	(base64Encoding, 'url5-nopad', 266),
302	(base64Encoding, 'xmlnmtoken', 270),
303	(base64Encoding, 'xmlnmtoken-nopad', 271),
304	(base64Encoding, 'xmlname', 280),
305	(base64Encoding, 'xmlname-nopad', 281),
306	(percentEncoding, 'upper-plus', 400),
307	(percentEncoding, 'upper', 401),
308	(percentEncoding, 'lower-plus', 410),
309	(percentEncoding, 'lower', 411),
310	(percentEncoding, 'mixed-plus', 420),
311	(percentEncoding, 'mixed', 421),
312	]
313
314	encodings = {}
315	for enc,d,p in priorities:
316	e = enc(d)
317	e.priority = p
318	encodings["%s/%s" % (enc.name, d)] = e
319
320
321	def possibleEncodings(blob):
322	ret_val = set()
323	for name,encoding in encodings.items():
324	if encoding.isExample(blob):
325	ret_val.add(name)
326	return ret_val
327
328
329	def encodingIntersection(blobs):
330	ret_val = set(encodings.keys())
331	for b in blobs:
332	ret_val &= possibleEncodings(b)
333
334	return ret_val
335
336
337	def bestEncoding(encs):
338	priority = 999999999
339	best = None
340	for e in encs:
341	if encodings[e].priority < priority:
342	best = e
343	priority = encodings[e].priority
344	return best
345
346
347	def decode(encoding, blob):
348	return encodings[encoding].decode(blob)
349
350	def encode(encoding, blob):
351	return encodings[encoding].encode(blob)
352
353	def decodeAll(encoding, blobs):
354	return map(encodings[encoding].decode, blobs)
355
356	def encodeAll(encoding, blobs):
357	return map(encodings[encoding].encode, blobs)
358
359	def decodeChain(decoding_chain, blob):
360	for decoding in decoding_chain:
361	blob = decode(decoding, blob)
362	return blob
363
364	def encodeChain(encoding_chain, blob):
365	for encoding in encoding_chain:
366	blob = encode(encoding, blob)
367	return blob
368
369	def getLengths(s):
370	lengths = set()
371	for bin in s:
372	lengths.add(len(bin))
373	lengths = list(lengths)
374	lengths.sort()
375	return lengths
376
377
378	def maxBlockSize(blob_lengths):
379	divisor = 0
380	for bl in blob_lengths:
381	divisor = fractions.gcd(divisor, bl)
382
383	return divisor
384
385
386	allTrue = functools.partial(reduce, (lambda x,y: x and y))
387
388	def checkCommonBlocksizes(lengths):
389	common_block_sizes = (8,16,20)
390	ret_val = []
391	for cbs in common_block_sizes:
392	gcdIsCBS = (lambda x: fractions.gcd(x,cbs)==cbs)
393	if allTrue(map(gcdIsCBS, lengths)):
394	ret_val.append(cbs)
395	return ret_val
396
397
398	def int2binary(x, bits=8):
399	"""
400	Integer to binary
401	Count is number of bits
402	"""
403	return "".join(map(lambda y:str((x>>y)&1), range(bits-1, -1, -1)))
404
405
406	def smartPermutateBlobs(blobs, block_size=8):
407	"""
408	Intelligently permutates through blocks in blobs.
409	If the same blob shows up in the same place for
410	every blob, the resultant permutations will have
411	this property as well.
412	blobs should be an array containing blobs
413	block_size should be an integer block_size or an
414	array of block sizes.
415	"""
416
417	if len(blobs) == 0:
418	return
419
420	if not isinstance(block_size, (int, long)):
421	for size in block_size:
422	for blob in smartPermutateBlobs(blobs, size):
423	yield blob
424	return
425
426	# First we find the indexes of the chunks that are different
427	different = set()
428	for combo in itertools.combinations(blobs, 2):
429	different \|= set(buffertools.blockWiseDiff(block_size, combo[0], combo[1]))
430
431	# Next we form a set containing the chunks that are different
432	different_chunks = []
433	for blob in blobs:
434	different_chunks.extend([blob[i * block_size:(i + 1) * block_size] for i in different])
435	# Remove duplicates
436	different_chunks = set(different_chunks)
437
438	# We want to know which chunks are the same, too
439	chunk_len = len(blobs[0]) / block_size
440	same = set(range(0, chunk_len)) - different
441
442	# Now let's mix and match the differnet blocks, for all possible lengths
443	for i in range(1, chunk_len + 1):
444	for mix in itertools.permutations(different_chunks, i):
445	# We add back in the part that stays the same
446	for j in same:
447	mix.insert(j, blobs[0][j * block_size:(j + 1) * block_size])
448	mix = "".join(mix)
449	if mix in blobs:
450	continue
451	yield mix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: