Context Navigation

blobtools.py @ 20

Last change on this file since 20 was 20, checked in by tmorgan, 12 years ago

added simple encode/decode command line tools

fixed problem with percent encoding by allowing for a third result of tests

implemented (untested) resumption of decryption for POA

more POA documentation and logging

File size: 13.8 KB

Line
1	'''
2	A collection of tools to assist in analyzing encrypted blobs of data
3
4	Copyright (C) 2011-2012 Virtual Security Research, LLC
5	Author: Timothy D. Morgan, Jason A. Donenfeld
6
7	This program is free software: you can redistribute it and/or modify
8	it under the terms of the GNU Lesser General Public License, version 3,
9	as published by the Free Software Foundation.
10
11	This program is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program. If not, see <http://www.gnu.org/licenses/>.
18	'''
19
20	import sys
21	import string
22	import base64
23	import binascii
24	import urllib
25	import fractions
26	import operator
27	import functools
28	import itertools
29	import buffertools
30
# abstract class
class DataEncoding(object):
    """Common interface for the blob encodings handled by this module.

    Subclasses set ``name``, fill in ``charset`` (or set it to None to
    disable the character-set check) and override extraTests(), decode()
    and encode().
    """
    # Characters an encoded blob may consist of; None disables the check.
    charset = frozenset('')
    # Variant of the encoding this instance handles.
    dialect = None
    # Short name of the encoding family.
    name = None
    # Ranking value; left unset here and assigned by code outside the class.
    priority = None

    def __init__(self, dialect=''):
        """Remember which dialect of the encoding this instance handles."""
        self.dialect = dialect

    def isExample(self, blob):
        """Report whether blob could have been produced by this encoding.

        Returns False as soon as blob contains a character outside
        ``charset``; otherwise returns whatever extraTests() reports
        (True, False, or None for "unknown").
        """
        if self.charset is not None:
            if not frozenset(blob).issubset(self.charset):
                return False
        return self.extraTests(blob)

    def extraTests(self, blob):
        """May return True, False, or None, for is an example, isn't an
        example, or unknown, respectively.

        """
        return True

    def decode(self, blob):
        """Decode blob; the base class has no decoding and returns None."""
        return None

    def encode(self, blob):
        """Encode blob; the base class has no encoding and returns None."""
        return None
59
60
class base64Encoding(DataEncoding):
    """Base64 in one of several alphabet/padding dialects.

    A dialect is selected by the prefix of its name (see _DIALECTS); a
    name ending in 'nopad' denotes the same alphabet with the trailing
    pad characters stripped.  Encoded blobs are handled as text, decoded
    blobs as bytes.
    """
    name = 'base64'

    # (dialect prefix, character for value 62, character for value 63, pad)
    _DIALECTS = (
        ('rfc3548', '+', '/', '='),
        ('filename', '+', '-', '='),
        ('url1', '-', '_', '='),
        ('url2', '-', '_', '.'),
        ('url3', '_', '-', '.'),
        ('url4', '-', '_', '!'),
        ('url5', '+', '/', '$'),
        ('otkurl', '-', '_', '*'),
        ('xmlnmtoken', '.', '-', '='),
        ('xmlname', '_', ':', '='),
    )

    def __init__(self, dialect='rfc3548'):
        """Select the alphabet for dialect.

        Raises ValueError when dialect matches none of the known
        prefixes (previously this surfaced as an AttributeError on the
        missing alphabet attributes).
        """
        super(base64Encoding, self).__init__(dialect)
        for prefix, c62, c63, pad in self._DIALECTS:
            if dialect.startswith(prefix):
                self.c62, self.c63, self.pad = c62, c63, pad
                break
        else:
            raise ValueError("Unknown base64 dialect: %r" % (dialect,))

        self.charset = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                                 + 'abcdefghijklmnopqrstuvwxyz0123456789'
                                 + self.c62 + self.c63 + self.pad)

    @staticmethod
    def _translate(text, source, target):
        """Replace each character of source found in text with the
        character at the same position in target."""
        mapping = dict(zip(source, target))
        return ''.join([mapping.get(ch, ch) for ch in text])

    def _guessPadLength(self, nopad_len):
        """Return how many pad characters a blob with nopad_len non-pad
        characters needs, or None if that length is impossible."""
        length = ((4 - nopad_len % 4) % 4)
        # A remainder of one character can never occur in valid base64
        if length != 3:
            return length
        return None

    def extraTests(self, blob):
        """Check the length and padding of blob (text) for this dialect."""
        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess is None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad) + padlen_guess)

    def decode(self, blob):
        """Decode blob and return the raw data.

        Raises Exception for a 'nopad' dialect when blob contains the
        pad character or has an impossible length.
        """
        # Accept bytes as well (e.g. the output of a previous decoding
        # step on Python 3); base64 text is always ASCII.
        if not isinstance(blob, str):
            blob = blob.decode('ascii')

        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base64 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen is None:
                raise Exception("Invalid length for unpadded base64 string.")

            blob = blob + (self.pad * padlen)

        if not self.dialect.startswith('rfc3548'):
            # Map the dialect's alphabet back onto the standard one
            blob = self._translate(blob, self.c62 + self.c63 + self.pad, '+/=')

        return base64.standard_b64decode(blob)

    def encode(self, blob):
        """Encode the raw data in blob and return the base64 text."""
        # The base64 module needs bytes; ASCII text (e.g. the output of a
        # previous encoding step) is converted for convenience.
        if not isinstance(blob, (bytes, bytearray)):
            blob = blob.encode('ascii')

        ret_val = base64.standard_b64encode(blob)
        # Python 3 returns bytes here; the rest of this class works on text
        if not isinstance(ret_val, str):
            ret_val = ret_val.decode('ascii')

        if not self.dialect.startswith('rfc3548'):
            ret_val = self._translate(ret_val, '+/=', self.c62 + self.c63 + self.pad)

        if self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        return ret_val
159
160
class base32Encoding(DataEncoding):
    """Base32 with an upper- or lower-case alphabet.

    A dialect name ending in 'nopad' denotes the same alphabet with the
    trailing pad characters stripped.  Encoded blobs are handled as
    text, decoded blobs as bytes.
    """
    name = 'base32'

    def __init__(self, dialect='rfc3548upper'):
        """Select the alphabet for dialect.

        An unrecognized dialect keeps the (empty) inherited charset, so
        isExample() rejects every non-empty blob.
        """
        super(base32Encoding, self).__init__(dialect)
        # Both known dialects pad with '='; setting it unconditionally
        # keeps extraTests()/decode() usable for any dialect name.
        self.pad = '='
        if dialect.startswith('rfc3548upper'):
            self.charset = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' + self.pad)
        elif dialect.startswith('rfc3548lower'):
            self.charset = frozenset('abcdefghijklmnopqrstuvwxyz234567' + self.pad)

    def _guessPadLength(self, nopad_len):
        """Return how many pad characters a blob with nopad_len non-pad
        characters needs, or None if that length is impossible."""
        # Maps (length mod 8) to the matching pad length; other
        # remainders cannot occur in valid base32.
        pad_lengths = {0: 0, 7: 1, 5: 3, 4: 4, 2: 6}
        return pad_lengths.get(nopad_len % 8, None)

    def extraTests(self, blob):
        """Check the length and padding of blob (text) for this dialect."""
        nopad = blob.rstrip(self.pad)
        padlen_guess = self._guessPadLength(len(nopad))
        if padlen_guess is None:
            return False

        # we don't accept bad pads, only missing pads
        if self.dialect.endswith('nopad'):
            return self.pad not in blob

        # pad must not appear in the middle of the
        # string and must be the correct length at the end
        return (self.pad not in nopad) and (len(blob) == len(nopad) + padlen_guess)

    def decode(self, blob):
        """Decode blob and return the raw data.

        Raises Exception for a 'nopad' dialect when blob contains the
        pad character or has an impossible length.
        """
        # Accept bytes as well (e.g. the output of a previous decoding
        # step on Python 3); base32 text is always ASCII.
        if not isinstance(blob, str):
            blob = blob.decode('ascii')

        if self.dialect.endswith('nopad'):
            if self.pad in blob:
                raise Exception("Unpadded base32 string contains pad character")

            padlen = self._guessPadLength(len(blob))
            if padlen is None:
                raise Exception("Invalid length for unpadded base32 string.")

            blob = blob + (self.pad * padlen)

        # b32decode only knows the upper-case alphabet
        return base64.b32decode(blob.upper())

    def encode(self, blob):
        """Encode the raw data in blob and return the base32 text."""
        # The base64 module needs bytes; ASCII text (e.g. the output of a
        # previous encoding step) is converted for convenience.
        if not isinstance(blob, (bytes, bytearray)):
            blob = blob.encode('ascii')

        ret_val = base64.b32encode(blob)
        # Python 3 returns bytes here; the rest of this class works on text
        if not isinstance(ret_val, str):
            ret_val = ret_val.decode('ascii')

        if self.dialect.endswith('nopad'):
            ret_val = ret_val.rstrip(self.pad)

        if 'lower' in self.dialect:
            return ret_val.lower()
        return ret_val.upper()
218
219
class hexEncoding(DataEncoding):
    """Hexadecimal encoding in 'upper', 'lower' or 'mixed' case dialects.

    Encoded blobs are handled as text, decoded blobs as bytes.
    """
    name = 'hex'

    def __init__(self, dialect='mixed'):
        """Select the accepted digit set for dialect.

        An unrecognized dialect keeps the (empty) inherited charset.
        """
        super(hexEncoding, self).__init__(dialect)
        if 'mixed' in dialect:
            self.charset = frozenset('ABCDEFabcdef0123456789')
        elif 'upper' in dialect:
            self.charset = frozenset('ABCDEF0123456789')
        elif 'lower' in dialect:
            self.charset = frozenset('abcdef0123456789')

    def extraTests(self, blob):
        """A hex string must hold an even number of digits."""
        return (len(blob) % 2 == 0)

    def decode(self, blob):
        """Decode the hex digits in blob and return the raw data."""
        return binascii.a2b_hex(blob)

    def encode(self, blob):
        """Encode the raw data in blob and return the hex text.

        'upper' dialects produce upper-case digits; every other dialect
        (including 'mixed') produces lower-case digits.
        """
        # binascii needs bytes; ASCII text (e.g. the output of a previous
        # encoding step) is converted for convenience.
        if not isinstance(blob, (bytes, bytearray)):
            blob = blob.encode('ascii')

        ret_val = binascii.b2a_hex(blob)
        # Python 3 returns bytes here; the charset of this class is text
        if not isinstance(ret_val, str):
            ret_val = ret_val.decode('ascii')

        if 'upper' in self.dialect:
            return ret_val.upper()
        if 'lower' in self.dialect:
            return ret_val.lower()
        return ret_val
245
246
class percentEncoding(DataEncoding):
    """Percent (URL) encoding.

    The dialect selects which case of hex digits is accepted after '%'
    ('upper', 'lower' or 'mixed'); a dialect containing 'plus' represents
    spaces as '+'.
    """
    name = 'percent'

    def __init__(self, dialect='mixed'):
        """Select the accepted hex digits for dialect."""
        super(percentEncoding, self).__init__(dialect)
        # Unescaped characters are unrestricted, so the generic
        # character-set check is disabled; extraTests() does the work.
        self.charset = None
        if 'mixed' in dialect:
            self.hexchars = frozenset('ABCDEFabcdef0123456789')
        elif 'upper' in dialect:
            self.hexchars = frozenset('ABCDEF0123456789')
        elif 'lower' in dialect:
            self.hexchars = frozenset('abcdef0123456789')

    @staticmethod
    def _quoting():
        """Return the module that provides quote()/unquote() and friends."""
        try:
            import urllib.parse as quoting  # Python 3
        except ImportError:
            import urllib as quoting  # Python 2
        return quoting

    def extraTests(self, blob):
        """Check that every '%' in blob (text) is followed by two hex digits.

        Returns None when blob contains no '%' at all, since such a blob
        gives no evidence either way.
        """
        chunks = blob.split('%')
        if len(chunks) < 2:
            return None
        # Every chunk after the first one started right behind a '%'
        for chunk in chunks[1:]:
            if len(chunk) < 2:
                return False
            if (chunk[0] not in self.hexchars) or (chunk[1] not in self.hexchars):
                return False
        return True

    def decode(self, blob):
        """Decode blob and return the raw data.

        Only 'plus' dialects turn '+' into a space (the original code
        had this test inverted).
        """
        quoting = self._quoting()
        plus = 'plus' in self.dialect

        to_bytes = getattr(quoting, 'unquote_to_bytes', None)
        if to_bytes is None:
            # Python 2: unquote() already yields a byte string
            if plus:
                return quoting.unquote_plus(blob)
            return quoting.unquote(blob)

        # Python 3: plain unquote() would decode the result as UTF-8 and
        # mangle arbitrary binary data, so go straight to bytes.
        if plus:
            if isinstance(blob, (bytes, bytearray)):
                blob = blob.replace(b'+', b' ')
            else:
                blob = blob.replace('+', ' ')
        return to_bytes(blob)

    # XXX: should technically produce quoted digits in same upper/lower case
    def encode(self, blob):
        """Encode the raw data in blob and return the percent-encoded text.

        Only 'plus' dialects write spaces as '+' (the original code had
        this test inverted).
        """
        quoting = self._quoting()
        if 'plus' in self.dialect:
            return quoting.quote_plus(blob)
        return quoting.quote(blob)
282
283
# One (encoding class, dialect, priority) entry per supported encoding.
# bestEncoding() prefers the entry with the lowest priority value.
priorities = [
    # hexadecimal
    (hexEncoding, 'upper', 100),
    (hexEncoding, 'lower', 101),
    (hexEncoding, 'mixed', 102),

    # base32
    (base32Encoding, 'rfc3548upper', 150),
    (base32Encoding, 'rfc3548lower', 151),
    (base32Encoding, 'rfc3548upper-nopad', 160),
    (base32Encoding, 'rfc3548lower-nopad', 161),

    # base64
    (base64Encoding, 'rfc3548', 200),
    (base64Encoding, 'rfc3548-nopad', 201),
    (base64Encoding, 'filename', 210),
    (base64Encoding, 'filename-nopad', 211),
    (base64Encoding, 'url1', 230),
    (base64Encoding, 'url1-nopad', 231),
    (base64Encoding, 'otkurl', 235),
    (base64Encoding, 'otkurl-nopad', 236),
    (base64Encoding, 'url2', 240),
    (base64Encoding, 'url2-nopad', 241),
    (base64Encoding, 'url3', 250),
    (base64Encoding, 'url3-nopad', 251),
    (base64Encoding, 'url4', 260),
    (base64Encoding, 'url4-nopad', 261),
    (base64Encoding, 'url5', 265),
    (base64Encoding, 'url5-nopad', 266),
    (base64Encoding, 'xmlnmtoken', 270),
    (base64Encoding, 'xmlnmtoken-nopad', 271),
    (base64Encoding, 'xmlname', 280),
    (base64Encoding, 'xmlname-nopad', 281),

    # percent (URL) encoding
    (percentEncoding, 'upper-plus', 400),
    (percentEncoding, 'upper', 401),
    (percentEncoding, 'lower-plus', 410),
    (percentEncoding, 'lower', 411),
    (percentEncoding, 'mixed-plus', 420),
    (percentEncoding, 'mixed', 421),
]

# Registry of ready-to-use encoding instances, keyed by "<name>/<dialect>".
encodings = {}
for enc, d, p in priorities:
    e = encodings["%s/%s" % (enc.name, d)] = enc(d)
    e.priority = p
325
326
def possibleEncodings(blob):
    """Classify every registered encoding against blob.

    Returns a (likely, possible) pair of sets of encoding names:
    ``likely`` holds the encodings whose isExample() accepted blob,
    ``possible`` those that answered None (cannot tell).  Encodings
    that rejected blob appear in neither set.
    """
    likely = set()
    possible = set()
    for name, encoding in encodings.items():
        verdict = encoding.isExample(blob)
        if verdict is None:
            possible.add(name)
        elif verdict:
            likely.add(name)
    return likely, possible
337
338
def encodingIntersection(blobs):
    """Return the names of the encodings that fit every blob in blobs.

    An encoding qualifies when it is likely or possible (see
    possibleEncodings) for each blob, unless it was merely "possible"
    for all of them, i.e. no blob gave positive evidence for it.
    An empty blobs sequence yields an empty set.
    """
    candidates = set(encodings.keys())
    never_confirmed = set(encodings.keys())
    for blob in blobs:
        likely, possible = possibleEncodings(blob)
        candidates &= likely | possible
        never_confirmed &= possible
    return candidates - never_confirmed
347
348
def bestEncoding(encs):
    """Return the name in encs whose registered encoding has the lowest
    priority value (the first one wins a tie), or None if encs is empty
    or no priority is below 999999999."""
    winner, lowest = None, 999999999
    for candidate in encs:
        rank = encodings[candidate].priority
        if rank < lowest:
            winner, lowest = candidate, rank
    return winner
357
358
def decode(encoding, blob):
    """Decode blob with the registered encoding named by encoding."""
    codec = encodings[encoding]
    return codec.decode(blob)
361
def encode(encoding, blob):
    """Encode blob with the registered encoding named by encoding."""
    codec = encodings[encoding]
    return codec.encode(blob)
364
def decodeAll(encoding, blobs):
    """Decode every blob in blobs with the registered encoding named by
    encoding and return the results as a list.

    A list is built explicitly because map() is lazy on Python 3.
    """
    decoder = encodings[encoding].decode
    return [decoder(blob) for blob in blobs]
367
def encodeAll(encoding, blobs):
    """Encode every blob in blobs with the registered encoding named by
    encoding and return the results as a list.

    A list is built explicitly because map() is lazy on Python 3.
    """
    encoder = encodings[encoding].encode
    return [encoder(blob) for blob in blobs]
370
def decodeChain(decoding_chain, blob):
    """Apply the decodings named in decoding_chain to blob, one after
    another in the given order, and return the final result."""
    return functools.reduce(
        lambda data, decoding: decode(decoding, data),
        decoding_chain,
        blob)
375
def encodeChain(encoding_chain, blob):
    """Apply the encodings named in encoding_chain to blob, one after
    another in the given order, and return the final result."""
    return functools.reduce(
        lambda data, encoding: encode(encoding, data),
        encoding_chain,
        blob)
380
def getLengths(s):
    """Return the distinct lengths of the items in s as an ascending list."""
    return sorted(set(len(item) for item in s))
388
389
def maxBlockSize(blob_lengths):
    """Return the greatest common divisor of all values in blob_lengths,
    i.e. the largest block size that evenly divides every length.

    Returns 0 when blob_lengths is empty.
    """
    # fractions.gcd was removed in Python 3.9; math.gcd replaces it
    try:
        from math import gcd
    except ImportError:  # Python < 3.5
        from fractions import gcd

    divisor = 0
    for length in blob_lengths:
        divisor = gcd(divisor, length)

    return divisor
396
397
# Folds a non-empty iterable with "and"; functools.reduce is used because
# reduce is no longer a builtin on Python 3.
allTrue = functools.partial(functools.reduce, (lambda x, y: x and y))
399
def checkCommonBlocksizes(lengths):
    """Return those of the common block sizes (8, 16, 20) that evenly
    divide every value in lengths, in ascending order.

    Returns an empty list when lengths is empty, since there is then no
    evidence for any block size (this used to raise TypeError).
    """
    common_block_sizes = (8, 16, 20)
    lengths = list(lengths)
    if not lengths:
        return []
    # "gcd(x, cbs) == cbs" simply means that cbs divides x; the modulo
    # test avoids fractions.gcd, which was removed in Python 3.9.
    return [cbs for cbs in common_block_sizes
            if all(length % cbs == 0 for length in lengths)]
408
409
def int2binary(x, bits=8):
    """
    Render the lowest `bits` bits of the integer x as a string of
    '0' and '1' characters, most significant bit first.
    """
    digits = []
    for shift in reversed(range(bits)):
        digits.append(str((x >> shift) & 1))
    return "".join(digits)
416
417
418	#XXX: move this to buffertools
def smartPermutateBlobs(blobs, block_size=8):
    """
    Intelligently permutates through blocks in blobs.
    If the same block shows up in the same place for
    every blob, the resultant permutations will have
    this property as well.

    blobs should be a sequence containing blobs
    block_size should be an integer block size or an
    iterable of block sizes, each of which is processed in turn.

    This is a generator; it yields each new blob that is not
    already present in blobs.
    """

    if len(blobs) == 0:
        return

    try:
        integer_types = (int, long)
    except NameError:  # Python 3 has no separate long type
        integer_types = (int,)

    if not isinstance(block_size, integer_types):
        for size in block_size:
            for blob in smartPermutateBlobs(blobs, size):
                yield blob
        return

    # First we find the indexes of the chunks that are different
    # (assumes buffertools.blockWiseDiff returns those indexes for a
    # pair of blobs -- confirm against buffertools)
    different = set()
    for first, second in itertools.combinations(blobs, 2):
        different |= set(buffertools.blockWiseDiff(block_size, first, second))

    # Next we form a set containing the chunks that are different
    # (a set, so duplicates are removed)
    different_chunks = set()
    for blob in blobs:
        for i in different:
            different_chunks.add(blob[i * block_size:(i + 1) * block_size])

    # We want to know which chunks are the same, too.  They are kept in
    # ascending order so the positional inserts below build up correctly.
    chunk_len = len(blobs[0]) // block_size
    same = sorted(set(range(0, chunk_len)) - different)

    # Empty value of the blobs' own type, so joining works for text and bytes
    empty = blobs[0][:0]

    # Now let's mix and match the different blocks, for all possible lengths
    # (sorted so that the order of the results is reproducible)
    for i in range(1, chunk_len + 1):
        for combo in itertools.permutations(sorted(different_chunks), i):
            # permutations() yields tuples, which cannot be inserted into
            mix = list(combo)
            # We add back in the part that stays the same
            for j in same:
                mix.insert(j, blobs[0][j * block_size:(j + 1) * block_size])
            mix = empty.join(mix)
            if mix in blobs:
                continue
            yield mix

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: lib/bletchley/blobtools.py @ 20

Download in other formats: