Changeset 221 for trunk/python/pyregfi
- Timestamp:
- 04/01/11 23:42:08 (14 years ago)
- Location:
- trunk/python/pyregfi
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/python/pyregfi/__init__.py
r220 r221 5 5 # 6 6 7 ## @mainpage API Documentation 8 # 9 # The pyregfi module provides a Python interface to the @ref regfi Windows 10 # registry library. 11 # 12 # The library operates on registry hives, each of which is contained within a 13 # single file. To get started, one must first open the registry hive file with 14 # the open() or file() Python built-in functions (or equivalent) and then pass 15 # the resulting file object to pyregfi. For example: 16 # @code 17 # >>> import pyregfi 18 # >>> fh = open('/mnt/win/c/WINDOWS/system32/config/system', 'rb') 19 # >>> myHive = pyregfi.Hive(fh) 20 # @endcode 21 # 22 # Using this Hive object, one can begin investigating what top-level keys 23 # exist by starting with the root Key attribute: 24 # @code 25 # >>> for key in myHive.root.subkeys: 26 # ... print(key.name) 27 # ControlSet001 28 # ControlSet003 29 # LastKnownGoodRecovery 30 # MountedDevices 31 # Select 32 # Setup 33 # WPA 34 # @endcode 35 # 36 # From there, accessing subkeys and values by name is a simple matter of: 37 # @code 38 # >>> myKey = myHive.root.subkeys['Select'] 39 # >>> myValue = myKey.values['Current'] 40 # @endcode 41 # 42 # The data associated with a Value can be obtained through the fetch_data() 43 # method: 44 # @code 45 # >>> print(myValue.fetch_data()) 46 # 1 47 # @endcode 48 # 49 # While useful for simple exercises, using the subkeys object for deeply nested 50 # paths is not efficient and doesn't make for particularly attractive code. 
51 # Instead, a special-purpose HiveIterator class is provided for simplicity of 52 # use and fast access to specific known paths: 53 # @code 54 # >>> myIter = pyregfi.HiveIterator(myHive) 55 # >>> myIter.descend(['ControlSet001','Control','NetworkProvider','HwOrder']) 56 # >>> myKey = myIter.current_key() 57 # >>> print(myKey.values['ProviderOrder'].fetch_data()) 58 # RDPNP,LanmanWorkstation,WebClient 59 # @endcode 60 # 61 # The first two lines above can be simplified in some "syntactic sugar" provided 62 # by the Hive.subtree() method. Also, as one might expect, the HiveIterator 63 # also acts as an iterator, producing keys in a depth-first order. 64 # For instance, to traverse all keys under the ControlSet003\\Services key, 65 # printing their names as we go, we could do: 66 # @code 67 # >>> for key in Hive.subtree(['ControlSet003','Services']): 68 # >>> print(key.name) 69 # Services 70 # Abiosdsk 71 # abp480n5 72 # Parameters 73 # PnpInterface 74 # ACPI 75 # [...] 76 # @endcode 77 # 78 # Note that "Services" was printed first, since the subtree is traversed as a 79 # "preordering depth-first" search starting with the HiveIterator's current_key(). 80 # As one might expect, traversals of subtrees stops when all elements in a 81 # specific subtree (and none outside of it) have been traversed. 82 # 83 # For more information, peruse the various attributes and methods available on 84 # the Hive, HiveIterator, Key, Value, and Security classes. 85 # 86 # @note @ref regfi is a read-only library by design and there 87 # are no plans to implement write support. 88 # 89 # @note At present, pyregfi has been tested with Python versions 2.6 and 3.1 90 # 91 # @note Developers strive to make pyregfi thread-safe. 
92 # 93 # @note Key and Value names are case-sensitive in regfi and pyregfi 94 # 7 95 import sys 8 96 import time … … 13 101 import ctypes.util 14 102 15 16 ## Retrieves messages produced by regfi during parsing and interpretation 17 # 18 def GetLogMessages(): 19 msgs = regfi.regfi_log_get_str() 20 if msgs == None: 21 return '' 22 return msgs.decode('utf-8') 103 ## An enumeration of registry Value data types 104 # 105 # @note This is a static class, there is no need to instantiate it. 106 # Just access its attributes directly as DATA_TYPES.SZ, etc 107 class DATA_TYPES(object): 108 ## None / Unknown 109 NONE = 0 110 ## String 111 SZ = 1 112 ## String with %...% expansions 113 EXPAND_SZ = 2 114 ## Binary buffer 115 BINARY = 3 116 ## 32 bit integer (little endian) 117 DWORD = 4 # DWORD, little endian 118 ## 32 bit integer (little endian) 119 DWORD_LE = 4 120 ## 32 bit integer (big endian) 121 DWORD_BE = 5 # DWORD, big endian 122 ## Symbolic link 123 LINK = 6 124 ## List of strings 125 MULTI_SZ = 7 126 ## Unknown structure 127 RESOURCE_LIST = 8 128 ## Unknown structure 129 FULL_RESOURCE_DESCRIPTOR = 9 130 ## Unknown structure 131 RESOURCE_REQUIREMENTS_LIST = 10 132 ## 64 bit integer 133 QWORD = 11 # 64-bit little endian 23 134 24 135 … … 58 169 59 170 60 ## Abstract class which Handles memory management and proxies attribute 61 # access to base structures 171 ## Retrieves messages produced by regfi during parsing and interpretation 172 # 173 # The regfi C library may generate log messages stored in a special thread-safe 174 # global data structure. These messages should be retrieved periodically or 175 # after each major operation by callers to determine if any errors or warnings 176 # should be reported to the user. Failure to retrieve these could result in 177 # excessive memory consumption. 
178 def GetLogMessages(): 179 msgs = regfi.regfi_log_get_str() 180 if msgs == None: 181 return '' 182 return msgs.decode('utf-8') 183 184 185 ## Abstract class for most objects returned by the library 62 186 class _StructureWrapper(object): 63 187 _hive = None … … 76 200 self._base = base 77 201 202 # Memory management for most regfi structures is taken care of here 78 203 def __del__(self): 79 204 regfi.regfi_free_record(self._base) 80 205 206 # Any attribute requests not explicitly defined in subclasses gets passed 207 # to the equivalent REGFI_* structure defined in structures.py 81 208 def __getattr__(self, name): 82 209 return getattr(self._base.contents, name) 83 210 211 ## Test for equality 212 # 213 # Records returned by pyregfi may be compared with one another. For example: 214 # @code 215 # >>> key2 = key1.subkeys['child'] 216 # >>> key1 == key2 217 # False 218 # >>> key1 != key2 219 # True 220 # >>> key1 == key2.get_parent() 221 # True 222 # @endcode 84 223 def __eq__(self, other): 85 224 return (type(self) == type(other)) and (self.offset == other.offset) … … 88 227 return (not self.__eq__(other)) 89 228 90 class Key(_StructureWrapper): 229 230 class Key(): 91 231 pass 92 232 93 class Value(_StructureWrapper): 233 234 class Value(): 94 235 pass 95 236 96 ## Registry value data 97 class Data(_StructureWrapper): 98 pass 99 100 ## Registry security record/permissions 237 238 ## Registry security record and descriptor 239 # XXX: Access to security descriptors not yet implemented 101 240 class Security(_StructureWrapper): 102 241 pass 103 242 104 243 ## Abstract class for ValueList and SubkeyList 105 244 class _GenericList(object): 106 245 _hive = None … … 109 248 _current = None 110 249 111 # implementation-specific functions for _SubkeyList and _ValueList250 # implementation-specific functions for SubkeyList and ValueList 112 251 _fetch_num = None 113 252 _find_element = None … … 128 267 self._key = weakref.proxy(key) 129 268 self._length = 
self._fetch_num(key._base) 130 131 269 270 271 ## Length of list 132 272 def __len__(self): 133 273 return self._length 134 274 275 276 ## Retrieves a list element by name 277 # 278 # @return the first element whose name matches, or None if the element 279 # could not be found 135 280 def __getitem__(self, name): 136 281 index = ctypes.c_uint32() … … 172 317 173 318 174 class _SubkeyList(_GenericList): 319 ## The list of subkeys associated with a Key 320 # 321 # This attribute is both iterable: 322 # @code 323 # for k in myKey.subkeys: 324 # ... 325 # @endcode 326 # and accessible as a dictionary: 327 # @code 328 # mySubkey = myKey.subkeys["keyName"] 329 # @endcode 330 # 331 # @note SubkeyLists should never be accessed directly and only exist 332 # in association with a parent Key object. Do not retain references to 333 # SubkeyLists. Instead, access them via their parent Key at all times. 334 class SubkeyList(_GenericList): 175 335 _fetch_num = regfi.regfi_fetch_num_subkeys 176 336 _find_element = regfi.regfi_find_subkey … … 178 338 179 339 180 class _ValueList(_GenericList): 340 ## The list of values associated with a Key 341 # 342 # This attribute is both iterable: 343 # @code 344 # for v in myKey.values: 345 # ... 346 # @endcode 347 # and accessible as a dictionary: 348 # @code 349 # myValue = myKey.values["valueName"] 350 # @endcode 351 # 352 # @note ValueLists should never be accessed directly and only exist 353 # in association with a parent Key object. Do not retain references to 354 # ValueLists. Instead, access them via their parent Key at all times. 355 class ValueList(_GenericList): 181 356 _fetch_num = regfi.regfi_fetch_num_values 182 357 _find_element = regfi.regfi_find_value … … 185 360 186 361 ## Registry key 362 # These represent registry keys (@ref REGFI_NK records) and provide 363 # access to their subkeys, values, and other metadata. 364 # 365 # @note Value instances may provide access to more than the attributes 366 # documented here. 
However, undocumented attributes may change over time 367 # and are not officially supported. If you need access to an attribute 368 # not shown here, see pyregfi.structures. 187 369 class Key(_StructureWrapper): 370 ## A @ref ValueList object representing the list of Values 371 # stored on this Key 188 372 values = None 373 374 ## A @ref SubkeyList object representing the list of subkeys 375 # stored on this Key 189 376 subkeys = None 377 378 ## The raw Key name as an uninterpreted bytearray 379 name_raw = (b"...") 380 381 ## The name of the Key as a (unicode) string 382 name = "..." 383 384 ## The absolute file offset of the Key record's cell in the Hive file 385 offset = 0xCAFEBABE 386 387 ## This Key's last modified time represented as the number of seconds 388 # since the UNIX epoch in UTC; similar to what time.time() returns 389 modified = 1300000000.123456 390 391 ## The NK record's flags field 392 flags = 0x10110001 190 393 191 394 def __init__(self, hive, base): 192 395 super(Key, self).__init__(hive, base) 193 self.values = _ValueList(self)194 self.subkeys = _SubkeyList(self)396 self.values = ValueList(self) 397 self.subkeys = SubkeyList(self) 195 398 196 399 def __getattr__(self, name): … … 216 419 return ret_val 217 420 421 422 ## Retrieves the Security properties for this key 218 423 def fetch_security(self): 219 424 return Security(self._hive, 220 425 regfi.regfi_fetch_sk(self._hive.file, self._base)) 221 426 427 428 ## Retrieves the class name for this key 429 # 430 # Class names are typically stored as UTF-16LE strings, so these are decoded 431 # into proper python (unicode) strings. However, if this fails, a bytearray 432 # is instead returned containing the raw buffer stored for the class name. 433 # 434 # @return The class name as a string or bytearray. None if a class name 435 # doesn't exist or an unrecoverable error occurred during retrieval. 
222 436 def fetch_classname(self): 223 437 ret_val = None … … 234 448 return ret_val 235 449 450 451 ## Retrieves this key's parent key 452 # 453 # @return The parent's Key instance or None if current key is root 454 # (or an error occurred) 236 455 def get_parent(self): 237 456 if self.is_root(): … … 250 469 # These represent registry values (@ref REGFI_VK records) and provide 251 470 # access to their associated data. 252 # 471 # 472 # @note Value instances may provide access to more than the attributes 473 # documented here. However, undocumented attributes may change over time 474 # and are not officially supported. If you need access to an attribute 475 # not shown here, see pyregfi.structures. 253 476 class Value(_StructureWrapper): 477 ## The raw Value name as an uninterpreted bytearray 478 name_raw = (b"...") 479 480 ## The name of the Value as a (unicode) string 481 name = "..." 482 483 ## The absolute file offset of the Value record's cell in the Hive file 484 offset = 0xCAFEBABE 485 486 ## The length of data advertised in the VK record 487 data_size = 0xCAFEBABE 488 489 ## An integer which represents the data type for this Value's data 490 # Typically this value is one of 12 types defined in @ref DATA_TYPES, 491 # but in some cases (the SAM hive) it may be used for other purposes 492 type = DATA_TYPES.NONE 493 494 ## The VK record's flags field 495 flags = 0x10110001 496 497 ## Retrieves the Value's data according to advertised type 498 # 499 # Data is loaded from its cell(s) and then interpreted based on the data 500 # type recorded in the Value. It is not uncommon for data to be stored with 501 # the wrong type or even with invalid types. If you have difficulty 502 # obtaining desired data here, use @ref fetch_raw_data(). 503 # 504 # @return The interpreted representation of the data as one of several 505 # possible Python types, as listed below. None if any failure 506 # occurred during extraction or conversion. 
507 # 508 # @retval string for SZ, EXPAND_SZ, and LINK 509 # @retval int for DWORD, DWORD_BE, and QWORD 510 # @retval list(string) for MULTI_SZ 511 # @retval bytearray for NONE, BINARY, RESOURCE_LIST, 512 # FULL_RESOURCE_DESCRIPTOR, and RESOURCE_REQUIREMENTS_LIST 513 # 254 514 def fetch_data(self): 255 515 ret_val = None … … 261 521 if data_struct.interpreted_size == 0: 262 522 ret_val = None 263 elif data_struct.type in ( REG_SZ, REG_EXPAND_SZ, REG_LINK):523 elif data_struct.type in (DATA_TYPES.SZ, DATA_TYPES.EXPAND_SZ, DATA_TYPES.LINK): 264 524 # Unicode strings 265 525 ret_val = data_struct.interpreted.string.decode('utf-8', 'replace') 266 elif data_struct.type in ( REG_DWORD, REG_DWORD_BE):526 elif data_struct.type in (DATA_TYPES.DWORD, DATA_TYPES.DWORD_BE): 267 527 # 32 bit integers 268 528 ret_val = data_struct.interpreted.dword 269 elif data_struct.type == REG_QWORD:529 elif data_struct.type == DATA_TYPES.QWORD: 270 530 # 64 bit integers 271 531 ret_val = data_struct.interpreted.qword 272 elif data_struct.type == REG_MULTI_SZ:532 elif data_struct.type == DATA_TYPES.MULTI_SZ: 273 533 ret_val = _charss2strlist(data_struct.interpreted.multiple_string) 274 elif data_struct.type in ( REG_NONE, REG_RESOURCE_LIST,275 REG_FULL_RESOURCE_DESCRIPTOR,276 REG_RESOURCE_REQUIREMENTS_LIST,277 REG_BINARY):534 elif data_struct.type in (DATA_TYPES.NONE, DATA_TYPES.RESOURCE_LIST, 535 DATA_TYPES.FULL_RESOURCE_DESCRIPTOR, 536 DATA_TYPES.RESOURCE_REQUIREMENTS_LIST, 537 DATA_TYPES.BINARY): 278 538 ret_val = _buffer2bytearray(data_struct.interpreted.none, 279 539 data_struct.interpreted_size) … … 281 541 regfi.regfi_free_record(data_p) 282 542 return ret_val 283 543 544 545 ## Retrieves raw representation of Value's data 546 # 547 # @return A bytearray containing the data 548 # 284 549 def fetch_raw_data(self): 285 550 ret_val = None 286 287 551 # XXX: should we load the data without interpretation instead? 
288 552 data_p = regfi.regfi_fetch_data(self._hive.file, self._base) … … 294 558 data_struct.size) 295 559 regfi.regfi_free_record(data_p) 296 297 560 return ret_val 561 298 562 299 563 def __getattr__(self, name): … … 314 578 # Avoids chicken/egg class definitions. 315 579 # Also makes for convenient code reuse in these lists' parent classes. 316 _SubkeyList._constructor = Key317 _ValueList._constructor = Value580 SubkeyList._constructor = Key 581 ValueList._constructor = Value 318 582 319 583 320 584 321 585 ## Represents a single registry hive (file) 322 #323 586 class Hive(): 324 587 file = None … … 326 589 _root = None 327 590 591 ## The root Key of this Hive 592 root = None 593 594 ## This Hive's last modified time represented as the number of seconds 595 # since the UNIX epoch in UTC; similar to what time.time() returns 596 modified = 1300000000.123456 597 598 ## First sequence number 599 sequence1 = 12345678 600 601 ## Second sequence number 602 sequence2 = 12345678 603 604 ## Major version 605 major_version = 1 606 607 ## Minor version 608 minor_version = 5 609 610 # XXX: Possibly add a second or factory function which opens a 611 # hive file for you 612 613 ## Constructor 614 # 615 # @param fh A Python file object. The constructor first looks for a valid 616 # fileno attribute on this object and uses it if possible. 617 # Otherwise, the seek and read methods are used for file 618 # access. 619 # 620 # @note Supplied file must be seekable 328 621 def __init__(self, fh): 329 # The fileno method may not exist, or it may throw an exception330 # when called if the file isn't backed with a descriptor.331 622 try: 623 # The fileno method may not exist, or it may throw an exception 624 # when called if the file isn't backed with a descriptor. 
332 625 if hasattr(fh, 'fileno'): 333 626 self.file = regfi.regfi_alloc(fh.fileno(), REGFI_ENCODING_UTF8) … … 342 635 self.file = regfi.regfi_alloc_cb(self.raw_file, REGFI_ENCODING_UTF8) 343 636 637 344 638 def __getattr__(self, name): 345 639 if name == "root": … … 348 642 return self._root 349 643 644 elif name == "modified": 645 return regfi.regfi_nt2unix_time(byref(self._base.contents.mtime)) 646 350 647 return getattr(self.file.contents, name) 648 351 649 352 650 def __del__(self): … … 355 653 self.raw_file = None 356 654 655 357 656 def __iter__(self): 358 657 return HiveIterator(self) … … 360 659 361 660 ## Creates a @ref HiveIterator initialized at the specified path in 362 # the hive. 363 # 364 # Raises an Exception if the path could not be found/traversed. 661 # the hive. 662 # 663 # @param path A list of Key names which represent an absolute path within 664 # the Hive 665 # 666 # @return A @ref HiveIterator which is positioned at the specified path. 667 # 668 # @exception Exception If the path could not be found/traversed 365 669 def subtree(self, path): 366 670 hi = HiveIterator(self) … … 416 720 417 721 if not up_ret: 722 self._iteration_root = None 418 723 raise StopIteration('') 419 724 … … 429 734 next = __next__ 430 735 736 # XXX: Should add sanity checks on some of these traversal functions 737 # to throw exceptions if a traversal/retrieval *should* have worked 738 # but failed for some reason. 739 740 ## Descends the iterator to a subkey 741 # 742 # Descends the iterator one level to the current subkey, or a subkey 743 # specified by name. 744 # 745 # @param subkey_name If specified, locates specified subkey by name 746 # (via find_subkey()) and descends to it. 
747 # 748 # @return True if successful, False otherwise 431 749 def down(self, subkey_name=None): 432 750 if subkey_name == None: … … 438 756 and regfi.regfi_iterator_down(self._iter)) 439 757 758 759 ## Causes the iterator to ascend to the current Key's parent 760 # 761 # @return True if successful, False otherwise 762 # 763 # @note The state of current subkeys and values at this level in the tree 764 # is lost as a side effect. That is, if you go up() and then back 765 # down() again, current_subkey() and current_value() will return 766 # default selections. 440 767 def up(self): 441 768 return regfi.regfi_iterator_up(self._iter) 442 769 770 771 ## Selects first subkey of current key 772 # 773 # @return A Key instance for the first subkey. 774 # None on error or if the current key has no subkeys. 443 775 def first_subkey(self): 444 776 if regfi.regfi_iterator_first_subkey(self._iter): … … 446 778 return None 447 779 780 781 ## Selects first value of current Key 782 # 783 # @return A Value instance for the first value. 784 # None on error or if the current key has no values. 448 785 def first_value(self): 449 786 if regfi.regfi_iterator_first_value(self._iter): … … 451 788 return None 452 789 790 791 ## Selects the next subkey in the current Key's list 792 # 793 # @return A Key instance for the next subkey. 794 # None if there are no remaining subkeys or an error occurred. 453 795 def next_subkey(self): 454 796 if regfi.regfi_iterator_next_subkey(self._iter): … … 456 798 return None 457 799 800 801 ## Selects the next value in the current Key's list 802 # 803 # @return A Value instance for the next value. 804 # None if there are no remaining values or an error occurred. 458 805 def next_value(self): 459 806 if regfi.regfi_iterator_next_value(self._iter): … … 461 808 return None 462 809 810 811 ## Selects the first subkey which has the specified name 812 # 813 # @return A Key instance for the selected key. 814 # None if it could not be located or an error occurred. 
463 815 def find_subkey(self, name): 464 816 if name != None: … … 468 820 return None 469 821 822 823 ## Selects the first value which has the specified name 824 # 825 # @return A Value instance for the selected value. 826 # None if it could not be located or an error occurred. 470 827 def find_value(self, name): 471 828 if name != None: … … 475 832 return None 476 833 834 ## Retrieves the currently selected subkey 835 # 836 # @return A Key instance of the current subkey 477 837 def current_subkey(self): 478 838 return Key(self._hive, regfi.regfi_iterator_cur_subkey(self._iter)) 479 839 840 ## Retrieves the currently selected value 841 # 842 # @return A Value instance of the current value 480 843 def current_value(self): 481 844 return Value(self._hive, regfi.regfi_iterator_cur_value(self._iter)) 482 845 846 ## Retrieves the current key 847 # 848 # @return A Key instance of the current position of the iterator 483 849 def current_key(self): 484 850 return Key(self._hive, regfi.regfi_iterator_cur_key(self._iter)) 485 851 852 853 ## Traverse downward multiple levels 854 # 855 # This is more efficient than calling down() multiple times 856 # 857 # @param path A list of Key names which represent the path to descend 858 # 859 # @exception Exception If path could not be located 486 860 def descend(self, path): 487 861 cpath = _strlist2charss(path) … … 490 864 if not regfi.regfi_iterator_walk_path(self._iter, cpath): 491 865 raise Exception('Could not locate path.\n'+GetLogMessages()) 866 867 868 # Freeing symbols defined for the sake of documentation 869 del Value.name,Value.name_raw,Value.offset,Value.data_size,Value.type,Value.flags 870 del Key.name,Key.name_raw,Key.offset,Key.modified,Key.flags 871 del Hive.root,Hive.modified,Hive.sequence1,Hive.sequence2,Hive.major_version,Hive.minor_version -
trunk/python/pyregfi/structures.py
r220 r221 1 1 #!/usr/bin/env python 2 3 ## @package pyregfi.structures 4 # Low-level data structures and C API mappings. 5 # 6 # Most users need not venture here. For more information, see the source. 2 7 3 8 import sys … … 8 13 from ctypes import * 9 14 15 10 16 # XXX: can we always be sure enums are this size? 11 17 REGFI_ENCODING = c_uint32 … … 13 19 14 20 REGFI_DATA_TYPE = c_uint32 15 REGFI_REGF_SIZE = 0x1000 16 17 # Registry value data types 18 REG_NONE = 0 19 REG_SZ = 1 20 REG_EXPAND_SZ = 2 21 REG_BINARY = 3 22 REG_DWORD = 4 23 REG_DWORD_LE = 4 # DWORD, little endian 24 REG_DWORD_BE = 5 # DWORD, big endian 25 REG_LINK = 6 26 REG_MULTI_SZ = 7 27 REG_RESOURCE_LIST = 8 28 REG_FULL_RESOURCE_DESCRIPTOR = 9 29 REG_RESOURCE_REQUIREMENTS_LIST = 10 30 REG_QWORD = 11 # 64-bit little endian 21 REGFI_REGF_SIZE = 0x1000 31 22 32 23
Note: See TracChangeset
for help on using the changeset viewer.