source: trunk/python/pyregfi/__init__.py @ 261

Last change on this file since 261 was 261, checked in by tim, 13 years ago

readded windows file descriptor hack
copyright notices

File size: 36.0 KB
RevLine 
[204]1#!/usr/bin/env python
2
[261]3# Copyright (C) 2010-2011 Timothy D. Morgan
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; version 3 of the License.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program; if not, write to the Free Software
16# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17#
18# $Id: $
19
[210]20## @package pyregfi
21# Python interface to the regfi library.
22#
23
[221]24## @mainpage API Documentation
25#
26# The pyregfi module provides a Python interface to the @ref regfi Windows
27# registry library. 
28#
29# The library operates on registry hives, each of which is contained within a
# single file.  The quickest way to get started is to use the @ref openHive()
# function to obtain a Hive object.  For example:
[221]32# @code
33# >>> import pyregfi
[257]34# >>> myHive = pyregfi.openHive('/mnt/win/c/WINDOWS/system32/config/system')
[221]35# @endcode
36#
37# Using this Hive object, one can begin investigating what top-level keys
38# exist by starting with the root Key attribute:
39# @code
40# >>> for key in myHive.root.subkeys:
41# ...   print(key.name)
42# ControlSet001
43# ControlSet003
44# LastKnownGoodRecovery
45# MountedDevices
46# Select
47# Setup
48# WPA
49# @endcode
50#
51# From there, accessing subkeys and values by name is a simple matter of:
52# @code
53# >>> myKey = myHive.root.subkeys['Select']
54# >>> myValue = myKey.values['Current']
55# @endcode
56#
57# The data associated with a Value can be obtained through the fetch_data()
58# method:
59# @code
60# >>> print(myValue.fetch_data())
61# 1
62# @endcode
63#
64# While useful for simple exercises, using the subkeys object for deeply nested
65# paths is not efficient and doesn't make for particularly attractive code. 
66# Instead, a special-purpose HiveIterator class is provided for simplicity of
67# use and fast access to specific known paths:
68# @code
69# >>> myIter = pyregfi.HiveIterator(myHive)
70# >>> myIter.descend(['ControlSet001','Control','NetworkProvider','HwOrder'])
71# >>> myKey = myIter.current_key()
72# >>> print(myKey.values['ProviderOrder'].fetch_data())
73# RDPNP,LanmanWorkstation,WebClient
74# @endcode
75#
76# The first two lines above can be simplified in some "syntactic sugar" provided
77# by the Hive.subtree() method.  Also, as one might expect, the HiveIterator
78# also acts as an iterator, producing keys in a depth-first order.
79# For instance, to traverse all keys under the ControlSet003\\Services key,
80# printing their names as we go, we could do:
81# @code
# >>> for key in myHive.subtree(['ControlSet003','Services']):
# ...   print(key.name)
84# Services
85# Abiosdsk
86# abp480n5
87# Parameters
88# PnpInterface
89# ACPI
90# [...]
91# @endcode
92#
93# Note that "Services" was printed first, since the subtree is traversed as a
94# "preordering depth-first" search starting with the HiveIterator's current_key(). 
# As one might expect, traversal of a subtree stops when all elements in that
# specific subtree (and none outside of it) have been traversed.
97#
98# For more information, peruse the various attributes and methods available on
99# the Hive, HiveIterator, Key, Value, and Security classes.
100#
101# @note @ref regfi is a read-only library by design and there
102# are no plans to implement write support.
103#
104# @note At present, pyregfi has been tested with Python versions 2.6 and 3.1
105#
106# @note Developers strive to make pyregfi thread-safe.
107#
108# @note Key and Value names are case-sensitive in regfi and pyregfi
109#
[204]110import sys
[219]111import time
[204]112import ctypes
113import ctypes.util
[228]114import threading
115from pyregfi.structures import *
[204]116
[228]117
[221]118## An enumeration of registry Value data types
[210]119#
[221]120# @note This is a static class, there is no need to instantiate it.
121#       Just access its attributes directly as DATA_TYPES.SZ, etc
class DATA_TYPES(object):
    # Numeric codes for registry Value data types.  Nothing here needs an
    # instance; read the attributes straight off the class.

    ## None / Unknown
    NONE = 0
    ## String
    SZ = 1
    ## String with %...% expansions
    EXPAND_SZ = 2
    ## Binary buffer
    BINARY = 3
    ## 32 bit integer (little endian)
    DWORD = 4
    ## 32 bit integer (little endian); same code as DWORD
    DWORD_LE = 4
    ## 32 bit integer (big endian)
    DWORD_BE = 5
    ## Symbolic link
    LINK = 6
    ## List of strings
    MULTI_SZ = 7
    ## Unknown structure
    RESOURCE_LIST = 8
    ## Unknown structure
    FULL_RESOURCE_DESCRIPTOR = 9
    ## Unknown structure
    RESOURCE_REQUIREMENTS_LIST = 10
    ## 64 bit integer (little endian)
    QWORD = 11
[205]149
150
[227]151## An enumeration of log message types
152#
153# @note This is a static class, there is no need to instantiate it.
154#       Just access its attributes directly as LOG_TYPES.INFO, etc
class LOG_TYPES(object):
    # Bit flags naming the kinds of log message; setLogMask() ORs a sequence
    # of these together.  Read the attributes straight off the class.

    ## Informational messages, useful in debugging
    INFO = 0x01
    ## Non-critical problems in structure parsing or interpretation
    WARN = 0x04
    ## Major failures
    ERROR = 0x10
162
163
[208]164def _buffer2bytearray(char_pointer, length):
165    if length == 0 or char_pointer == None:
166        return None
167   
168    ret_val = bytearray(length)
169    for i in range(0,length):
170        ret_val[i] = char_pointer[i][0]
171
172    return ret_val
173
174
[215]175def _strlist2charss(str_list):
176    ret_val = []
177    for s in str_list:
178        ret_val.append(s.encode('utf-8', 'replace'))
179
[220]180    ret_val = (ctypes.c_char_p*(len(str_list)+1))(*ret_val)
[215]181    # Terminate the char** with a NULL pointer
182    ret_val[-1] = 0
183
184    return ret_val
185
186
[209]187def _charss2strlist(chars_pointer):
188    ret_val = []
189    i = 0
190    s = chars_pointer[i]
[252]191    while s:
[213]192        ret_val.append(s.decode('utf-8', 'replace'))
[209]193        i += 1
194        s = chars_pointer[i]
[208]195
[209]196    return ret_val
[208]197
[210]198
[252]199
[233]200## Returns the (py)regfi library version
201#
202# @return A string indicating the version
def getVersion():
    # The version is whatever the underlying regfi library reports.
    version = regfi.regfi_version()
    return version
205
206
[221]207## Retrieves messages produced by regfi during parsing and interpretation
208#
209# The regfi C library may generate log messages stored in a special thread-safe
210# global data structure.  These messages should be retrieved periodically or
211# after each major operation by callers to determine if any errors or warnings
212# should be reported to the user.  Failure to retrieve these could result in
213# excessive memory consumption.
def getLogMessages():
    msgs = regfi.regfi_log_get_str()
    if not msgs:
        return ''
    # This function is called while building exception messages, so it must
    # not raise itself: undecodable bytes are replaced rather than rejected,
    # matching every other decode in this module.
    return msgs.decode('utf-8', 'replace')
219
220
[227]221## Sets the types of log messages to record
222#
223# @param log_types A sequence of message types that regfi should generate.
224#                  Message types can be found in the LOG_TYPES enumeration.
225#
226# @return True on success, False on failure.  Failures are rare, but could
227#         indicate that global logging is not operating as expected.
228#
229# Example:
230# @code
[232]231# setLogMask((LOG_TYPES.ERROR, LOG_TYPES.WARN, LOG_TYPES.INFO))
[227]232# @endcode
233#
234# The message mask is a global (all hives, iterators), thread-specific value.
235# For more information, see @ref regfi_log_set_mask.
236#
def setLogMask(log_types):
    # Fold the requested message types into a single bit mask.
    combined = 0
    for flag in log_types:
        combined = combined | flag
    return regfi.regfi_log_set_mask(combined)
242
243
[232]244## Opens a file as a registry hive
245#
246# @param path The file path of a hive, as one would provide to the
247#             open() built-in
248#
249# @return A new Hive instance
def openHive(path):
    fh = open(path, 'rb')
    try:
        return Hive(fh)
    except Exception:
        # No Hive took ownership of the file object, so close it here
        # instead of leaking the descriptor, then let the error propagate.
        fh.close()
        raise
253
254
[221]255## Abstract class for most objects returned by the library
[212]256class _StructureWrapper(object):
[214]257    _hive = None
258    _base = None
[206]259
[207]260    def __init__(self, hive, base):
[215]261        if not hive:
262            raise Exception("Could not create _StructureWrapper,"
263                            + " hive is NULL.  Current log:\n"
[232]264                            + getLogMessages())
[215]265        if not base:
266            raise Exception("Could not create _StructureWrapper,"
267                            + " base is NULL.  Current log:\n"
[232]268                            + getLogMessages())
[214]269        self._hive = hive
270        self._base = base
[206]271
[224]272
[221]273    # Memory management for most regfi structures is taken care of here
[206]274    def __del__(self):
[255]275        if self._base:
276            regfi.regfi_free_record(self._hive.file, self._base)
[206]277
[224]278
[221]279    # Any attribute requests not explicitly defined in subclasses gets passed
280    # to the equivalent REGFI_* structure defined in structures.py
[206]281    def __getattr__(self, name):
[214]282        return getattr(self._base.contents, name)
[224]283
[221]284   
285    ## Test for equality
286    #
287    # Records returned by pyregfi may be compared with one another.  For example:
288    # @code
289    #  >>> key2 = key1.subkeys['child']
290    #  >>> key1 == key2
291    #  False
292    #  >>> key1 != key2
293    #  True
294    #  >>> key1 == key2.get_parent()
295    #  True
296    # @endcode
[206]297    def __eq__(self, other):
298        return (type(self) == type(other)) and (self.offset == other.offset)
299
[224]300
[206]301    def __ne__(self, other):
302        return (not self.__eq__(other))
303
[208]304
# Forward declaration only: the full Key class defined further down in this
# module rebinds this name.
class Key():
    pass
307
[221]308
# Forward declaration only: the full Value class defined further down in this
# module rebinds this name.
class Value():
    pass
311
[221]312
[253]313
314## Represents a registry SK record which contains a security descriptor
315#
class Security(_StructureWrapper):
    # NOTE(review): the values assigned to the documented attributes below
    # look like documentation placeholders.  Class attributes take precedence
    # over __getattr__, so presumably they are removed elsewhere in this
    # module -- confirm before relying on them.

    ## Number of registry Keys referencing this SK record
    ref_count = 1

    ## The absolute file offset of the SK record's cell in the Hive file
    offset = 0xCAFEBABE

    ## The @ref winsec.SecurityDescriptor for this SK record
    descriptor = object()

    def __init__(self, hive, base):
        super(Security, self).__init__(hive, base)
        # XXX: add checks for NULL pointers
        sec_desc = base.contents.sec_desc.contents
        self.descriptor = winsec.SecurityDescriptor(sec_desc)

    ## Loads the "next" Security record in the hive
    #
    # @note
    # SK records are included in a circular, doubly-linked list.
    # To iterate over all SK records, be sure to check for the repetition of
    # the SK record you started with to determine when all have been traversed.
    def next_security(self):
        next_base = regfi.regfi_next_sk(self._hive.file, self._base)
        return Security(self._hive, next_base)

    ## Loads the "previous" Security record in the hive
    #
    # @note
    # SK records are included in a circular, doubly-linked list.
    # To iterate over all SK records, be sure to check for the repetition of
    # the SK record you started with to determine when all have been traversed.
    def prev_security(self):
        prev_base = regfi.regfi_prev_sk(self._hive.file, self._base)
        return Security(self._hive, prev_base)
350
351
[221]352## Abstract class for ValueList and SubkeyList
[212]353class _GenericList(object):
[257]354    # XXX: consider implementing keys(), values(), items() and other dictionary methods
[214]355    _hive = None
[224]356    _key_base = None
[214]357    _length = None
358    _current = None
[207]359
[221]360    # implementation-specific functions for SubkeyList and ValueList
[214]361    _fetch_num = None
362    _find_element = None
363    _get_element = None
364    _constructor = None
[208]365
[207]366    def __init__(self, key):
[224]367        if not key:
368            raise Exception("Could not create _GenericList; key is NULL."
[232]369                            + "Current log:\n" + getLogMessages())
[252]370
371        base = regfi.regfi_reference_record(key._hive.file, key._base)
372        if not base:
[224]373            raise Exception("Could not create _GenericList; memory error."
[232]374                            + "Current log:\n" + getLogMessages())
[252]375        self._key_base = cast(base, type(key._base))
[224]376        self._length = self._fetch_num(self._key_base)
[214]377        self._hive = key._hive
378
[207]379   
[224]380    def __del__(self):
[228]381        regfi.regfi_free_record(self._hive.file, self._key_base)
[224]382
[228]383
[221]384    ## Length of list
[207]385    def __len__(self):
[214]386        return self._length
[207]387
[221]388
389    ## Retrieves a list element by name
390    #
[257]391    # @param name The name of the subkey or value desired. 
392    #             This is case-sensitive.
393    #
394    # @note The registry format does inherently prevent multiple
395    #       subkeys or values from having the same name. 
396    #       This interface simply returns the first match. 
397    #       Lookups using this method could also fail due to incorrectly
398    #       encoded strings.
399    #       To identify any duplicates, use the iterator interface to
400    #       check every list element.
401    #
[221]402    # @return the first element whose name matches, or None if the element
403    #         could not be found
[207]404    def __getitem__(self, name):
[257]405        # XXX: Consider interpreting integer names as offsets in the underlying list
[220]406        index = ctypes.c_uint32()
[208]407        if isinstance(name, str):
408            name = name.encode('utf-8')
409
[209]410        if name != None:
411            name = create_string_buffer(bytes(name))
412
[224]413        if self._find_element(self._hive.file, self._key_base, 
[220]414                              name, byref(index)):
415            return self._constructor(self._hive,
[214]416                                     self._get_element(self._hive.file,
[224]417                                                       self._key_base,
[214]418                                                       index))
[207]419        raise KeyError('')
420
[257]421
422    ## Fetches the requested element by name, or the default value if the lookup
423    #  fails.
424    #
[209]425    def get(self, name, default):
426        try:
427            return self[name]
428        except KeyError:
429            return default
430   
[207]431    def __iter__(self):
[214]432        self._current = 0
[207]433        return self
434   
435    def __next__(self):
[214]436        if self._current >= self._length:
[207]437            raise StopIteration('')
438
[224]439        elem = self._get_element(self._hive.file, self._key_base,
[220]440                                 ctypes.c_uint32(self._current))
[214]441        self._current += 1
442        return self._constructor(self._hive, elem)
[207]443   
[212]444    # For Python 2.x
[214]445    next = __next__
[207]446
[212]447
[221]448## The list of subkeys associated with a Key
449#
450# This attribute is both iterable:
451# @code
452#   for k in myKey.subkeys:
453#     ...
454# @endcode
455# and accessible as a dictionary:
456# @code
457#   mySubkey = myKey.subkeys["keyName"]
458# @endcode
459#
[257]460# You may also request the len() of a subkeys list.
461# However keys(), values(), items() and similar methods are not currently
462# implemented.
class SubkeyList(_GenericList):
    # regfi functions the generic list machinery uses for subkeys:
    # counting them, finding one by name, and fetching one by index.
    _fetch_num = regfi.regfi_fetch_num_subkeys
    _find_element = regfi.regfi_find_subkey
    _get_element = regfi.regfi_get_subkey
[208]467
468
[221]469## The list of values associated with a Key
470#
471# This attribute is both iterable:
472# @code
473#   for v in myKey.values:
474#     ...
475# @endcode
476# and accessible as a dictionary:
477# @code
478#   myValue = myKey.values["valueName"]
479# @endcode
480#
[257]481# You may also request the len() of a values list.
482# However keys(), values(), items() and similar methods are not currently
483# implemented.
class ValueList(_GenericList):
    # regfi functions the generic list machinery uses for values:
    # counting them, finding one by name, and fetching one by index.
    _fetch_num = regfi.regfi_fetch_num_values
    _find_element = regfi.regfi_find_value
    _get_element = regfi.regfi_get_value
[208]488
489
[215]490## Registry key
[221]491# These represent registry keys (@ref REGFI_NK records) and provide
492# access to their subkeys, values, and other metadata.
493#
[257]494# @note Key instances may provide access to more attributes than are
[221]495#       documented here.  However, undocumented attributes may change over time
496#       and are not officially supported.  If you need access to an attribute
[257]497#       not shown here, see @ref pyregfi.structures.
class Key(_StructureWrapper):
    # NOTE(review): the values assigned to the documented attributes below
    # look like documentation placeholders.  Class attributes take precedence
    # over __getattr__, so presumably they are removed elsewhere in this
    # module -- confirm before relying on them.

    ## A @ref ValueList object representing the list of Values
    #  stored on this Key
    values = None

    ## A @ref SubkeyList object representing the list of subkeys
    #  stored on this Key
    subkeys = None

    ## The raw Key name as an uninterpreted bytearray
    name_raw = (b"...")

    ## The name of the Key as a (unicode) string
    name = "..."

    ## The absolute file offset of the Key record's cell in the Hive file
    offset = 0xCAFEBABE

    ## This Key's last modified time represented as the number of seconds
    #  since the UNIX epoch in UTC; similar to what time.time() returns
    modified = 1300000000.123456

    ## The NK record's flags field
    flags = 0x10110001

    def __init__(self, hive, base):
        super(Key, self).__init__(hive, base)
        self.values = ValueList(self)
        self.subkeys = SubkeyList(self)

    # Adds Python-friendly conversions for a few attributes; everything else
    # is forwarded to the wrapped record by the parent class.
    def __getattr__(self, name):
        base_getattr = super(Key, self).__getattr__

        if name == "name":
            interpreted = base_getattr(name)
            if not interpreted:
                # No interpreted name available: fall back to the raw bytes
                return self.name_raw
            return interpreted.decode('utf-8', 'replace')

        if name == "name_raw":
            raw = base_getattr(name)
            length = base_getattr('name_length')
            return _buffer2bytearray(raw, length)

        if name == "modified":
            return regfi.regfi_nt2unix_time(self._base.contents.mtime)

        return base_getattr(name)


    ## Retrieves the Security properties for this key
    def fetch_security(self):
        return Security(self._hive,
                        regfi.regfi_fetch_sk(self._hive.file, self._base))


    ## Retrieves the class name for this key
    #
    # Class names are typically stored as UTF-16LE strings, so these are decoded
    # into proper python (unicode) strings.  However, if this fails, a bytearray
    # is instead returned containing the raw buffer stored for the class name.
    #
    # @return The class name as a string or bytearray.  None if a class name
    #         doesn't exist or an unrecoverable error occurred during retrieval.
    def fetch_classname(self):
        cn_p = regfi.regfi_fetch_classname(self._hive.file, self._base)
        if not cn_p:
            return None

        # Free the record on every path out, including a failed conversion
        try:
            cn_struct = cn_p.contents
            if cn_struct.interpreted:
                return cn_struct.interpreted.decode('utf-8', 'replace')
            return _buffer2bytearray(cn_struct.raw, cn_struct.size)
        finally:
            regfi.regfi_free_record(self._hive.file, cn_p)


    ## Retrieves this key's parent key
    #
    # @return The parent's Key instance or None if current key is root
    #         (or an error occurred)
    def get_parent(self):
        if self.is_root():
            return None
        parent_base = regfi.regfi_get_parentkey(self._hive.file, self._base)
        if parent_base:
            return Key(self._hive, parent_base)
        return None


    ## Checks to see if this Key is the root of its Hive
    #
    #  @return True if it is, False otherwise
    def is_root(self):
        return (self._hive.root == self)
[215]598
599
[210]600## Registry value (metadata)
601#
602# These represent registry values (@ref REGFI_VK records) and provide
603# access to their associated data.
[221]604#
[257]605# @note Value instances may provide access to more attributes than are
[221]606#       documented here.  However, undocumented attributes may change over time
607#       and are not officially supported.  If you need access to an attribute
[257]608#       not shown here, see @ref pyregfi.structures.
class Value(_StructureWrapper):
    # NOTE(review): the values assigned to the documented attributes below
    # look like documentation placeholders.  Class attributes take precedence
    # over __getattr__, so presumably they are removed elsewhere in this
    # module -- confirm before relying on them.

    ## The raw Value name as an uninterpreted bytearray
    name_raw = (b"...")

    ## The name of the Value as a (unicode) string
    name = "..."

    ## The absolute file offset of the Value record's cell in the Hive file
    offset = 0xCAFEBABE

    ## The length of data advertised in the VK record
    data_size = 0xCAFEBABE

    ## An integer which represents the data type for this Value's data
    # Typically this value is one of 12 types defined in @ref DATA_TYPES,
    # but in some cases (the SAM hive) it may be used for other purposes
    type = DATA_TYPES.NONE

    ## The VK record's flags field
    flags = 0x10110001

    ## Retrieves the Value's data according to advertised type
    #
    # Data is loaded from its cell(s) and then interpreted based on the data
    # type recorded in the Value.  It is not uncommon for data to be stored with
    # the wrong type or even with invalid types.  If you have difficulty
    # obtaining desired data here, use @ref fetch_raw_data().
    #
    # @return The interpreted representation of the data as one of several
    #         possible Python types, as listed below.  None if any failure
    #         occurred during extraction or conversion, if the interpreted
    #         size is 0, or if the type is not one of those listed.
    #
    # @retval string for SZ, EXPAND_SZ, and LINK
    # @retval int for DWORD, DWORD_BE, and QWORD
    # @retval list(string) for MULTI_SZ
    # @retval bytearray for NONE, BINARY, RESOURCE_LIST,
    #         FULL_RESOURCE_DESCRIPTOR, and RESOURCE_REQUIREMENTS_LIST
    #
    def fetch_data(self):
        data_p = regfi.regfi_fetch_data(self._hive.file, self._base)
        if not data_p:
            return None

        ret_val = None
        # Free the record on every path out, including a failed conversion
        try:
            data_struct = data_p.contents

            if data_struct.interpreted_size == 0:
                ret_val = None
            elif data_struct.type in (DATA_TYPES.SZ, DATA_TYPES.EXPAND_SZ, DATA_TYPES.LINK):
                # Unicode strings
                ret_val = data_struct.interpreted.string.decode('utf-8', 'replace')
            elif data_struct.type in (DATA_TYPES.DWORD, DATA_TYPES.DWORD_BE):
                # 32 bit integers
                ret_val = data_struct.interpreted.dword
            elif data_struct.type == DATA_TYPES.QWORD:
                # 64 bit integers
                ret_val = data_struct.interpreted.qword
            elif data_struct.type == DATA_TYPES.MULTI_SZ:
                ret_val = _charss2strlist(data_struct.interpreted.multiple_string)
            elif data_struct.type in (DATA_TYPES.NONE, DATA_TYPES.RESOURCE_LIST,
                                      DATA_TYPES.FULL_RESOURCE_DESCRIPTOR,
                                      DATA_TYPES.RESOURCE_REQUIREMENTS_LIST,
                                      DATA_TYPES.BINARY):
                ret_val = _buffer2bytearray(data_struct.interpreted.none,
                                            data_struct.interpreted_size)
        finally:
            regfi.regfi_free_record(self._hive.file, data_p)

        return ret_val


    ## Retrieves raw representation of Value's data
    #
    # @return A bytearray containing the data, or None if the data could not
    #         be loaded
    #
    def fetch_raw_data(self):
        # XXX: should we load the data without interpretation instead?
        data_p = regfi.regfi_fetch_data(self._hive.file, self._base)
        if not data_p:
            return None

        # Free the record on every path out, including a failed conversion
        try:
            data_struct = data_p.contents
            return _buffer2bytearray(data_struct.raw, data_struct.size)
        finally:
            regfi.regfi_free_record(self._hive.file, data_p)


    # Adds Python-friendly conversions for the name attributes; everything
    # else is returned as provided by the parent class.
    def __getattr__(self, name):
        ret_val = super(Value, self).__getattr__(name)
        if name == "name":
            if not ret_val:
                # No interpreted name available: fall back to the raw bytes
                ret_val = self.name_raw
            else:
                ret_val = ret_val.decode('utf-8', 'replace')

        elif name == "name_raw":
            length = super(Value, self).__getattr__('name_length')
            ret_val = _buffer2bytearray(ret_val, length)

        return ret_val
709
710
# The list classes are defined before Key and Value exist, so the element
# constructors are attached here.  This also lets the shared parent class
# build elements without knowing which kind of list it is.
SubkeyList._constructor, ValueList._constructor = Key, Value
[208]715
716
717
[210]718## Represents a single registry hive (file)
class Hive():
    # NOTE(review): the values assigned to the documented attributes below
    # (root, modified, sequence1, ...) look like documentation placeholders.
    # Class attributes take precedence over __getattr__, so presumably they
    # are removed elsewhere in this module -- confirm before relying on them.

    # Handle returned by regfi_alloc / regfi_alloc_cb
    file = None
    # Callback structure; only set when no file descriptor is used
    raw_file = None
    # The Python file object the hive was opened on
    _fh = None
    #_root = None


    ## The root Key of this Hive
    root = None

    ## This Hives's last modified time represented as the number of seconds
    #  since the UNIX epoch in UTC; similar to what time.time() returns
    modified = 1300000000.123456

    ## First sequence number
    sequence1 = 12345678

    ## Second sequence number
    sequence2 = 12345678

    ## Major version
    major_version = 1

    ## Minor version
    minor_version = 5

    ## Constructor
    #
    # Initialize a new Hive based on a Python file object.  To open a file by
    # path, see @ref openHive.
    #
    # @param fh A Python file object.  The constructor first looks for a valid
    #           fileno attribute on this object and uses it if possible.
    #           Otherwise, the seek and read methods are used for file
    #           access.
    #
    # @note Supplied file must be seekable.  Do not perform any operation on
    #       the provided file object while a Hive is using it.  Do not
    #       construct multiple Hive instances from the same file object.
    #       If a file must be accessed by separate code and pyregfi
    #       simultaneously, use a separate file descriptor.  Hives are
    #       thread-safe, so multiple threads may use a single Hive object.
    #
    # @exception Exception If regfi could not open the file
    def __init__(self, fh):
        self._fh = fh

        # The fileno method may not exist, or it may throw an exception
        # when called if the file isn't backed with a descriptor.  Either
        # way, fall back to the seek/read callbacks below.
        fn = None
        try:
            # XXX: Native calls to Windows filenos don't seem to work.
            #      Need to investigate why.
            if not is_win32 and hasattr(fh, 'fileno'):
                fn = fh.fileno()
        except Exception:
            fn = None

        if fn is not None:
            self.file = regfi.regfi_alloc(fn, REGFI_ENCODING_UTF8)
        else:
            fh.seek(0)
            self.raw_file = structures.REGFI_RAW_FILE()
            self.raw_file.fh = fh
            self.raw_file.seek = seek_cb_type(self.raw_file.cb_seek)
            self.raw_file.read = read_cb_type(self.raw_file.cb_read)
            self.file = regfi.regfi_alloc_cb(pointer(self.raw_file), REGFI_ENCODING_UTF8)

        if not self.file:
            # XXX: switch to non-generic exception
            raise Exception("Could not open registry file.  Current log:\n"
                            + getLogMessages())


    # Computes "root" and "modified" on demand; any other missing attribute
    # is read from the underlying regfi file structure.
    def __getattr__(self, name):
        if name == "root":
            # XXX: This creates reference loops.  Need to cache better inside regfi
            #if self._root == None:
            #    self._root = Key(self, regfi.regfi_get_rootkey(self.file))
            #return self._root
            return Key(self, regfi.regfi_get_rootkey(self.file))

        elif name == "modified":
            # A Hive has no _base; its record is reached through self.file.
            # NOTE(review): assumes the file structure exposes "mtime" the
            # same way key records do -- confirm against structures.py.
            return regfi.regfi_nt2unix_time(self.file.contents.mtime)

        return getattr(self.file.contents, name)


    # Frees the regfi file handle, if one was successfully allocated
    def __del__(self):
        if self.file:
            regfi.regfi_free(self.file)

    def __iter__(self):
        return HiveIterator(self)


    ## Creates a @ref HiveIterator initialized at the specified path in
    #  the hive.
    #
    # @param path A list of Key names which represent an absolute path within
    #             the Hive
    #
    # @return A @ref HiveIterator which is positioned at the specified path.
    #
    # @exception Exception If the path could not be found/traversed
    def subtree(self, path):
        hi = HiveIterator(self)
        hi.descend(path)
        return hi
[205]828
[206]829
[210]830## A special purpose iterator for registry hives
831#
832# Iterating over an object of this type causes all keys in a specific
833# hive subtree to be returned in a depth-first manner. These iterators
834# are typically created using the @ref Hive.subtree() function on a @ref Hive
835# object.
836#
# HiveIterators can also be used to manually traverse up and down a
# registry hive, as they retain information about the current position in
# the hive, along with the iteration state for subkeys and values of
# every parent key.  See the @ref up and @ref down methods for more
# information.
[205]842class HiveIterator():
[220]843    _hive = None
844    _iter = None
845    _iteration_root = None
[228]846    _lock = None
[205]847
848    def __init__(self, hive):
[228]849        self._iter = regfi.regfi_iterator_new(hive.file)
[226]850        if not self._iter:
[205]851            raise Exception("Could not create iterator.  Current log:\n"
[232]852                            + getLogMessages())
[214]853        self._hive = hive
[228]854        self._lock = threading.RLock()
855   
[205]856    def __getattr__(self, name):
[228]857        self._lock.acquire()
858        ret_val = getattr(self._iter.contents, name)
859        self._lock.release()
860        return ret_val
[205]861
[228]862    def __del__(self):
863        self._lock.acquire()
[220]864        regfi.regfi_iterator_free(self._iter)
[228]865        self._lock.release()
[205]866
867    def __iter__(self):
[228]868        self._lock.acquire()
[220]869        self._iteration_root = None
[228]870        self._lock.release()
[205]871        return self
872
873    def __next__(self):
[228]874        self._lock.acquire()
[220]875        if self._iteration_root == None:
[228]876            self._iteration_root = self.current_key().offset
[220]877        elif not regfi.regfi_iterator_down(self._iter):
878            up_ret = regfi.regfi_iterator_up(self._iter)
[206]879            while (up_ret and
[220]880                   not regfi.regfi_iterator_next_subkey(self._iter)):
[228]881                if self._iteration_root == self.current_key().offset:
[220]882                    self._iteration_root = None
[228]883                    self._lock.release()
[206]884                    raise StopIteration('')
[220]885                up_ret = regfi.regfi_iterator_up(self._iter)
[205]886
887            if not up_ret:
[221]888                self._iteration_root = None
[228]889                self._lock.release()
[205]890                raise StopIteration('')
891           
[210]892            # XXX: Use non-generic exception
[220]893            if not regfi.regfi_iterator_down(self._iter):
[228]894                self._lock.release()
[205]895                raise Exception('Error traversing iterator downward.'+
[232]896                                ' Current log:\n'+ getLogMessages())
[205]897
[220]898        regfi.regfi_iterator_first_subkey(self._iter)
[228]899        ret_val = self.current_key()
900        self._lock.release()
[205]901
[228]902        return ret_val
903
904
[212]905    # For Python 2.x
[214]906    next = __next__
[212]907
[221]908    # XXX: Should add sanity checks on some of these traversal functions
909    #      to throw exceptions if a traversal/retrieval *should* have worked
910    #      but failed for some reason.
911
912    ## Descends the iterator to a subkey
913    #
914    # Descends the iterator one level to the current subkey, or a subkey
915    # specified by name.
916    #
917    # @param subkey_name If specified, locates specified subkey by name
918    #                    (via find_subkey()) and descends to it.
919    #
920    # @return True if successful, False otherwise
[220]921    def down(self, subkey_name=None):
[228]922        ret_val = None
[220]923        if subkey_name == None:
[228]924            self._lock.acquire()
925            ret_val = regfi.regfi_iterator_down(self._iter)
[220]926        else:
927            if name != None:
928                name = name.encode('utf-8')
[228]929            self._lock.acquire()
930            ret_val = (regfi.regfi_iterator_find_subkey(self._iter, name) 
931                       and regfi.regfi_iterator_down(self._iter))
932       
933        self._lock.release()
934        return ret_val
[206]935
[221]936
937    ## Causes the iterator to ascend to the current Key's parent
938    #
939    # @return True if successful, False otherwise
940    #
941    # @note The state of current subkeys and values at this level in the tree
942    #       is lost as a side effect.  That is, if you go up() and then back
943    #       down() again, current_subkey() and current_value() will return
944    #       default selections.
[206]945    def up(self):
[228]946        self._lock.acquire()
947        ret_val = regfi.regfi_iterator_up(self._iter)
948        self._lock.release()
949        return ret_val
[206]950
[221]951
952    ## Selects first subkey of current key
953    #
954    # @return A Key instance for the first subkey. 
955    #         None on error or if the current key has no subkeys.
[220]956    def first_subkey(self):
[228]957        ret_val = None
958        self._lock.acquire()
[220]959        if regfi.regfi_iterator_first_subkey(self._iter):
[228]960            ret_val = self.current_subkey()
961        self._lock.release()
962        return ret_val
[220]963
[221]964
965    ## Selects first value of current Key
966    #
967    # @return A Value instance for the first value. 
968    #         None on error or if the current key has no values.
[220]969    def first_value(self):
[228]970        ret_val = None
971        self._lock.acquire()
[220]972        if regfi.regfi_iterator_first_value(self._iter):
[228]973            ret_val = self.current_value()
974        self._lock.release()
975        return ret_val
[220]976
[221]977
978    ## Selects the next subkey in the current Key's list
979    #
980    # @return A Key instance for the next subkey.
981    #         None if there are no remaining subkeys or an error occurred.
[220]982    def next_subkey(self):
[228]983        ret_val = None
984        self._lock.acquire()
[220]985        if regfi.regfi_iterator_next_subkey(self._iter):
[228]986            ret_val = self.current_subkey()
987        self._lock.release()
988        return ret_val
[220]989
[221]990
991    ## Selects the next value in the current Key's list
992   
993    # @return A Value instance for the next value.
994    #         None if there are no remaining values or an error occurred.
[220]995    def next_value(self):
[228]996        ret_val = None
997        self._lock.acquire()
[220]998        if regfi.regfi_iterator_next_value(self._iter):
[228]999            ret_val = self.current_value()
1000        self._lock.release()
1001        return ret_val
[220]1002
[221]1003
1004    ## Selects the first subkey which has the specified name
1005    #
1006    # @return A Key instance for the selected key.
1007    #         None if it could not be located or an error occurred.
[220]1008    def find_subkey(self, name):
1009        if name != None:
1010            name = name.encode('utf-8')
[228]1011        ret_val = None
1012        self._lock.acquire()
[220]1013        if regfi.regfi_iterator_find_subkey(self._iter, name):
[228]1014            ret_val = self.current_subkey()
1015        self._lock.release()
1016        return ret_val
[220]1017
[221]1018
1019    ## Selects the first value which has the specified name
1020    #
1021    # @return A Value instance for the selected value.
1022    #         None if it could not be located or an error occurred.
[220]1023    def find_value(self, name):
1024        if name != None:
1025            name = name.encode('utf-8')
[228]1026        ret_val = None
1027        self._lock.acquire()
[220]1028        if regfi.regfi_iterator_find_value(self._iter, name):
[228]1029            ret_val = self.current_value()
1030        self._lock.release()
1031        return ret_val
[220]1032
[221]1033    ## Retrieves the currently selected subkey
1034    #
1035    # @return A Key instance of the current subkey
[220]1036    def current_subkey(self):
[228]1037        self._lock.acquire()
1038        ret_val = Key(self._hive, regfi.regfi_iterator_cur_subkey(self._iter))
1039        self._lock.release()
1040        return ret_val
[220]1041
[221]1042    ## Retrieves the currently selected value
1043    #
1044    # @return A Value instance of the current value
[220]1045    def current_value(self):
[228]1046        self._lock.acquire()
1047        ret_val = Value(self._hive, regfi.regfi_iterator_cur_value(self._iter))
1048        self._lock.release()
1049        return ret_val
[220]1050
[221]1051    ## Retrieves the current key
1052    #
1053    # @return A Key instance of the current position of the iterator
[220]1054    def current_key(self):
[228]1055        self._lock.acquire()
1056        ret_val = Key(self._hive, regfi.regfi_iterator_cur_key(self._iter))
1057        self._lock.release()
1058        return ret_val
[220]1059
[221]1060    ## Traverse downward multiple levels
1061    #
1062    # This is more efficient than calling down() multiple times
1063    #
1064    # @param path A list of Key names which represent the path to descend
1065    #
1066    # @exception Exception If path could not be located
[206]1067    def descend(self, path):
[215]1068        cpath = _strlist2charss(path)
[206]1069
[228]1070        self._lock.acquire()
[252]1071        result = regfi.regfi_iterator_descend(self._iter, cpath)
[228]1072        self._lock.release()
1073        if not result:
1074            # XXX: Use non-generic exception
[232]1075            raise Exception('Could not locate path.\n'+getLogMessages())
[221]1076
[252]1077    ## Obtains a list of the current key's ancestry
1078    #
1079    # @return A list of all parent keys starting with the root Key and ending
1080    #         with the current Key
1081    def ancestry(self):
1082        self._lock.acquire()
1083        result = regfi.regfi_iterator_ancestry(self._iter)
1084        self._lock.release()
[221]1085
[252]1086        ret_val = []
1087        i = 0
1088        k = result[i]
1089        while k:
1090            k = cast(regfi.regfi_reference_record(self._hive.file, k), POINTER(REGFI_NK))
1091            ret_val.append(Key(self._hive, k))
1092            i += 1
1093            k = result[i]
1094
1095        regfi.regfi_free_record(self._hive.file, result)
1096        return ret_val
1097
1098    ## Obtains the current path of the iterator
1099    #
1100    # @return A list of key names starting with the root up to and
1101    #         including the current key
1102    #
1103    def current_path(self):
1104        ancestry = self.ancestry()
1105        return [str(a.name) for a in ancestry]
1106
1107
# Freeing symbols defined for the sake of documentation.
# Attributes are removed in the same order as before, grouped by class.
del Value.name, Value.name_raw, Value.offset
del Value.data_size, Value.type, Value.flags
del Key.name, Key.name_raw, Key.offset, Key.modified, Key.flags
del Hive.root, Hive.modified, Hive.sequence1, Hive.sequence2
del Hive.major_version, Hive.minor_version
del Security.ref_count, Security.offset, Security.descriptor
Note: See TracBrowser for help on using the repository browser.