source: trunk/src/common.c @ 117

Last change on this file since 117 was 116, checked in by tim, 16 years ago

fixed major bug in reglookup-recover; now recovers much more data
rolled back release version to 0.9.0
added date range checking in regfi's NK parsing for deleted records

File size: 10.1 KB
Line 
1/*
2 * A utility to read a Windows NT/2K/XP/2K3 registry file, using
3 * Gerald Carter's regfio interface.
4 *
5 * Copyright (C) 2005-2008 Timothy D. Morgan
6 * Copyright (C) 2002 Richard Sharpe, rsharpe@richardsharpe.com
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; version 3 of the License.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
20 *
21 * $Id: $
22 */
23
24#include <iconv.h>
/* iconv conversion descriptor with file scope (external linkage). */
25iconv_t conv_desc;
26
/* Sets of printable characters that the quoting routines escape as \xQQ
 * in addition to non-printable bytes.  Each set contains the comma,
 * double quote and backslash; two of them add one more character.
 */
/* Adds '/'.  NOTE(review): not referenced in the visible portion of this
 * file; presumably used when quoting key names -- confirm against callers.
 */
27const char* key_special_chars = ",\"\\/";
/* Adds '|'; used when quoting the individual strings of a REG_MULTI_SZ. */
28const char* subfield_special_chars = ",\"\\|";
/* Base set; used when quoting string values and binary data. */
29const char* common_special_chars = ",\"\\";
30
/* Release version string.  NOTE(review): not referenced in the visible
 * portion of this file; presumably printed by the programs including it.
 */
31#define REGLOOKUP_VERSION "0.9.0"
32
33
/* Prints message to stderr and terminates the process with exit status
 * code.  This function does not return.
 *
 * The message is written verbatim: it is never interpreted as a printf
 * format string, so text containing '%' (for instance a user-supplied
 * file name embedded in the message) cannot trigger a format-string bug.
 */
void bailOut(int code, char* message)
{
  fputs(message, stderr);
  exit(code);
}
39
40
/* Returns a newly malloc()ed string which contains the first len bytes of
 * str, except that non-printable bytes (outside 32..126) and any byte
 * listed in special are quoted in hex with the syntax '\xQQ', where QQ is
 * the upper-case hex value of the quoted byte.  A null terminator is
 * added, since only ascii, not binary, is returned.
 *
 * Returns NULL if special is NULL, if str is NULL while len is non-zero,
 * if the worst-case output size cannot be represented, or if memory
 * allocation fails.  The caller must free() a non-NULL result.
 */
static char* quote_buffer(const unsigned char* str, 
                          unsigned int len, const char* special)
{
  size_t max_len, buf_len, added_len, remaining;
  size_t num_written = 0;
  unsigned int i;
  int quoted_len;
  char* ret_val;
  char* tmp_buf;

  if(special == NULL || (str == NULL && len > 0))
    return NULL;

  /* Worst case: every input byte expands to 4 output bytes, plus the
   * terminator.  Refuse lengths for which that size would wrap around.
   */
  if(len > ((size_t)-1 - 1) / 4)
    return NULL;
  max_len = (size_t)len*4+1;

  buf_len = (size_t)len+1;
  ret_val = malloc(buf_len);
  if(ret_val == NULL)
    return NULL;

  for(i=0; i<len; i++)
  {
    /* Make sure one more quoted byte plus the terminator always fits. */
    if(buf_len <= (num_written+5))
    {
      /* Expand the buffer by the memory consumption rate seen so far
       * times the amount of input left to process.  The expansion is bounded
       * below by a minimum safety increase, and above by the maximum possible
       * output string length.  This should minimize both the number of
       * reallocs() and the amount of wasted memory.
       */
      remaining = (size_t)(len-i);
      if(num_written != 0 && remaining > ((size_t)-1)/num_written)
        added_len = max_len;  /* Product would overflow; saturate. */
      else
        added_len = remaining*num_written/((size_t)i+1);

      /* Written as a subtraction so the comparison itself cannot wrap. */
      if(buf_len >= max_len || added_len > (max_len-buf_len))
        buf_len = max_len;
      else if(added_len < 5)
        buf_len += 5;
      else
        buf_len += added_len;

      tmp_buf = realloc(ret_val, buf_len);
      if(tmp_buf == NULL)
      {
        free(ret_val);
        return NULL;
      }
      ret_val = tmp_buf;
    }
   
    if(str[i] < 32 || str[i] > 126 || strchr(special, str[i]) != NULL)
    {
      quoted_len = snprintf(ret_val + num_written, buf_len - num_written,
                            "\\x%.2X", str[i]);
      if(quoted_len < 0)
      {
        free(ret_val);
        return NULL;
      }
      num_written += (size_t)quoted_len;
    }
    else
      ret_val[num_written++] = str[i];
  }
  ret_val[num_written] = '\0';

  return ret_val;
}
102
103
/* Quotes a null-terminated string.  The result is whatever quote_buffer()
 * produces for the bytes of str up to (not including) its terminator:
 * a newly malloc()ed string in which non-printable characters and the
 * characters listed in special appear as '\xQQ', QQ being the hex ascii
 * value of the quoted character.  A NULL str yields NULL.
 */
static char* quote_string(const char* str, const char* special)
{
  return (str == NULL)
    ? NULL
    : quote_buffer((const unsigned char*)str, strlen(str), special);
}
119
120
/*
 * Convert from UTF-16LE to ASCII.  Accepts a Unicode buffer, uni, and
 * its length in bytes, uni_max.  Writes ASCII to the buffer ascii, whose
 * size is ascii_max.  Writes at most (ascii_max-1) bytes to ascii, and
 * null terminates the string.  Returns the length of the string stored in
 * ascii (as measured by strlen(), so it stops at the first converted
 * null character).
 *
 * On error, returns a negative errno code: -EINVAL if ascii is NULL or
 * ascii_max is 0, otherwise the negated errno reported by iconv_open()
 * or iconv().  The contents of ascii are unspecified after an error.
 */
static int uni_to_ascii(unsigned char* uni, char* ascii, 
                        unsigned int uni_max, unsigned int ascii_max)
{
  iconv_t cd;
  char* inbuf = (char*)uni;
  char* outbuf = ascii;
  size_t in_len = (size_t)uni_max;
  size_t out_len;
  size_t ret;
  int saved_errno;

  /* Without this check, ascii_max-1 would wrap around to a huge size. */
  if(ascii == NULL || ascii_max == 0)
    return -EINVAL;
  out_len = (size_t)(ascii_max-1);

  /* Set up conversion descriptor. */
  cd = iconv_open("US-ASCII", "UTF-16LE");
  if(cd == (iconv_t)-1)
    return -errno;

  ret = iconv(cd, &inbuf, &in_len, &outbuf, &out_len);
  /* Capture errno before iconv_close() gets a chance to overwrite it. */
  saved_errno = errno;
  iconv_close(cd);

  if(ret == (size_t)-1)
    return -saved_errno;

  *outbuf = '\0';
  return strlen(ascii);
}
151
152
153/*
154 * Convert a data value to a string for display.  Returns NULL on error,
155 * and the string to display if there is no error, or a non-fatal
156 * error.  On any error (fatal or non-fatal) occurs, (*error_msg) will
157 * be set to a newly allocated string, containing an error message.  If
158 * a memory allocation failure occurs while generating the error
159 * message, both the return value and (*error_msg) will be NULL.  It
160 * is the responsibility of the caller to free both a non-NULL return
161 * value, and a non-NULL (*error_msg).
162 */
163static char* data_to_ascii(unsigned char* datap, uint32 len, uint32 type, 
164                           char** error_msg)
165{
166  char* asciip;
167  char* ascii;
168  unsigned char* cur_str;
169  char* cur_ascii;
170  char* cur_quoted;
171  char* tmp_err;
172  const char* str_type;
173  uint32 i;
174  uint32 cur_str_len;
175  uint32 ascii_max, cur_str_max;
176  uint32 str_rem, cur_str_rem, alen;
177  int ret_err;
178  unsigned short num_nulls;
179
180  if(datap == NULL)
181  {
182    *error_msg = (char*)malloc(24);
183    if(*error_msg == NULL)
184      return NULL;
185    strcpy(*error_msg, "Data pointer was NULL.");
186    return NULL;
187  }
188  *error_msg = NULL;
189
190  switch (type) 
191  {
192  case REG_SZ:
193  case REG_EXPAND_SZ:
194    /* REG_LINK is a symbolic link, stored as a unicode string. */
195  case REG_LINK:
196    ascii_max = sizeof(char)*(len+1);
197    ascii = malloc(ascii_max);
198    if(ascii == NULL)
199      return NULL;
200   
201    /* Sometimes values have binary stored in them.  If the unicode
202     * conversion fails, just quote it raw.
203     */
204    ret_err = uni_to_ascii(datap, ascii, len, ascii_max);
205    if(ret_err < 0)
206    {
207      tmp_err = strerror(-ret_err);
208      str_type = regfi_type_val2str(type);
209      *error_msg = (char*)malloc(65+strlen(str_type)+strlen(tmp_err)+1);
210      if(*error_msg == NULL)
211      {
212        free(ascii);
213        return NULL;
214      }
215      sprintf(*error_msg, "Unicode conversion failed on %s field; "
216               "printing as binary.  Error: %s", str_type, tmp_err);
217     
218      cur_quoted = quote_buffer(datap, len, common_special_chars);
219    }
220    else
221      cur_quoted = quote_string(ascii, common_special_chars);
222    free(ascii);
223    if(cur_quoted == NULL)
224    {
225      *error_msg = (char*)malloc(27+1);
226      if(*error_msg != NULL)
227        strcpy(*error_msg, "Buffer could not be quoted.");
228    }
229    return cur_quoted;
230    break;
231
232  case REG_DWORD:
233    ascii_max = sizeof(char)*(8+2+1);
234    ascii = malloc(ascii_max);
235    if(ascii == NULL)
236      return NULL;
237
238    snprintf(ascii, ascii_max, "0x%.2X%.2X%.2X%.2X", 
239             datap[3], datap[2], datap[1], datap[0]);
240    return ascii;
241    break;
242
243  case REG_DWORD_BE:
244    ascii_max = sizeof(char)*(8+2+1);
245    ascii = malloc(ascii_max);
246    if(ascii == NULL)
247      return NULL;
248
249    snprintf(ascii, ascii_max, "0x%.2X%.2X%.2X%.2X", 
250             datap[0], datap[1], datap[2], datap[3]);
251    return ascii;
252    break;
253
254  case REG_QWORD:
255    ascii_max = sizeof(char)*(16+2+1);
256    ascii = malloc(ascii_max);
257    if(ascii == NULL)
258      return NULL;
259
260    snprintf(ascii, ascii_max, "0x%.2X%.2X%.2X%.2X%.2X%.2X%.2X%.2X",
261             datap[7], datap[6], datap[5], datap[4],
262             datap[3], datap[2], datap[1], datap[0]);
263    return ascii;
264    break;
265   
266
267  /* XXX: this MULTI_SZ parser is pretty inefficient.  Should be
268   *      redone with fewer malloc calls and better string concatenation.
269   *      Also, gives lame output when "\0\0" is the string.
270   */
271  case REG_MULTI_SZ:
272    ascii_max = sizeof(char)*(len*4+1);
273    cur_str_max = sizeof(char)*(len+1);
274    cur_str = malloc(cur_str_max);
275    cur_ascii = malloc(cur_str_max);
276    ascii = malloc(ascii_max);
277    if(ascii == NULL || cur_str == NULL || cur_ascii == NULL)
278      return NULL;
279
280    /* Reads until it reaches 4 consecutive NULLs,
281     * which is two nulls in unicode, or until it reaches len, or until we
282     * run out of buffer.  The latter should never happen, but we shouldn't
283     * trust our file to have the right lengths/delimiters.
284     */
285    asciip = ascii;
286    num_nulls = 0;
287    str_rem = ascii_max;
288    cur_str_rem = cur_str_max;
289    cur_str_len = 0;
290
291    for(i=0; (i < len) && str_rem > 0; i++)
292    {
293      *(cur_str+cur_str_len) = *(datap+i);
294      if(*(cur_str+cur_str_len) == 0)
295        num_nulls++;
296      else
297        num_nulls = 0;
298      cur_str_len++;
299
300      if(num_nulls == 2)
301      {
302        ret_err = uni_to_ascii(cur_str, cur_ascii, cur_str_len-1, cur_str_max);
303        if(ret_err < 0)
304        {
305          /* XXX: should every sub-field error be enumerated? */
306          if(*error_msg == NULL)
307          {
308            tmp_err = strerror(-ret_err);
309            *error_msg = (char*)malloc(90+strlen(tmp_err)+1);
310            if(*error_msg == NULL)
311            {
312              free(cur_str);
313              free(cur_ascii);
314              free(ascii);
315              return NULL;
316            }
317            sprintf(*error_msg, "Unicode conversion failed on at least one "
318                    "MULTI_SZ sub-field; printing as binary.  Error: %s",
319                    tmp_err);
320          }
321          cur_quoted = quote_buffer(cur_str, cur_str_len-1, 
322                                    subfield_special_chars);
323        }
324        else
325          cur_quoted = quote_string(cur_ascii, subfield_special_chars);
326
327        alen = snprintf(asciip, str_rem, "%s", cur_quoted);
328        asciip += alen;
329        str_rem -= alen;
330        free(cur_quoted);
331
332        if(*(datap+i+1) == 0 && *(datap+i+2) == 0)
333          break;
334        else
335        {
336          if(str_rem > 0)
337          {
338            asciip[0] = '|';
339            asciip[1] = '\0';
340            asciip++;
341            str_rem--;
342          }
343          memset(cur_str, 0, cur_str_max);
344          cur_str_len = 0;
345          num_nulls = 0;
346          /* To eliminate leading nulls in subsequent strings. */
347          i++;
348        }
349      }
350    }
351    *asciip = 0;
352    free(cur_str);
353    free(cur_ascii);
354    return ascii;
355    break;
356
357  /* XXX: Dont know what to do with these yet, just print as binary... */
358  default:
359    /* XXX: It would be really nice if this message somehow included the
360     *      name of the current value we're having trouble with, since
361     *      stderr/stdout don't always sync nicely.
362     */
363    fprintf(stderr, "WARNING: Unrecognized registry data type (0x%.8X); quoting as binary.\n", type);
364   
365  case REG_NONE:
366  case REG_RESOURCE_LIST:
367  case REG_FULL_RESOURCE_DESCRIPTOR:
368  case REG_RESOURCE_REQUIREMENTS_LIST:
369
370  case REG_BINARY:
371    return quote_buffer(datap, len, common_special_chars);
372    break;
373  }
374
375  return NULL;
376}
Note: See TracBrowser for help on using the repository browser.