1 /* hivex - Windows Registry "hive" extraction library.
2 * Copyright (C) 2009-2010 Red Hat Inc.
3 * Derived from code by Petter Nordahl-Hagen under a compatible license:
4 * Copyright (c) 1997-2007 Petter Nordahl-Hagen.
5 * Derived from code by Markus Stephany under a compatible license:
6 * Copyright (c) 2000-2004, Markus Stephany.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation;
11 * version 2.1 of the License.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * See file LICENSE for the full license.
// String comparison shorthands built on strcmp, strcasecmp and strncmp.
// Each active macro expands to an expression that is true when the two
// strings compare equal (STREQLEN compares at most n bytes). The entries
// that start with a line-comment marker are disabled variants.
37 #define STREQ(a,b) (strcmp((a),(b)) == 0)
38 #define STRCASEEQ(a,b) (strcasecmp((a),(b)) == 0)
39 //#define STRNEQ(a,b) (strcmp((a),(b)) != 0)
40 //#define STRCASENEQ(a,b) (strcasecmp((a),(b)) != 0)
41 #define STREQLEN(a,b,n) (strncmp((a),(b),(n)) == 0)
42 //#define STRCASEEQLEN(a,b,n) (strncasecmp((a),(b),(n)) == 0)
43 //#define STRNEQLEN(a,b,n) (strncmp((a),(b),(n)) != 0)
44 //#define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0)
45 //#define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0)
48 #include "byte_conversions.h"
50 static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len);
// Members and helper macros of the hive handle. The opening line of the
// enclosing declaration is not visible here; the names below are the ones
// reached through hive_h pointers elsewhere in this file (h->hdr,
// h->rootoffs, h->endpages).
58 /* Memory-mapped (readonly) registry file. */
61 struct ntreg_header *hdr;
64 /* Use a bitmap to store which file offsets are valid (point to a
65 * used block). We only need to store 1 bit per 32 bits of the file
66 * (because blocks are 4-byte aligned). We found that the average
67 * block size in a registry file is ~50 bytes. So roughly 1 in 12
68 * bits in the bitmap will be set, making it likely a more efficient
69 * structure than a hash table.
// BITMAP_SET, BITMAP_CLR and BITMAP_TST address the bit for file offset off:
// the byte index is off >> 5 and the bit within that byte is (off >> 2) & 7,
// i.e. one bit per 4-byte unit and eight such units per bitmap byte.
72 #define BITMAP_SET(bitmap,off) (bitmap[(off)>>5] |= 1 << (((off)>>2)&7))
73 #define BITMAP_CLR(bitmap,off) (bitmap[(off)>>5] &= ~ (1 << (((off)>>2)&7)))
74 #define BITMAP_TST(bitmap,off) (bitmap[(off)>>5] & (1 << (((off)>>2)&7)))
// IS_VALID_BLOCK: the visible conditions require off to be 4-byte aligned,
// below h->size, and marked in h->bitmap. The macro evaluates off several
// times, so pass an expression without side effects.
75 #define IS_VALID_BLOCK(h,off) \
76 (((off) & 3) == 0 && \
78 (off) < (h)->size && \
79 BITMAP_TST((h)->bitmap,(off)))
81 /* Fields from the header, extracted from little-endianness hell. */
82 size_t rootoffs; /* Root key offset (always an nk-block). */
83 size_t endpages; /* Offset of end of pages. */
// On-disk layout of the hive file header (packed, no padding). hivex_open
// asserts that struct ntreg_header is 0x1000 bytes and that csum sits at
// byte offset 0x1fc. The offset and blocks fields are stored relative to
// the end of this header, which is why hivex_open adds 0x1000 to both.
86 /* NB. All fields are little endian. */
88 char magic[4]; /* "regf" */
91 char last_modified[8];
92 uint32_t major_ver; /* 1 */
93 uint32_t minor_ver; /* 3 */
94 uint32_t unknown5; /* 0 */
95 uint32_t unknown6; /* 1 */
96 uint32_t offset; /* offset of root key record - 4KB */
97 uint32_t blocks; /* pointer AFTER last hbin in file - 4KB */
98 uint32_t unknown7; /* 1 */
100 char name[64]; /* original file name of hive */
101 char unknown_guid1[16];
102 char unknown_guid2[16];
105 char unknown_guid3[16];
110 uint32_t csum; /* checksum: xor of dwords 0-0x1fb. */
112 char unknown11[3528];
114 char unknown_guid4[16];
115 char unknown_guid5[16];
116 char unknown_guid6[16];
120 } __attribute__((__packed__));
// On-disk header of one hbin page (packed). hivex_open checks the four
// magic bytes, rejects a page_size that is not larger than this header or
// not a multiple of 0x1000, and starts walking blocks 0x20 bytes into the
// page.
122 struct ntreg_hbin_page {
123 char magic[4]; /* "hbin" */
124 uint32_t offset_first; /* offset from 1st block */
125 uint32_t page_size; /* size of this page (multiple of 4KB) */
127 /* Linked list of blocks follows here. */
128 } __attribute__((__packed__));
// Common prefix shared by every block inside an hbin page (packed): a
// signed little-endian length followed by a two-byte type tag. The record
// structs below start with the same two fields so a block pointer can be
// cast to the specific record type once the tag has been checked.
130 struct ntreg_hbin_block {
131 int32_t seg_len; /* length of this block (-ve for used block) */
132 char id[2]; /* the block type (eg. "nk" for nk record) */
133 /* Block data follows here. */
134 } __attribute__((__packed__));
// BLOCK_ID_EQ(h, offs, eqid): true when the two-byte id of the block at
// h->addr + offs equals the first two bytes of eqid (compared through
// STREQLEN with n = 2). It does not validate offs; callers pair it with
// IS_VALID_BLOCK first.
136 #define BLOCK_ID_EQ(h,offs,eqid) \
137 (STREQLEN (((struct ntreg_hbin_block *)((h)->addr + (offs)))->id, (eqid), 2))
// block_len: read the length field of the block at file offset blkoff.
// The visible part casts h->addr + blkoff to a block header and converts
// seg_len from little endian. Callers pass either NULL or the address of an
// int as used.
// NOTE(review): the rest of the body (how the sign of seg_len is handled,
// what is stored through used, and the returned value) is not visible here;
// presumably the length is returned as a positive size, since callers
// compare it against byte counts - confirm.
140 block_len (hive_h *h, size_t blkoff, int *used)
142 struct ntreg_hbin_block *block;
143 block = (struct ntreg_hbin_block *) (h->addr + blkoff);
145 int32_t len = le32toh (block->seg_len);
// On-disk layout of an nk (node/key) record (packed). All multi-byte fields
// are read with le16toh or le32toh by the accessors in this file. name is
// declared with one element and the real name of name_len bytes follows
// in place, which is why hivex_node_name computes the required size as
// sizeof (struct ntreg_nk_record) + len - 1.
156 struct ntreg_nk_record {
157 int32_t seg_len; /* length (always -ve because used) */
158 char id[2]; /* "nk" */
162 uint32_t parent; /* offset of owner/parent */
163 uint32_t nr_subkeys; /* number of subkeys */
164 uint32_t nr_subkeys_volatile;
165 uint32_t subkey_lf; /* lf record containing list of subkeys */
166 uint32_t subkey_lf_volatile;
167 uint32_t nr_values; /* number of values */
168 uint32_t vallist; /* value-list record */
169 uint32_t sk; /* offset of sk-record */
170 uint32_t classname; /* offset of classname record */
171 uint16_t max_subkey_name_len; /* maximum length of a subkey name in bytes
172 if the subkey was reencoded as UTF-16LE */
175 uint32_t max_vk_name_len; /* maximum length of any vk name in bytes
176 if the name was reencoded as UTF-16LE */
177 uint32_t max_vk_data_len; /* maximum length of any vk data in bytes */
179 uint16_t name_len; /* length of name */
180 uint16_t classname_len; /* length of classname */
181 char name[1]; /* name follows here */
182 } __attribute__((__packed__));
// On-disk layout of an lf (subkey list) record (packed). get_children also
// accepts blocks tagged lh through this same struct.
// NOTE(review): get_children reads entries as lf->keys[i].offset, so the
// offset and hash members below presumably belong to a per-key array
// element whose declaration lines are not visible here - confirm.
184 struct ntreg_lf_record {
186 char id[2]; /* "lf" */
187 uint16_t nr_keys; /* number of keys in this record */
189 uint32_t offset; /* offset of nk-record for this subkey */
190 char hash[4]; /* hash of subkey name */
192 } __attribute__((__packed__));
// On-disk layout of an ri (indirect subkey list) record (packed). It holds
// nr_offsets pointers to further list records; offset is declared with one
// element and the remaining entries follow in place. get_children requires
// each target to be tagged lf or lh.
194 struct ntreg_ri_record {
196 char id[2]; /* "ri" */
197 uint16_t nr_offsets; /* number of pointers to lh records */
198 uint32_t offset[1]; /* list of pointers to lh records */
199 } __attribute__((__packed__));
// On-disk layout of a value list (packed): an array of 32-bit pointers to
// vk records. get_values takes the entry count from the owning nk record
// (nr_values) and checks 4 + nr_values * 4 against the block length before
// indexing offset.
201 /* This has no ID header. */
202 struct ntreg_value_list {
204 uint32_t offset[1]; /* list of pointers to vk records */
205 } __attribute__((__packed__));
// On-disk layout of a vk (value) record (packed). name is declared with one
// element and the real key name of name_len bytes follows in place.
// NOTE(review): the accessors read vk->data_len, and the multi-line comment
// below describes it, but the member declaration itself is not visible here.
207 struct ntreg_vk_record {
208 int32_t seg_len; /* length (always -ve because used) */
209 char id[2]; /* "vk" */
210 uint16_t name_len; /* length of name */
211 /* length of the data:
212 * If data_len is <= 4, then it's stored inline.
213 * If data_len is 0x80000000, then it's an inline dword.
214 * Top bit may be set or not set at random.
217 uint32_t data_offset; /* pointer to the data (or data if inline) */
218 uint32_t data_type; /* type of the data */
219 uint16_t flags; /* bit 0 set => key name ASCII,
220 bit 0 clr => key name UTF-16.
221 Only seen ASCII here in the wild.
222 NB: this is CLEAR for default key. */
224 char name[1]; /* key name follows here */
225 } __attribute__((__packed__));
// header_checksum: compute the header checksum that hivex_open compares
// against h->hdr->csum. The loop runs 0x1fc / 4 times and XORs
// little-endian 32-bit words, starting at h->addr, into sum.
// NOTE(review): the initialisation of sum, the advance of daddr and the
// return statement are not visible here.
228 header_checksum (const hive_h *h)
230 uint32_t *daddr = (uint32_t *) h->addr;
234 for (i = 0; i < 0x1fc / 4; ++i) {
235 sum ^= le32toh (*daddr);
// hivex_open: open a registry hive file read-only, map it into memory,
// validate the header and walk every page and block once, recording the
// start offset of each block in h->bitmap so later lookups can reject
// pointers that do not land on a block boundary.
//   filename  path of the hive file; a copy is stored in h->filename.
//   flags     low bits (HIVEX_OPEN_MSGLVL_MASK) select the message level.
// Many control-flow lines (error jumps, returns, closing braces) are not
// visible here, so the notes below describe only the statements shown.
243 hivex_open (const char *filename, int flags)
// Layout sanity checks on the packed header struct.
247 assert (sizeof (struct ntreg_header) == 0x1000);
248 assert (offsetof (struct ntreg_header, csum) == 0x1fc);
// Zero-initialised handle, so unset pointers start as NULL.
250 h = calloc (1, sizeof *h);
254 h->msglvl = flags & HIVEX_OPEN_MSGLVL_MASK;
// The HIVEX_DEBUG environment variable, when set to 1, affects the message
// level (the assignment itself is not visible here).
256 const char *debug = getenv ("HIVEX_DEBUG");
257 if (debug && STREQ (debug, "1"))
261 fprintf (stderr, "hivex_open: created handle %p\n", h);
263 h->filename = strdup (filename);
264 if (h->filename == NULL)
267 h->fd = open (filename, O_RDONLY);
272 if (fstat (h->fd, &statbuf) == -1)
275 h->size = statbuf.st_size;
// Map the whole file read-only.
// NOTE(review): no check is visible that h->size covers the 0x1000-byte
// header before h->hdr is dereferenced below - confirm one exists.
277 h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0);
278 if (h->addr == MAP_FAILED)
282 fprintf (stderr, "hivex_open: mapped file at %p\n", h->addr);
// Header magic must be the four bytes r, e, g, f.
285 if (h->hdr->magic[0] != 'r' ||
286 h->hdr->magic[1] != 'e' ||
287 h->hdr->magic[2] != 'g' ||
288 h->hdr->magic[3] != 'f') {
289 fprintf (stderr, "hivex: %s: not a Windows NT Registry hive file\n",
295 /* Check major version. */
296 uint32_t major_ver = le32toh (h->hdr->major_ver);
297 if (major_ver != 1) {
299 "hivex: %s: hive file major version %" PRIu32 " (expected 1)\n",
300 filename, major_ver);
// One bit per 4-byte unit of the file: size / 32 bytes, plus one for the
// remainder. calloc leaves every offset marked invalid initially.
305 h->bitmap = calloc (1 + h->size / 32, 1);
306 if (h->bitmap == NULL)
309 /* Header checksum. */
310 uint32_t sum = header_checksum (h);
311 if (sum != le32toh (h->hdr->csum)) {
312 fprintf (stderr, "hivex: %s: bad checksum in hive header\n", filename);
// Verbose dump of the decoded header fields.
317 if (h->msglvl >= 2) {
318 char *name = windows_utf16_to_utf8 (h->hdr->name, 64);
321 "hivex_open: header fields:\n"
322 " file version %" PRIu32 ".%" PRIu32 "\n"
323 " sequence nos %" PRIu32 " %" PRIu32 "\n"
324 " (sequences nos should match if hive was synched at shutdown)\n"
325 " original file name %s\n"
326 " (only 32 chars are stored, name is probably truncated)\n"
327 " root offset 0x%x + 0x1000\n"
328 " end of last page 0x%x + 0x1000 (total file size 0x%zx)\n"
329 " checksum 0x%x (calculated 0x%x)\n",
330 major_ver, le32toh (h->hdr->minor_ver),
331 le32toh (h->hdr->sequence1), le32toh (h->hdr->sequence2),
332 name ? name : "(conversion failed)",
333 le32toh (h->hdr->offset),
334 le32toh (h->hdr->blocks), h->size,
335 le32toh (h->hdr->csum), sum);
// Header offsets are relative to the end of the 0x1000-byte header.
339 h->rootoffs = le32toh (h->hdr->offset) + 0x1000;
340 h->endpages = le32toh (h->hdr->blocks) + 0x1000;
343 fprintf (stderr, "hivex_open: root offset = 0x%zx\n", h->rootoffs);
345 /* We'll set this flag when we see a block with the root offset (ie.
348 int seen_root_block = 0, bad_root_block = 0;
350 /* Collect some stats. */
351 size_t pages = 0; /* Number of hbin pages read. */
352 size_t smallest_page = SIZE_MAX, largest_page = 0;
353 size_t blocks = 0; /* Total number of blocks found. */
354 size_t smallest_block = SIZE_MAX, largest_block = 0, blocks_bytes = 0;
355 size_t used_blocks = 0; /* Total number of used blocks found. */
356 size_t used_size = 0; /* Total size (bytes) of used blocks. */
// NOTE(review): the comment below says block addresses go into a hash
// table, but the code records them with BITMAP_SET into h->bitmap.
358 /* Read the pages and blocks. The aim here is to be robust against
359 * corrupt or malicious registries. So we make sure the loops
360 * always make forward progress. We add the address of each block
361 * we read to a hash table so pointers will only reference the start
365 struct ntreg_hbin_page *page;
// Page walk: starts right after the header and advances by each page's
// declared size; stops at h->endpages or at the first non-hbin magic.
// NOTE(review): no check is visible that off + page_size stays within
// h->size, so the block walk below may rely on the file being at least as
// long as its last page claims - confirm.
366 for (off = 0x1000; off < h->size; off += le32toh (page->page_size)) {
367 if (off >= h->endpages)
370 page = (struct ntreg_hbin_page *) (h->addr + off);
371 if (page->magic[0] != 'h' ||
372 page->magic[1] != 'b' ||
373 page->magic[2] != 'i' ||
374 page->magic[3] != 'n') {
375 fprintf (stderr, "hivex: %s: trailing garbage at end of file (at 0x%zx, after %zu pages)\n",
376 filename, off, pages);
381 size_t page_size = le32toh (page->page_size);
383 fprintf (stderr, "hivex_open: page at 0x%zx, size %zu\n", off, page_size);
385 if (page_size < smallest_page) smallest_page = page_size;
386 if (page_size > largest_page) largest_page = page_size;
// Rejecting sizes that are too small or not 4KB multiples also guarantees
// the page loop makes forward progress.
388 if (page_size <= sizeof (struct ntreg_hbin_page) ||
389 (page_size & 0x0fff) != 0) {
390 fprintf (stderr, "hivex: %s: page size %zu at 0x%zx, bad registry\n",
391 filename, page_size, off);
396 /* Read the blocks in this page. */
398 struct ntreg_hbin_block *block;
// Block walk: the first block sits 0x20 bytes into the page.
400 for (blkoff = off + 0x20;
401 blkoff < off + page_size;
405 int is_root = blkoff == h->rootoffs;
409 block = (struct ntreg_hbin_block *) (h->addr + blkoff);
// A block must be longer than 4 bytes and a multiple of 4.
411 seg_len = block_len (h, blkoff, &used);
412 if (seg_len <= 4 || (seg_len & 3) != 0) {
413 fprintf (stderr, "hivex: %s: block size %" PRIu32 " at 0x%zx, bad registry\n",
414 filename, le32toh (block->seg_len), blkoff);
420 fprintf (stderr, "hivex_open: %s block id %d,%d at 0x%zx size %zu%s\n",
421 used ? "used" : "free", block->id[0], block->id[1], blkoff,
422 seg_len, is_root ? " (root)" : "");
424 blocks_bytes += seg_len;
425 if (seg_len < smallest_block) smallest_block = seg_len;
426 if (seg_len > largest_block) largest_block = seg_len;
428 if (is_root && !used)
433 used_size += seg_len;
435 /* Root block must be an nk-block. */
436 if (is_root && (block->id[0] != 'n' || block->id[1] != 'k'))
439 /* Note this blkoff is a valid address. */
440 BITMAP_SET (h->bitmap, blkoff);
// After the walk: the root offset must have been seen as a used nk block.
445 if (!seen_root_block) {
446 fprintf (stderr, "hivex: %s: no root block found\n", filename);
451 if (bad_root_block) {
452 fprintf (stderr, "hivex: %s: bad root block (free or not nk)\n", filename);
459 "hivex_open: successfully read Windows Registry hive file:\n"
460 " pages: %zu [sml: %zu, lge: %zu]\n"
461 " blocks: %zu [sml: %zu, avg: %zu, lge: %zu]\n"
462 " blocks used: %zu\n"
463 " bytes used: %zu\n",
464 pages, smallest_page, largest_page,
465 blocks, smallest_block, blocks_bytes / blocks, largest_block,
466 used_blocks, used_size);
// Cleanup: unmap only when a mapping was actually established.
474 if (h->addr && h->size && h->addr != MAP_FAILED)
475 munmap (h->addr, h->size);
// hivex_close: release a handle obtained from hivex_open. The visible
// statement unmaps the hive file mapping.
// NOTE(review): the remaining teardown steps and the return value are not
// visible here.
486 hivex_close (hive_h *h)
491 munmap (h->addr, h->size);
// hivex_root: return the root node, taken from h->rootoffs as computed by
// hivex_open. The offset is re-validated against the block bitmap; the
// handling of the invalid case is not visible here.
500 hivex_root (hive_h *h)
502 hive_node_h ret = h->rootoffs;
503 if (!IS_VALID_BLOCK (h, ret)) {
// hivex_node_name: return a newly allocated copy of the name stored in the
// nk record at node. node must be a valid block tagged nk. The stored
// length is checked against the block length before copying, and len + 1
// bytes are allocated so the copy can be nul-terminated (the terminating
// store and the return are not visible here). The caller owns the result.
511 hivex_node_name (hive_h *h, hive_node_h node)
513 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
518 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
520 /* AFAIK the node name is always plain ASCII, so no conversion
521 * to UTF-8 is necessary. However we do need to nul-terminate
525 /* nk->name_len is unsigned, 16 bit, so this is safe ... However
526 * we have to make sure the length doesn't exceed the block length.
528 size_t len = le16toh (nk->name_len);
529 size_t seg_len = block_len (h, node, NULL);
// The struct already counts one byte of name, hence the - 1.
530 if (sizeof (struct ntreg_nk_record) + len - 1 > seg_len) {
532 fprintf (stderr, "hivex_node_name: returning EFAULT because node name is too long (%zu, %zu)\n",
538 char *ret = malloc (len + 1);
541 memcpy (ret, nk->name, len);
// hivex_node_security: look up the sk record referenced by the nk record
// at node. The original comment below states this code is disabled.
// NOTE(review): the mechanism that disables it is not visible here.
547 /* I think the documentation for the sk and classname fields in the nk
548 * record is wrong, or else the offset field is in the wrong place.
549 * Otherwise this makes no sense. Disabled this for now -- it's not
550 * useful for reading the registry anyway.
554 hivex_node_security (hive_h *h, hive_node_h node)
556 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
561 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
563 hive_node_h ret = le32toh (nk->sk);
// The referenced offset must itself be the start of a known block.
565 if (!IS_VALID_BLOCK (h, ret)) {
// hivex_node_classname: look up the classname record referenced by the nk
// record at node. node must be a valid block tagged nk, and the referenced
// offset is validated against the block bitmap before use.
// NOTE(review): this follows the same pattern as hivex_node_security and
// presumably falls under the same disabled region - confirm.
573 hivex_node_classname (hive_h *h, hive_node_h node)
575 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
580 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
582 hive_node_h ret = le32toh (nk->classname);
584 if (!IS_VALID_BLOCK (h, ret)) {
592 /* Structure for returning 0-terminated lists of offsets (nodes,
// init_offset_list: reset a list to the empty state. The visible statement
// clears the offsets pointer, which makes a later free or realloc on it
// safe; resets of the other members are not visible here.
602 init_offset_list (struct offset_list *list)
606 list->offsets = NULL;
// INIT_OFFSET_LIST(name): declare a local struct offset_list called name
// and initialise it. The expansion is a declaration followed by a call, so
// it can only be used where a declaration is allowed, not as the body of an
// unbraced if or loop.
609 #define INIT_OFFSET_LIST(name) \
610 struct offset_list name; \
611 init_offset_list (&name)
// grow_offset_list: make room for alloc entries without changing the
// number of entries in use. alloc must not be smaller than list->len. The
// realloc result goes into a temporary, so the existing array is not lost
// if the allocation fails. Callers treat a return of -1 as failure.
613 /* Preallocates the offset_list, but doesn't make the contents longer. */
615 grow_offset_list (struct offset_list *list, size_t alloc)
617 assert (alloc >= list->len);
618 size_t *p = realloc (list->offsets, alloc * sizeof (size_t));
// add_to_offset_list: append offset to the list. When the list is full its
// capacity is doubled, or set to 4 if it was empty. Callers treat a return
// of -1 as failure. The increment of list->len is not visible here.
627 add_to_offset_list (struct offset_list *list, size_t offset)
629 if (list->len >= list->alloc) {
630 if (grow_offset_list (list, list->alloc ? list->alloc * 2 : 4) == -1)
633 list->offsets[list->len] = offset;
// free_offset_list: release the array owned by the list. Safe on a list
// that was only initialised, because free accepts a NULL pointer.
639 free_offset_list (struct offset_list *list)
641 free (list->offsets);
// return_offset_list: finish a list for handing to the caller by appending
// a 0 terminator and returning the underlying array. Ownership of the
// array passes to the caller. The failure branch is not visible here;
// callers treat a NULL result as an error.
645 return_offset_list (struct offset_list *list)
647 if (add_to_offset_list (list, 0) == -1)
649 return list->offsets; /* caller frees */
// get_children: collect the child nodes of the nk record at node.
//   children_ret  receives a 0-terminated array of child node offsets.
//   blocks_ret    receives a 0-terminated array of the intermediate list
//                 blocks (lf, lh, ri) that were traversed.
// Both arrays are allocated here and owned by the caller on success.
// hivex_node_children treats a return of -1 as failure.
// Two on-disk shapes are handled: subkey_lf pointing directly at an lf or
// lh record, or at an ri record that in turn points at several lf or lh
// records.
652 /* Iterate over children, returning child nodes and intermediate blocks. */
654 get_children (hive_h *h, hive_node_h node,
655 hive_node_h **children_ret, size_t **blocks_ret)
657 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
662 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
664 size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys);
666 INIT_OFFSET_LIST (children);
667 INIT_OFFSET_LIST (blocks);
669 /* Deal with the common "no subkeys" case quickly. */
670 if (nr_subkeys_in_nk == 0)
673 /* Arbitrarily limit the number of subkeys we will ever deal with. */
674 if (nr_subkeys_in_nk > 1000000) {
679 /* Preallocate space for the children. */
680 if (grow_offset_list (&children, nr_subkeys_in_nk) == -1)
683 /* The subkey_lf field can point either to an lf-record, which is
684 * the common case, or if there are lots of subkeys, to an
687 size_t subkey_lf = le32toh (nk->subkey_lf);
689 if (!IS_VALID_BLOCK (h, subkey_lf)) {
691 fprintf (stderr, "hivex_node_children: returning EFAULT because subkey_lf is not a valid block (%zu)\n",
697 if (add_to_offset_list (&blocks, subkey_lf) == -1)
700 struct ntreg_hbin_block *block =
701 (struct ntreg_hbin_block *) (h->addr + subkey_lf);
703 /* Points to lf-record? (Note, also "lh" but that is basically the
704 * same as "lf" as far as we are concerned here).
706 if (block->id[0] == 'l' && (block->id[1] == 'f' || block->id[1] == 'h')) {
707 struct ntreg_lf_record *lf = (struct ntreg_lf_record *) block;
709 /* Check number of subkeys in the nk-record matches number of subkeys
712 size_t nr_subkeys_in_lf = le16toh (lf->nr_keys);
715 fprintf (stderr, "hivex_node_children: nr_subkeys_in_nk = %zu, nr_subkeys_in_lf = %zu\n",
716 nr_subkeys_in_nk, nr_subkeys_in_lf);
718 if (nr_subkeys_in_nk != nr_subkeys_in_lf) {
// Bounds check: an 8-byte record header plus 8 bytes per entry (4-byte
// offset and 4-byte hash) must fit inside the block.
723 size_t len = block_len (h, subkey_lf, NULL);
724 if (8 + nr_subkeys_in_lf * 8 > len) {
726 fprintf (stderr, "hivex_node_children: returning EFAULT because too many subkeys (%zu, %zu)\n",
727 nr_subkeys_in_lf, len);
733 for (i = 0; i < nr_subkeys_in_lf; ++i) {
734 hive_node_h subkey = le32toh (lf->keys[i].offset);
736 if (!IS_VALID_BLOCK (h, subkey)) {
738 fprintf (stderr, "hivex_node_children: returning EFAULT because subkey is not a valid block (0x%zx)\n",
743 if (add_to_offset_list (&children, subkey) == -1)
748 /* Points to ri-record? */
749 else if (block->id[0] == 'r' && block->id[1] == 'i') {
750 struct ntreg_ri_record *ri = (struct ntreg_ri_record *) block;
752 size_t nr_offsets = le16toh (ri->nr_offsets);
// First pass over the ri record: validate each referenced list block and
// total up its key count so it can be compared with the nk record.
// NOTE(review): ri->offset[i] is read without le32toh, unlike
// lf->keys[i].offset above; on a big-endian host this would yield a
// byte-swapped offset - confirm and convert if so.
// NOTE(review): no check is visible that nr_offsets entries fit inside
// the ri block - confirm.
754 /* Count total number of children. */
756 for (i = 0; i < nr_offsets; ++i) {
757 hive_node_h offset = ri->offset[i];
759 if (!IS_VALID_BLOCK (h, offset)) {
761 fprintf (stderr, "hivex_node_children: returning EFAULT because ri-offset is not a valid block (0x%zx)\n",
766 if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) {
771 if (add_to_offset_list (&blocks, offset) == -1)
774 struct ntreg_lf_record *lf =
775 (struct ntreg_lf_record *) (h->addr + offset);
777 count += le16toh (lf->nr_keys);
781 fprintf (stderr, "hivex_node_children: nr_subkeys_in_nk = %zu, counted = %zu\n",
782 nr_subkeys_in_nk, count);
784 if (nr_subkeys_in_nk != count) {
// Second pass: walk the same list blocks again and collect the subkeys.
// NOTE(review): same missing le32toh on ri->offset[i] as in the first pass.
// NOTE(review): unlike the direct lf path, no check is visible here that
// nr_keys entries fit inside each indirect lf block - confirm.
789 /* Copy list of children. Note nr_subkeys_in_nk is limited to
790 * something reasonable above.
792 for (i = 0; i < nr_offsets; ++i) {
793 hive_node_h offset = ri->offset[i];
795 if (!IS_VALID_BLOCK (h, offset)) {
797 fprintf (stderr, "hivex_node_children: returning EFAULT because ri-offset is not a valid block (0x%zx)\n",
802 if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) {
807 struct ntreg_lf_record *lf =
808 (struct ntreg_lf_record *) (h->addr + offset);
811 for (j = 0; j < le16toh (lf->nr_keys); ++j) {
812 hive_node_h subkey = le32toh (lf->keys[j].offset);
814 if (!IS_VALID_BLOCK (h, subkey)) {
816 fprintf (stderr, "hivex_node_children: returning EFAULT because indirect subkey is not a valid block (0x%zx)\n",
821 if (add_to_offset_list (&children, subkey) == -1)
827 /* else not supported, set errno and fall through */
// Failure path: both partially built lists are released.
830 free_offset_list (&children);
831 free_offset_list (&blocks);
// Success path: terminate both lists and hand them to the caller.
835 *children_ret = return_offset_list (&children);
836 *blocks_ret = return_offset_list (&blocks);
837 if (!*children_ret || !*blocks_ret)
// hivex_node_children: public wrapper around get_children that returns
// the list of child nodes of node. The handling of the intermediate blocks
// list and the return statements are not visible here;
// hivex_node_get_child treats a NULL result as failure and iterates the
// result up to a 0 terminator.
843 hivex_node_children (hive_h *h, hive_node_h node)
845 hive_node_h *children;
848 if (get_children (h, node, &children, &blocks) == -1)
// hivex_node_get_child: find the child of node whose name matches nname,
// ignoring case (STRCASEEQ). It fetches the full child list, then fetches
// and compares each child's name in turn, freeing each name after the
// comparison. The result assignment and cleanup are not visible here.
855 /* Very inefficient, but at least having a separate API call
856 * allows us to make it more efficient in future.
859 hivex_node_get_child (hive_h *h, hive_node_h node, const char *nname)
861 hive_node_h *children = NULL;
865 children = hivex_node_children (h, node);
866 if (!children) goto error;
// The child list is terminated by a 0 entry.
869 for (i = 0; children[i] != 0; ++i) {
870 name = hivex_node_name (h, children[i]);
871 if (!name) goto error;
872 if (STRCASEEQ (name, nname)) {
876 free (name); name = NULL;
// hivex_node_parent: return the parent of the nk record at node, read from
// the record's parent field. node must be a valid block tagged nk, and the
// resulting offset is validated against the block bitmap.
// NOTE(review): any adjustment applied to ret between the read and the
// validity check is not visible here.
886 hivex_node_parent (hive_h *h, hive_node_h node)
888 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
893 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
895 hive_node_h ret = le32toh (nk->parent);
897 if (!IS_VALID_BLOCK (h, ret)) {
899 fprintf (stderr, "hivex_node_parent: returning EFAULT because parent is not a valid block (0x%zx)\n",
// get_values: collect the values attached to the nk record at node.
//   values_ret  receives a 0-terminated array of vk record offsets.
//   blocks_ret  receives a 0-terminated array holding the value-list block
//               that was traversed.
// Both arrays are allocated here and owned by the caller on success.
// hivex_node_values treats a return of -1 as failure.
908 get_values (hive_h *h, hive_node_h node,
909 hive_value_h **values_ret, size_t **blocks_ret)
911 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
916 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
918 size_t nr_values = le32toh (nk->nr_values);
921 fprintf (stderr, "hivex_node_values: nr_values = %zu\n", nr_values);
923 INIT_OFFSET_LIST (values);
924 INIT_OFFSET_LIST (blocks);
926 /* Deal with the common "no values" case quickly. */
930 /* Arbitrarily limit the number of values we will ever deal with. */
931 if (nr_values > 100000) {
936 /* Preallocate space for the values. */
937 if (grow_offset_list (&values, nr_values) == -1)
// Stored offsets are relative to the end of the 0x1000-byte file header.
940 /* Get the value list and check it looks reasonable. */
941 size_t vlist_offset = le32toh (nk->vallist);
942 vlist_offset += 0x1000;
943 if (!IS_VALID_BLOCK (h, vlist_offset)) {
945 fprintf (stderr, "hivex_node_values: returning EFAULT because value list is not a valid block (0x%zx)\n",
951 if (add_to_offset_list (&blocks, vlist_offset) == -1)
954 struct ntreg_value_list *vlist =
955 (struct ntreg_value_list *) (h->addr + vlist_offset);
// Bounds check: a 4-byte length field plus 4 bytes per entry must fit.
957 size_t len = block_len (h, vlist_offset, NULL);
958 if (4 + nr_values * 4 > len) {
960 fprintf (stderr, "hivex_node_values: returning EFAULT because value list is too long (%zu, %zu)\n",
// NOTE(review): vlist->offset[i] is read without le32toh, unlike
// nk->vallist above; on a big-endian host this would yield a byte-swapped
// offset - confirm and convert if so.
967 for (i = 0; i < nr_values; ++i) {
968 hive_node_h value = vlist->offset[i];
970 if (!IS_VALID_BLOCK (h, value)) {
972 fprintf (stderr, "hivex_node_values: returning EFAULT because value is not a valid block (0x%zx)\n",
977 if (add_to_offset_list (&values, value) == -1)
// Success path: terminate both lists and hand them to the caller.
982 *values_ret = return_offset_list (&values);
983 *blocks_ret = return_offset_list (&blocks);
984 if (!*values_ret || !*blocks_ret)
// Failure path: both partially built lists are released.
989 free_offset_list (&values);
990 free_offset_list (&blocks);
// hivex_node_values: public wrapper around get_values that returns the
// list of values attached to node. The handling of the intermediate blocks
// list and the return statements are not visible here;
// hivex_node_get_value treats a NULL result as failure and iterates the
// result up to a 0 terminator.
995 hivex_node_values (hive_h *h, hive_node_h node)
997 hive_value_h *values;
1000 if (get_values (h, node, &values, &blocks) == -1)
// hivex_node_get_value: find the value of node whose key matches key,
// ignoring case (STRCASEEQ). It fetches the full value list, then fetches
// and compares each value's key in turn, freeing each key after the
// comparison. ret starts at 0; the assignment on a match and the cleanup
// are not visible here.
1007 /* Very inefficient, but at least having a separate API call
1008 * allows us to make it more efficient in future.
1011 hivex_node_get_value (hive_h *h, hive_node_h node, const char *key)
1013 hive_value_h *values = NULL;
1015 hive_value_h ret = 0;
1017 values = hivex_node_values (h, node);
1018 if (!values) goto error;
// The value list is terminated by a 0 entry.
1021 for (i = 0; values[i] != 0; ++i) {
1022 name = hivex_value_key (h, values[i]);
1023 if (!name) goto error;
1024 if (STRCASEEQ (name, key)) {
1028 free (name); name = NULL;
// hivex_value_key: return a newly allocated copy of the key name stored in
// the vk record at value. value must be a valid block tagged vk. The stored
// length is checked against the block length before copying, and len + 1
// bytes are allocated so the copy can be nul-terminated (the terminating
// store and the return are not visible here). The caller owns the result.
1038 hivex_value_key (hive_h *h, hive_value_h value)
1040 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1045 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1047 /* AFAIK the key is always plain ASCII, so no conversion to UTF-8 is
1048 * necessary. However we do need to nul-terminate the string.
1051 /* vk->name_len is unsigned, 16 bit, so this is safe ... However
1052 * we have to make sure the length doesn't exceed the block length.
1054 size_t len = le16toh (vk->name_len);
1055 size_t seg_len = block_len (h, value, NULL);
// The struct already counts one byte of name, hence the - 1.
1056 if (sizeof (struct ntreg_vk_record) + len - 1 > seg_len) {
1058 fprintf (stderr, "hivex_value_key: returning EFAULT because key length is too long (%zu, %zu)\n",
1064 char *ret = malloc (len + 1);
1067 memcpy (ret, vk->name, len);
// hivex_value_type: report the data type and data length of the vk record
// at value without copying the data. A stored length of exactly 0x80000000
// is treated as a special case and the type is reported as hive_t_dword.
// hivex__visit_node treats a return of -1 as failure.
// NOTE(review): the special-case line guards t against NULL, so t is
// optional; the guards in front of the other stores through t and len are
// not visible here.
1073 hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len)
1075 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1080 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1083 *t = le32toh (vk->data_type);
1086 *len = le32toh (vk->data_len);
1087 if (*len == 0x80000000) { /* special case */
1089 if (t) *t = hive_t_dword;
// hivex_value_value: return a newly allocated copy of the raw data of the
// vk record at value.
//   t_rtn, len_rtn  receive the type and length (the stores are not
//                   visible here).
// Data is either stored inline in the data_offset field or in a separate
// block at data_offset + 0x1000. Lengths above 1000000 are refused. The
// caller owns the returned buffer.
1098 hivex_value_value (hive_h *h, hive_value_h value,
1099 hive_type *t_rtn, size_t *len_rtn)
1101 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1106 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1111 t = le32toh (vk->data_type);
1113 len = le32toh (vk->data_len);
1114 if (len == 0x80000000) { /* special case */
1121 fprintf (stderr, "hivex_value_value: value=0x%zx, t=%d, len=%zu\n",
1129 /* Arbitrarily limit the length that we will read. */
1130 if (len > 1000000) {
1135 char *ret = malloc (len);
// Inline case: the bytes are copied straight out of the data_offset field
// in file order, with no endian conversion.
1139 /* If length is <= 4 it's always stored inline. */
1141 memcpy (ret, (char *) &vk->data_offset, len);
// Out-of-line case: the offset is relative to the end of the file header.
1145 size_t data_offset = le32toh (vk->data_offset);
1146 data_offset += 0x1000;
1147 if (!IS_VALID_BLOCK (h, data_offset)) {
1149 fprintf (stderr, "hivex_value_value: returning EFAULT because data offset is not a valid block (0x%zx)\n",
// blen - 4 cannot wrap for blocks admitted by hivex_open, which rejects
// block lengths of 4 or less before marking them valid.
1156 /* Check that the declared size isn't larger than the block its in. */
1157 size_t blen = block_len (h, data_offset, NULL);
1158 if (len > blen - 4 /* subtract 4 for block header */) {
1160 fprintf (stderr, "hivex_value_value: returning EFAULT because data is longer than its block (data 0x%zx, data len %zu, block len %zu)\n",
1161 data_offset, len, blen);
// Payload starts after the 4-byte length field of the data block.
1167 char *data = h->addr + data_offset + 4;
1168 memcpy (ret, data, len);
// windows_utf16_to_utf8: convert len bytes of UTF-16 text at input into a
// newly allocated UTF-8 string using iconv. The output buffer starts at
// len bytes (plus one spare byte) and is enlarged and the conversion
// retried when iconv reports E2BIG. Other iconv failures such as EILSEQ or
// EINVAL end the conversion; callers in this file test errno for those two
// values after a failed conversion.
// NOTE(review): the source encoding is given as "UTF-16" with no explicit
// byte order, so decoding of data without a byte-order mark depends on the
// iconv implementation; hive fields are documented above as little endian,
// so "UTF-16LE" may be what is intended - confirm.
1173 windows_utf16_to_utf8 (/* const */ char *input, size_t len)
1175 iconv_t ic = iconv_open ("UTF-8", "UTF-16");
1176 if (ic == (iconv_t) -1)
1179 /* iconv(3) has an insane interface ... */
1181 /* Mostly UTF-8 will be smaller, so this is a good initial guess. */
1182 size_t outalloc = len;
1186 size_t outlen = outalloc;
1187 char *out = malloc (outlen + 1);
1197 size_t r = iconv (ic, &inp, &inlen, &outp, &outlen);
1198 if (r == (size_t) -1) {
1199 if (errno == E2BIG) {
1200 size_t prev = outalloc;
1201 /* Try again with a larger output buffer. */
// A new size smaller than the previous one means the growth wrapped.
1204 if (outalloc < prev)
1209 /* Else some conversion failure, eg. EILSEQ, EINVAL. */
// hivex_value_string: return the data of value as a newly allocated UTF-8
// string. Only the types hive_t_string, hive_t_expand_string and
// hive_t_link are accepted. The raw bytes come from hivex_value_value and
// are converted with windows_utf16_to_utf8. Freeing of the raw buffer and
// the error returns are not visible here.
1225 hivex_value_string (hive_h *h, hive_value_h value)
1229 char *data = hivex_value_value (h, value, &t, &len);
1234 if (t != hive_t_string && t != hive_t_expand_string && t != hive_t_link) {
1240 char *ret = windows_utf16_to_utf8 (data, len);
// free_strings: release a NULL-terminated array of strings. The visible
// loop walks the array up to the NULL entry; the per-element free, the
// free of the array itself and any guard for a NULL argv are not visible
// here.
1249 free_strings (char **argv)
1254 for (i = 0; argv[i] != NULL; ++i)
// utf16_string_len_in_bytes: count the bytes of a UTF-16 string up to, but
// not including, its two-byte zero terminator (the caller in this file
// adds 2 to skip the terminator).
// NOTE(review): the scan takes no length limit and stops only at a zero
// pair, so it reads past the end of a buffer that lacks one;
// hivex_value_multiple_strings only checks that the starting pointer is
// inside the buffer - confirm whether a bounded variant is needed.
1260 /* Get the length of a UTF-16 format string. Handle the string as
1261 * pairs of bytes, looking for the first \0\0 pair.
1264 utf16_string_len_in_bytes (const char *str)
1268 while (str[0] || str[1]) {
// hivex_value_multiple_strings: return the data of a value of type
// hive_t_multiple_strings as a NULL-terminated array of newly allocated
// UTF-8 strings. The raw data is a sequence of zero-terminated UTF-16
// strings; the loop stops at the end of the data or at the first empty
// string. The array is grown by one slot per string through a temporary
// pointer, and is kept NULL-terminated after every append.
1276 /* http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx */
1278 hivex_value_multiple_strings (hive_h *h, hive_value_h value)
1282 char *data = hivex_value_value (h, value, &t, &len);
1287 if (t != hive_t_multiple_strings) {
1293 size_t nr_strings = 0;
1294 char **ret = malloc ((1 + nr_strings) * sizeof (char *));
// NOTE(review): only the start pointer p is bounded by data + len; the
// length scan itself is unbounded (see utf16_string_len_in_bytes).
1304 while (p < data + len && (plen = utf16_string_len_in_bytes (p)) > 0) {
1306 char **ret2 = realloc (ret, (1 + nr_strings) * sizeof (char *));
1314 ret[nr_strings-1] = windows_utf16_to_utf8 (p, plen);
1315 ret[nr_strings] = NULL;
1316 if (ret[nr_strings-1] == NULL) {
1322 p += plen + 2 /* skip over UTF-16 \0\0 at the end of this string */;
// hivex_value_dword: return the data of value as a 32-bit integer in host
// byte order. The value must have type hive_t_dword or hive_t_dword_be and
// a data length of exactly 4. Little-endian data is converted with
// le32toh, big-endian data with be32toh.
// NOTE(review): how failure is reported is not visible here; the visitor
// in this file passes the result to its callback without an error check.
1330 hivex_value_dword (hive_h *h, hive_value_h value)
1334 char *data = hivex_value_value (h, value, &t, &len);
1339 if ((t != hive_t_dword && t != hive_t_dword_be) || len != 4) {
1345 int32_t ret = *(int32_t*)data;
1347 if (t == hive_t_dword) /* little endian */
1348 ret = le32toh (ret);
1350 ret = be32toh (ret);
// hivex_value_qword: return the data of value as a 64-bit integer in host
// byte order. The value must have type hive_t_qword and a data length of
// exactly 8; the stored bytes are little endian.
// NOTE(review): how failure is reported is not visible here; the visitor
// in this file passes the result to its callback without an error check.
1356 hivex_value_qword (hive_h *h, hive_value_h value)
1360 char *data = hivex_value_value (h, value, &t, &len);
1365 if (t != hive_t_qword || len != 8) {
1371 int64_t ret = *(int64_t*)data;
1373 ret = le64toh (ret); /* always little endian */
// hivex_visit: walk the whole hive, starting at the root node. This is
// hivex_visit_node applied to hivex_root (h); all other arguments are
// passed through unchanged.
1379 hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len,
1380 void *opaque, int flags)
1382 return hivex_visit_node (h, hivex_root (h), visitor, len, opaque, flags);
1385 static int hivex__visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *vtor, char *unvisited, void *opaque, int flags);
// hivex_visit_node: walk the subtree rooted at node, invoking the callbacks
// in visitor.
//   visitor, len  caller's callback table and its size in bytes. Only the
//                 smaller of len and the local struct size is copied into a
//                 zeroed local struct, so callbacks the caller's struct
//                 does not cover stay NULL.
//   opaque        passed through to every callback.
//   flags         passed through to the recursive worker.
// A private copy of the block bitmap tracks nodes not yet visited so that
// cycles in the hive terminate. Release of that copy and the return are
// not visible here.
1388 hivex_visit_node (hive_h *h, hive_node_h node,
1389 const struct hivex_visitor *visitor, size_t len, void *opaque,
1392 struct hivex_visitor vtor;
1393 memset (&vtor, 0, sizeof vtor);
1395 /* Note that len might be larger *or smaller* than the expected size. */
1396 size_t copysize = len <= sizeof vtor ? len : sizeof vtor;
1397 memcpy (&vtor, visitor, copysize);
1399 /* This bitmap records unvisited nodes, so we don't loop if the
1400 * registry contains cycles.
// Same size as h->bitmap as allocated in hivex_open.
1402 char *unvisited = malloc (1 + h->size / 32);
1403 if (unvisited == NULL)
1405 memcpy (unvisited, h->bitmap, 1 + h->size / 32);
1407 int r = hivex__visit_node (h, node, &vtor, unvisited, opaque, flags);
// hivex__visit_node: recursive worker behind hivex_visit_node. For one
// node it calls node_start, dispatches every value to a callback chosen by
// the value's type, recurses into every child, then calls node_end.
//   vtor       callback table; NULL entries are skipped.
//   unvisited  bitmap of nodes not yet visited; the bit for node is cleared
//              on entry, and a node whose bit is already clear is reported
//              as a cycle.
//   flags      HIVEX_VISIT_SKIP_BAD turns internal read errors into a
//              successful return (0) instead of -1. Callback failures (a
//              callback returning -1) are not suppressed by it.
// Case labels, error jumps and closing braces are not visible here, so the
// notes below describe only the statements shown.
1413 hivex__visit_node (hive_h *h, hive_node_h node,
1414 const struct hivex_visitor *vtor, char *unvisited,
1415 void *opaque, int flags)
1417 int skip_bad = flags & HIVEX_VISIT_SKIP_BAD;
1419 hive_value_h *values = NULL;
1420 hive_node_h *children = NULL;
1426 /* Return -1 on all callback errors. However on internal errors,
1427 * check if skip_bad is set and suppress those errors if so.
// Cycle detection: a clear bit means this node was already visited.
1431 if (!BITMAP_TST (unvisited, node)) {
1433 fprintf (stderr, "hivex__visit_node: contains cycle: visited node 0x%zx already\n",
1437 return skip_bad ? 0 : -1;
1439 BITMAP_CLR (unvisited, node);
1441 name = hivex_node_name (h, node);
1442 if (!name) return skip_bad ? 0 : -1;
1443 if (vtor->node_start && vtor->node_start (h, opaque, node, name) == -1)
1446 values = hivex_node_values (h, node);
1448 ret = skip_bad ? 0 : -1;
// Value loop: the list is terminated by a 0 entry.
1452 for (i = 0; values[i] != 0; ++i) {
1456 if (hivex_value_type (h, values[i], &t, &len) == -1) {
1457 ret = skip_bad ? 0 : -1;
1461 key = hivex_value_key (h, values[i]);
1463 ret = skip_bad ? 0 : -1;
// A value_any callback, when set, receives the raw data of the value.
1467 if (vtor->value_any) {
1468 str = hivex_value_value (h, values[i], &t, &len);
1470 ret = skip_bad ? 0 : -1;
1473 if (vtor->value_any (h, opaque, node, values[i], t, len, key, str) == -1)
1475 free (str); str = NULL;
// Raw-data path guarded by a check that the type is hive_t_none.
1480 str = hivex_value_value (h, values[i], &t, &len);
1482 ret = skip_bad ? 0 : -1;
1485 if (t != hive_t_none) {
1486 ret = skip_bad ? 0 : -1;
1489 if (vtor->value_none &&
1490 vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1)
1492 free (str); str = NULL;
// String path: conversion failures with errno EILSEQ or EINVAL are routed
// to value_string_invalid_utf16 with the raw data instead of failing.
1496 case hive_t_expand_string:
1498 str = hivex_value_string (h, values[i]);
1500 if (errno != EILSEQ && errno != EINVAL) {
1501 ret = skip_bad ? 0 : -1;
// NOTE(review): the result of hivex_value_value is handed to the callback
// with no check in between, so str may be a failed (presumably NULL)
// result here - confirm.
1504 if (vtor->value_string_invalid_utf16) {
1505 str = hivex_value_value (h, values[i], &t, &len);
1506 if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1)
1508 free (str); str = NULL;
1512 if (vtor->value_string &&
1513 vtor->value_string (h, opaque, node, values[i], t, len, key, str) == -1)
1515 free (str); str = NULL;
// NOTE(review): the dword and qword results go straight to the callbacks
// with no error check visible - confirm how read failures are detected.
1519 case hive_t_dword_be: {
1520 int32_t i32 = hivex_value_dword (h, values[i]);
1521 if (vtor->value_dword &&
1522 vtor->value_dword (h, opaque, node, values[i], t, len, key, i32) == -1)
1527 case hive_t_qword: {
1528 int64_t i64 = hivex_value_qword (h, values[i]);
1529 if (vtor->value_qword &&
1530 vtor->value_qword (h, opaque, node, values[i], t, len, key, i64) == -1)
// Raw-data path guarded by a check that the type is hive_t_binary.
1536 str = hivex_value_value (h, values[i], &t, &len);
1538 ret = skip_bad ? 0 : -1;
1541 if (t != hive_t_binary) {
1542 ret = skip_bad ? 0 : -1;
1545 if (vtor->value_binary &&
1546 vtor->value_binary (h, opaque, node, values[i], t, len, key, str) == -1)
1548 free (str); str = NULL;
// Multi-string path: same invalid-UTF-16 fallback as the string path.
1551 case hive_t_multiple_strings:
1552 strs = hivex_value_multiple_strings (h, values[i]);
1554 if (errno != EILSEQ && errno != EINVAL) {
1555 ret = skip_bad ? 0 : -1;
// NOTE(review): same unchecked hivex_value_value result as in the string
// path above.
1558 if (vtor->value_string_invalid_utf16) {
1559 str = hivex_value_value (h, values[i], &t, &len);
1560 if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1)
1562 free (str); str = NULL;
1566 if (vtor->value_multiple_strings &&
1567 vtor->value_multiple_strings (h, opaque, node, values[i], t, len, key, strs) == -1)
1569 free_strings (strs); strs = NULL;
// Remaining listed types are delivered as raw data through value_other.
1572 case hive_t_resource_list:
1573 case hive_t_full_resource_description:
1574 case hive_t_resource_requirements_list:
1576 str = hivex_value_value (h, values[i], &t, &len);
1578 ret = skip_bad ? 0 : -1;
1581 if (vtor->value_other &&
1582 vtor->value_other (h, opaque, node, values[i], t, len, key, str) == -1)
1584 free (str); str = NULL;
1589 free (key); key = NULL;
// Child loop: recurse into each child; the list ends with a 0 entry.
1592 children = hivex_node_children (h, node);
1593 if (children == NULL) {
1594 ret = skip_bad ? 0 : -1;
1598 for (i = 0; children[i] != 0; ++i) {
1600 fprintf (stderr, "hivex__visit_node: %s: visiting subkey %d (0x%zx)\n",
1601 name, i, children[i]);
1603 if (hivex__visit_node (h, children[i], vtor, unvisited, opaque, flags) == -1)
1607 if (vtor->node_end && vtor->node_end (h, opaque, node, name) == -1)
1618 free_strings (strs);