1 /* hivex - Windows Registry "hive" extraction library.
2 * Copyright (C) 2009-2011 Red Hat Inc.
3 * Derived from code by Petter Nordahl-Hagen under a compatible license:
4 * Copyright (c) 1997-2007 Petter Nordahl-Hagen.
5 * Derived from code by Markus Stephany under a compatible license:
6 * Copyright (c) 2000-2004, Markus Stephany.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation;
11 * version 2.1 of the License.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * See file LICENSE for the full license.
39 /* On systems without mmap (and munmap), use a replacement function. */
44 #include "full-read.h"
45 #include "full-write.h"
48 #include "hivex-internal.h"
49 #include "byte_conversions.h"
51 /* These limits are in place to stop really stupid stuff and/or exploits. */
52 #define HIVEX_MAX_SUBKEYS 15000
53 #define HIVEX_MAX_VALUES 10000
54 #define HIVEX_MAX_VALUE_LEN 1000000
55 #define HIVEX_MAX_ALLOCATION 1000000
57 static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len);
58 static size_t utf16_string_len_in_bytes_max (const char *str, size_t len);
60 /* NB. All fields are little endian. */
62 char magic[4]; /* "regf" */
65 int64_t last_modified;
66 uint32_t major_ver; /* 1 */
67 uint32_t minor_ver; /* 3 */
68 uint32_t unknown5; /* 0 */
69 uint32_t unknown6; /* 1 */
70 uint32_t offset; /* offset of root key record - 4KB */
71 uint32_t blocks; /* pointer AFTER last hbin in file - 4KB */
72 uint32_t unknown7; /* 1 */
74 char name[64]; /* original file name of hive */
75 char unknown_guid1[16];
76 char unknown_guid2[16];
79 char unknown_guid3[16];
84 uint32_t csum; /* checksum: xor of dwords 0-0x1fb. */
88 char unknown_guid4[16];
89 char unknown_guid5[16];
90 char unknown_guid6[16];
94 } __attribute__((__packed__));
96 struct ntreg_hbin_page {
97 char magic[4]; /* "hbin" */
98 uint32_t offset_first; /* offset from 1st block */
99 uint32_t page_size; /* size of this page (multiple of 4KB) */
101 /* Linked list of blocks follows here. */
102 } __attribute__((__packed__));
104 struct ntreg_hbin_block {
105 int32_t seg_len; /* length of this block (-ve for used block) */
106 char id[2]; /* the block type (eg. "nk" for nk record) */
107 /* Block data follows here. */
108 } __attribute__((__packed__));
110 #define BLOCK_ID_EQ(h,offs,eqid) \
111 (STREQLEN (((struct ntreg_hbin_block *)((h)->addr + (offs)))->id, (eqid), 2))
114 block_len (hive_h *h, size_t blkoff, int *used)
116 struct ntreg_hbin_block *block;
117 block = (struct ntreg_hbin_block *) (h->addr + blkoff);
119 int32_t len = le32toh (block->seg_len);
130 struct ntreg_nk_record {
131 int32_t seg_len; /* length (always -ve because used) */
132 char id[2]; /* "nk" */
136 uint32_t parent; /* offset of owner/parent */
137 uint32_t nr_subkeys; /* number of subkeys */
138 uint32_t nr_subkeys_volatile;
139 uint32_t subkey_lf; /* lf record containing list of subkeys */
140 uint32_t subkey_lf_volatile;
141 uint32_t nr_values; /* number of values */
142 uint32_t vallist; /* value-list record */
143 uint32_t sk; /* offset of sk-record */
144 uint32_t classname; /* offset of classname record */
145 uint16_t max_subkey_name_len; /* maximum length of a subkey name in bytes
146 if the subkey was reencoded as UTF-16LE */
149 uint32_t max_vk_name_len; /* maximum length of any vk name in bytes
150 if the name was reencoded as UTF-16LE */
151 uint32_t max_vk_data_len; /* maximum length of any vk data in bytes */
153 uint16_t name_len; /* length of name */
154 uint16_t classname_len; /* length of classname */
155 char name[1]; /* name follows here */
156 } __attribute__((__packed__));
158 struct ntreg_lf_record {
160 char id[2]; /* "lf"|"lh" */
161 uint16_t nr_keys; /* number of keys in this record */
163 uint32_t offset; /* offset of nk-record for this subkey */
164 char hash[4]; /* hash of subkey name */
166 } __attribute__((__packed__));
168 struct ntreg_ri_record {
170 char id[2]; /* "ri" */
171 uint16_t nr_offsets; /* number of pointers to lh records */
172 uint32_t offset[1]; /* list of pointers to lh records */
173 } __attribute__((__packed__));
175 /* This has no ID header. */
176 struct ntreg_value_list {
178 uint32_t offset[1]; /* list of pointers to vk records */
179 } __attribute__((__packed__));
181 struct ntreg_vk_record {
182 int32_t seg_len; /* length (always -ve because used) */
183 char id[2]; /* "vk" */
184 uint16_t name_len; /* length of name */
185 /* length of the data:
186 * If data_len is <= 4, then it's stored inline.
187 * Top bit is set to indicate inline.
190 uint32_t data_offset; /* pointer to the data (or data if inline) */
191 uint32_t data_type; /* type of the data */
192 uint16_t flags; /* bit 0 set => key name ASCII,
193 bit 0 clr => key name UTF-16.
194 Only seen ASCII here in the wild.
195 NB: this is CLEAR for default key. */
197 char name[1]; /* key name follows here */
198 } __attribute__((__packed__));
200 struct ntreg_sk_record {
201 int32_t seg_len; /* length (always -ve because used) */
202 char id[2]; /* "sk" */
204 uint32_t sk_next; /* linked into a circular list */
206 uint32_t refcount; /* reference count */
207 uint32_t sec_len; /* length of security info */
208 char sec_desc[1]; /* security info follows */
209 } __attribute__((__packed__));
212 header_checksum (const hive_h *h)
214 uint32_t *daddr = (uint32_t *) h->addr;
218 for (i = 0; i < 0x1fc / 4; ++i) {
219 sum ^= le32toh (*daddr);
226 #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
229 hivex_open (const char *filename, int flags)
233 assert (sizeof (struct ntreg_header) == 0x1000);
234 assert (offsetof (struct ntreg_header, csum) == 0x1fc);
236 h = calloc (1, sizeof *h);
240 h->msglvl = flags & HIVEX_OPEN_MSGLVL_MASK;
242 const char *debug = getenv ("HIVEX_DEBUG");
243 if (debug && STREQ (debug, "1"))
247 fprintf (stderr, "hivex_open: created handle %p\n", h);
249 h->writable = !!(flags & HIVEX_OPEN_WRITE);
250 h->filename = strdup (filename);
251 if (h->filename == NULL)
255 h->fd = open (filename, O_RDONLY | O_CLOEXEC);
257 h->fd = open (filename, O_RDONLY);
262 fcntl (h->fd, F_SETFD, FD_CLOEXEC);
266 if (fstat (h->fd, &statbuf) == -1)
269 h->size = statbuf.st_size;
272 h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0);
273 if (h->addr == MAP_FAILED)
277 fprintf (stderr, "hivex_open: mapped file at %p\n", h->addr);
279 h->addr = malloc (h->size);
283 if (full_read (h->fd, h->addr, h->size) < h->size)
286 /* We don't need the file descriptor along this path, since we
287 * have read all the data.
289 if (close (h->fd) == -1)
295 if (h->hdr->magic[0] != 'r' ||
296 h->hdr->magic[1] != 'e' ||
297 h->hdr->magic[2] != 'g' ||
298 h->hdr->magic[3] != 'f') {
299 fprintf (stderr, "hivex: %s: not a Windows NT Registry hive file\n",
305 /* Check major version. */
306 uint32_t major_ver = le32toh (h->hdr->major_ver);
307 if (major_ver != 1) {
309 "hivex: %s: hive file major version %" PRIu32 " (expected 1)\n",
310 filename, major_ver);
315 h->bitmap = calloc (1 + h->size / 32, 1);
316 if (h->bitmap == NULL)
319 /* Header checksum. */
320 uint32_t sum = header_checksum (h);
321 if (sum != le32toh (h->hdr->csum)) {
322 fprintf (stderr, "hivex: %s: bad checksum in hive header\n", filename);
327 /* Last modified time. */
328 h->last_modified = le64toh ((int64_t) h->hdr->last_modified);
330 if (h->msglvl >= 2) {
331 char *name = windows_utf16_to_utf8 (h->hdr->name, 64);
334 "hivex_open: header fields:\n"
335 " file version %" PRIu32 ".%" PRIu32 "\n"
336 " sequence nos %" PRIu32 " %" PRIu32 "\n"
337 " (sequences nos should match if hive was synched at shutdown)\n"
338 " last modified %" PRIu64 "\n"
339 " (Windows filetime, x 100 ns since 1601-01-01)\n"
340 " original file name %s\n"
341 " (only 32 chars are stored, name is probably truncated)\n"
342 " root offset 0x%x + 0x1000\n"
343 " end of last page 0x%x + 0x1000 (total file size 0x%zx)\n"
344 " checksum 0x%x (calculated 0x%x)\n",
345 major_ver, le32toh (h->hdr->minor_ver),
346 le32toh (h->hdr->sequence1), le32toh (h->hdr->sequence2),
348 name ? name : "(conversion failed)",
349 le32toh (h->hdr->offset),
350 le32toh (h->hdr->blocks), h->size,
351 le32toh (h->hdr->csum), sum);
355 h->rootoffs = le32toh (h->hdr->offset) + 0x1000;
356 h->endpages = le32toh (h->hdr->blocks) + 0x1000;
359 fprintf (stderr, "hivex_open: root offset = 0x%zx\n", h->rootoffs);
361 /* We'll set this flag when we see a block with the root offset (ie.
364 int seen_root_block = 0, bad_root_block = 0;
366 /* Collect some stats. */
367 size_t pages = 0; /* Number of hbin pages read. */
368 size_t smallest_page = SIZE_MAX, largest_page = 0;
369 size_t blocks = 0; /* Total number of blocks found. */
370 size_t smallest_block = SIZE_MAX, largest_block = 0, blocks_bytes = 0;
371 size_t used_blocks = 0; /* Total number of used blocks found. */
372 size_t used_size = 0; /* Total size (bytes) of used blocks. */
374 /* Read the pages and blocks. The aim here is to be robust against
375 * corrupt or malicious registries. So we make sure the loops
376 * always make forward progress. We add the address of each block
377 * we read to a hash table so pointers will only reference the start
381 struct ntreg_hbin_page *page;
382 for (off = 0x1000; off < h->size; off += le32toh (page->page_size)) {
383 if (off >= h->endpages)
386 page = (struct ntreg_hbin_page *) (h->addr + off);
387 if (page->magic[0] != 'h' ||
388 page->magic[1] != 'b' ||
389 page->magic[2] != 'i' ||
390 page->magic[3] != 'n') {
391 fprintf (stderr, "hivex: %s: trailing garbage at end of file "
392 "(at 0x%zx, after %zu pages)\n",
393 filename, off, pages);
398 size_t page_size = le32toh (page->page_size);
400 fprintf (stderr, "hivex_open: page at 0x%zx, size %zu\n", off, page_size);
402 if (page_size < smallest_page) smallest_page = page_size;
403 if (page_size > largest_page) largest_page = page_size;
405 if (page_size <= sizeof (struct ntreg_hbin_page) ||
406 (page_size & 0x0fff) != 0) {
407 fprintf (stderr, "hivex: %s: page size %zu at 0x%zx, bad registry\n",
408 filename, page_size, off);
413 /* Read the blocks in this page. */
415 struct ntreg_hbin_block *block;
417 for (blkoff = off + 0x20;
418 blkoff < off + page_size;
422 int is_root = blkoff == h->rootoffs;
426 block = (struct ntreg_hbin_block *) (h->addr + blkoff);
428 seg_len = block_len (h, blkoff, &used);
429 if (seg_len <= 4 || (seg_len & 3) != 0) {
430 fprintf (stderr, "hivex: %s: block size %" PRIu32 " at 0x%zx,"
432 filename, le32toh (block->seg_len), blkoff);
438 fprintf (stderr, "hivex_open: %s block id %d,%d at 0x%zx size %zu%s\n",
439 used ? "used" : "free", block->id[0], block->id[1], blkoff,
440 seg_len, is_root ? " (root)" : "");
442 blocks_bytes += seg_len;
443 if (seg_len < smallest_block) smallest_block = seg_len;
444 if (seg_len > largest_block) largest_block = seg_len;
446 if (is_root && !used)
451 used_size += seg_len;
453 /* Root block must be an nk-block. */
454 if (is_root && (block->id[0] != 'n' || block->id[1] != 'k'))
457 /* Note this blkoff is a valid address. */
458 BITMAP_SET (h->bitmap, blkoff);
463 if (!seen_root_block) {
464 fprintf (stderr, "hivex: %s: no root block found\n", filename);
469 if (bad_root_block) {
470 fprintf (stderr, "hivex: %s: bad root block (free or not nk)\n", filename);
477 "hivex_open: successfully read Windows Registry hive file:\n"
478 " pages: %zu [sml: %zu, lge: %zu]\n"
479 " blocks: %zu [sml: %zu, avg: %zu, lge: %zu]\n"
480 " blocks used: %zu\n"
481 " bytes used: %zu\n",
482 pages, smallest_page, largest_page,
483 blocks, smallest_block, blocks_bytes / blocks, largest_block,
484 used_blocks, used_size);
492 if (h->addr && h->size && h->addr != MAP_FAILED) {
494 munmap (h->addr, h->size);
508 hivex_close (hive_h *h)
513 fprintf (stderr, "hivex_close\n");
517 munmap (h->addr, h->size);
530 /*----------------------------------------------------------------------
535 hivex_root (hive_h *h)
537 hive_node_h ret = h->rootoffs;
538 if (!IS_VALID_BLOCK (h, ret)) {
539 errno = HIVEX_NO_KEY;
546 hivex_node_struct_length (hive_h *h, hive_node_h node)
548 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
553 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
554 size_t name_len = le16toh (nk->name_len);
555 /* -1 to avoid double-counting the first name character */
556 size_t ret = name_len + sizeof (struct ntreg_nk_record) - 1;
558 size_t seg_len = block_len (h, node, &used);
561 fprintf (stderr, "hivex_node_struct_length: returning EFAULT because"
562 " node name is too long (%zu, %zu)\n", name_len, seg_len);
570 hivex_node_name (hive_h *h, hive_node_h node)
572 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
577 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
579 /* AFAIK the node name is always plain ASCII, so no conversion
580 * to UTF-8 is necessary. However we do need to nul-terminate
584 /* nk->name_len is unsigned, 16 bit, so this is safe ... However
585 * we have to make sure the length doesn't exceed the block length.
587 size_t len = le16toh (nk->name_len);
588 size_t seg_len = block_len (h, node, NULL);
589 if (sizeof (struct ntreg_nk_record) + len - 1 > seg_len) {
591 fprintf (stderr, "hivex_node_name: returning EFAULT because node name"
592 " is too long (%zu, %zu)\n",
598 char *ret = malloc (len + 1);
601 memcpy (ret, nk->name, len);
607 timestamp_check (hive_h *h, hive_node_h node, int64_t timestamp)
611 fprintf (stderr, "hivex: timestamp_check: "
612 "negative time reported at %zu: %" PRIi64 "\n",
622 hivex_last_modified (hive_h *h)
624 return timestamp_check (h, 0, h->last_modified);
628 hivex_node_timestamp (hive_h *h, hive_node_h node)
632 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
637 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
639 ret = le64toh (nk->timestamp);
640 return timestamp_check (h, node, ret);
644 /* I think the documentation for the sk and classname fields in the nk
645 * record is wrong, or else the offset field is in the wrong place.
646 * Otherwise this makes no sense. Disabled this for now -- it's not
647 * useful for reading the registry anyway.
651 hivex_node_security (hive_h *h, hive_node_h node)
653 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
658 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
660 hive_node_h ret = le32toh (nk->sk);
662 if (!IS_VALID_BLOCK (h, ret)) {
670 hivex_node_classname (hive_h *h, hive_node_h node)
672 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
677 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
679 hive_node_h ret = le32toh (nk->classname);
681 if (!IS_VALID_BLOCK (h, ret)) {
689 /* Structure for returning 0-terminated lists of offsets (nodes,
699 init_offset_list (struct offset_list *list)
703 list->offsets = NULL;
706 #define INIT_OFFSET_LIST(name) \
707 struct offset_list name; \
708 init_offset_list (&name)
710 /* Preallocates the offset_list, but doesn't make the contents longer. */
712 grow_offset_list (struct offset_list *list, size_t alloc)
714 assert (alloc >= list->len);
715 size_t *p = realloc (list->offsets, alloc * sizeof (size_t));
724 add_to_offset_list (struct offset_list *list, size_t offset)
726 if (list->len >= list->alloc) {
727 if (grow_offset_list (list, list->alloc ? list->alloc * 2 : 4) == -1)
730 list->offsets[list->len] = offset;
736 free_offset_list (struct offset_list *list)
738 free (list->offsets);
742 return_offset_list (struct offset_list *list)
744 if (add_to_offset_list (list, 0) == -1)
746 return list->offsets; /* caller frees */
749 /* Iterate over children, returning child nodes and intermediate blocks. */
750 #define GET_CHILDREN_NO_CHECK_NK 1
753 get_children (hive_h *h, hive_node_h node,
754 hive_node_h **children_ret, size_t **blocks_ret,
757 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
762 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
764 size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys);
766 INIT_OFFSET_LIST (children);
767 INIT_OFFSET_LIST (blocks);
769 /* Deal with the common "no subkeys" case quickly. */
770 if (nr_subkeys_in_nk == 0)
773 /* Arbitrarily limit the number of subkeys we will ever deal with. */
774 if (nr_subkeys_in_nk > HIVEX_MAX_SUBKEYS) {
776 fprintf (stderr, "hivex: get_children: returning ERANGE because "
777 "nr_subkeys_in_nk > HIVEX_MAX_SUBKEYS (%zu > %d)\n",
778 nr_subkeys_in_nk, HIVEX_MAX_SUBKEYS);
783 /* Preallocate space for the children. */
784 if (grow_offset_list (&children, nr_subkeys_in_nk) == -1)
787 /* The subkey_lf field can point either to an lf-record, which is
788 * the common case, or if there are lots of subkeys, to an
791 size_t subkey_lf = le32toh (nk->subkey_lf);
793 if (!IS_VALID_BLOCK (h, subkey_lf)) {
795 fprintf (stderr, "hivex_node_children: returning EFAULT"
796 " because subkey_lf is not a valid block (0x%zx)\n",
802 if (add_to_offset_list (&blocks, subkey_lf) == -1)
805 struct ntreg_hbin_block *block =
806 (struct ntreg_hbin_block *) (h->addr + subkey_lf);
808 /* Points to lf-record? (Note, also "lh" but that is basically the
809 * same as "lf" as far as we are concerned here).
811 if (block->id[0] == 'l' && (block->id[1] == 'f' || block->id[1] == 'h')) {
812 struct ntreg_lf_record *lf = (struct ntreg_lf_record *) block;
814 /* Check number of subkeys in the nk-record matches number of subkeys
817 size_t nr_subkeys_in_lf = le16toh (lf->nr_keys);
820 fprintf (stderr, "hivex_node_children: nr_subkeys_in_nk = %zu,"
821 " nr_subkeys_in_lf = %zu\n",
822 nr_subkeys_in_nk, nr_subkeys_in_lf);
824 if (nr_subkeys_in_nk != nr_subkeys_in_lf) {
829 size_t len = block_len (h, subkey_lf, NULL);
830 if (8 + nr_subkeys_in_lf * 8 > len) {
832 fprintf (stderr, "hivex_node_children: returning EFAULT"
833 " because too many subkeys (%zu, %zu)\n",
834 nr_subkeys_in_lf, len);
840 for (i = 0; i < nr_subkeys_in_lf; ++i) {
841 hive_node_h subkey = le32toh (lf->keys[i].offset);
843 if (!(flags & GET_CHILDREN_NO_CHECK_NK)) {
844 if (!IS_VALID_BLOCK (h, subkey)) {
846 fprintf (stderr, "hivex_node_children: returning EFAULT"
847 " because subkey is not a valid block (0x%zx)\n",
853 if (add_to_offset_list (&children, subkey) == -1)
858 /* Points to ri-record? */
859 else if (block->id[0] == 'r' && block->id[1] == 'i') {
860 struct ntreg_ri_record *ri = (struct ntreg_ri_record *) block;
862 size_t nr_offsets = le16toh (ri->nr_offsets);
864 /* Count total number of children. */
866 for (i = 0; i < nr_offsets; ++i) {
867 hive_node_h offset = le32toh (ri->offset[i]);
869 if (!IS_VALID_BLOCK (h, offset)) {
871 fprintf (stderr, "hivex_node_children: returning EFAULT"
872 " because ri-offset is not a valid block (0x%zx)\n",
877 if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) {
879 fprintf (stderr, "get_children: returning ENOTSUP"
880 " because ri-record offset does not point to lf/lh (0x%zx)\n",
886 if (add_to_offset_list (&blocks, offset) == -1)
889 struct ntreg_lf_record *lf =
890 (struct ntreg_lf_record *) (h->addr + offset);
892 count += le16toh (lf->nr_keys);
896 fprintf (stderr, "hivex_node_children: nr_subkeys_in_nk = %zu,"
898 nr_subkeys_in_nk, count);
900 if (nr_subkeys_in_nk != count) {
905 /* Copy list of children. Note nr_subkeys_in_nk is limited to
906 * something reasonable above.
908 for (i = 0; i < nr_offsets; ++i) {
909 hive_node_h offset = le32toh (ri->offset[i]);
911 if (!IS_VALID_BLOCK (h, offset)) {
913 fprintf (stderr, "hivex_node_children: returning EFAULT"
914 " because ri-offset is not a valid block (0x%zx)\n",
919 if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) {
921 fprintf (stderr, "get_children: returning ENOTSUP"
922 " because ri-record offset does not point to lf/lh (0x%zx)\n",
928 struct ntreg_lf_record *lf =
929 (struct ntreg_lf_record *) (h->addr + offset);
932 for (j = 0; j < le16toh (lf->nr_keys); ++j) {
933 hive_node_h subkey = le32toh (lf->keys[j].offset);
935 if (!(flags & GET_CHILDREN_NO_CHECK_NK)) {
936 if (!IS_VALID_BLOCK (h, subkey)) {
938 fprintf (stderr, "hivex_node_children: returning EFAULT"
939 " because indirect subkey is not a valid block (0x%zx)\n",
945 if (add_to_offset_list (&children, subkey) == -1)
951 /* else not supported, set errno and fall through */
953 fprintf (stderr, "get_children: returning ENOTSUP"
954 " because subkey block is not lf/lh/ri (0x%zx, %d, %d)\n",
955 subkey_lf, block->id[0], block->id[1]);
958 free_offset_list (&children);
959 free_offset_list (&blocks);
963 *children_ret = return_offset_list (&children);
964 *blocks_ret = return_offset_list (&blocks);
965 if (!*children_ret || !*blocks_ret)
971 hivex_node_children (hive_h *h, hive_node_h node)
973 hive_node_h *children;
976 if (get_children (h, node, &children, &blocks, 0) == -1)
983 /* Very inefficient, but at least having a separate API call
984 * allows us to make it more efficient in future.
987 hivex_node_get_child (hive_h *h, hive_node_h node, const char *nname)
989 hive_node_h *children = NULL;
993 children = hivex_node_children (h, node);
994 if (!children) goto error;
997 for (i = 0; children[i] != 0; ++i) {
998 name = hivex_node_name (h, children[i]);
999 if (!name) goto error;
1000 if (STRCASEEQ (name, nname)) {
1004 free (name); name = NULL;
1014 hivex_node_parent (hive_h *h, hive_node_h node)
1016 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
1021 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
1023 hive_node_h ret = le32toh (nk->parent);
1025 if (!IS_VALID_BLOCK (h, ret)) {
1027 fprintf (stderr, "hivex_node_parent: returning EFAULT"
1028 " because parent is not a valid block (0x%zx)\n",
1037 get_values (hive_h *h, hive_node_h node,
1038 hive_value_h **values_ret, size_t **blocks_ret)
1040 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
1045 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
1047 size_t nr_values = le32toh (nk->nr_values);
1050 fprintf (stderr, "hivex_node_values: nr_values = %zu\n", nr_values);
1052 INIT_OFFSET_LIST (values);
1053 INIT_OFFSET_LIST (blocks);
1055 /* Deal with the common "no values" case quickly. */
1059 /* Arbitrarily limit the number of values we will ever deal with. */
1060 if (nr_values > HIVEX_MAX_VALUES) {
1062 fprintf (stderr, "hivex: get_values: returning ERANGE"
1063 " because nr_values > HIVEX_MAX_VALUES (%zu > %d)\n",
1064 nr_values, HIVEX_MAX_VALUES);
1069 /* Preallocate space for the values. */
1070 if (grow_offset_list (&values, nr_values) == -1)
1073 /* Get the value list and check it looks reasonable. */
1074 size_t vlist_offset = le32toh (nk->vallist);
1075 vlist_offset += 0x1000;
1076 if (!IS_VALID_BLOCK (h, vlist_offset)) {
1078 fprintf (stderr, "hivex_node_values: returning EFAULT"
1079 " because value list is not a valid block (0x%zx)\n",
1085 if (add_to_offset_list (&blocks, vlist_offset) == -1)
1088 struct ntreg_value_list *vlist =
1089 (struct ntreg_value_list *) (h->addr + vlist_offset);
1091 size_t len = block_len (h, vlist_offset, NULL);
1092 if (4 + nr_values * 4 > len) {
1094 fprintf (stderr, "hivex_node_values: returning EFAULT"
1095 " because value list is too long (%zu, %zu)\n",
1102 for (i = 0; i < nr_values; ++i) {
1103 hive_node_h value = le32toh (vlist->offset[i]);
1105 if (!IS_VALID_BLOCK (h, value)) {
1107 fprintf (stderr, "hivex_node_values: returning EFAULT"
1108 " because value is not a valid block (0x%zx)\n",
1113 if (add_to_offset_list (&values, value) == -1)
1118 *values_ret = return_offset_list (&values);
1119 *blocks_ret = return_offset_list (&blocks);
1120 if (!*values_ret || !*blocks_ret)
1125 free_offset_list (&values);
1126 free_offset_list (&blocks);
1131 hivex_node_values (hive_h *h, hive_node_h node)
1133 hive_value_h *values;
1136 if (get_values (h, node, &values, &blocks) == -1)
1143 /* Very inefficient, but at least having a separate API call
1144 * allows us to make it more efficient in future.
1147 hivex_node_get_value (hive_h *h, hive_node_h node, const char *key)
1149 hive_value_h *values = NULL;
1151 hive_value_h ret = 0;
1153 values = hivex_node_values (h, node);
1154 if (!values) goto error;
1157 for (i = 0; values[i] != 0; ++i) {
1158 name = hivex_value_key (h, values[i]);
1159 if (!name) goto error;
1160 if (STRCASEEQ (name, key)) {
1164 free (name); name = NULL;
1174 hivex_value_struct_length (hive_h *h, hive_value_h value)
1179 key_len = hivex_value_key_len (h, value);
1180 if (key_len == 0 && errno != 0)
1183 /* -1 to avoid double-counting the first name character */
1184 return key_len + sizeof (struct ntreg_vk_record) - 1;
1188 hivex_value_key_len (hive_h *h, hive_value_h value)
1190 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1195 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1197 /* vk->name_len is unsigned, 16 bit, so this is safe ... However
1198 * we have to make sure the length doesn't exceed the block length.
1200 size_t ret = le16toh (vk->name_len);
1201 size_t seg_len = block_len (h, value, NULL);
1202 if (sizeof (struct ntreg_vk_record) + ret - 1 > seg_len) {
1204 fprintf (stderr, "hivex_value_key_len: returning EFAULT"
1205 " because key length is too long (%zu, %zu)\n",
1214 hivex_value_key (hive_h *h, hive_value_h value)
1216 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1221 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1223 /* AFAIK the key is always plain ASCII, so no conversion to UTF-8 is
1224 * necessary. However we do need to nul-terminate the string.
1227 size_t len = hivex_value_key_len (h, value);
1228 if (len == 0 && errno != 0)
1231 char *ret = malloc (len + 1);
1234 memcpy (ret, vk->name, len);
1240 hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len)
1242 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1247 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1250 *t = le32toh (vk->data_type);
1253 *len = le32toh (vk->data_len);
1254 *len &= 0x7fffffff; /* top bit indicates if data is stored inline */
1261 hivex_value_value (hive_h *h, hive_value_h value,
1262 hive_type *t_rtn, size_t *len_rtn)
1264 if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
1269 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
1275 t = le32toh (vk->data_type);
1277 len = le32toh (vk->data_len);
1278 is_inline = !!(len & 0x80000000);
1282 fprintf (stderr, "hivex_value_value: value=0x%zx, t=%d, len=%zu, inline=%d\n",
1283 value, t, len, is_inline);
1290 if (is_inline && len > 4) {
1295 /* Arbitrarily limit the length that we will read. */
1296 if (len > HIVEX_MAX_VALUE_LEN) {
1298 fprintf (stderr, "hivex_value_value: returning ERANGE because data "
1299 "length > HIVEX_MAX_VALUE_LEN (%zu > %d)\n",
1300 len, HIVEX_MAX_SUBKEYS);
1305 char *ret = malloc (len);
1310 memcpy (ret, (char *) &vk->data_offset, len);
1314 size_t data_offset = le32toh (vk->data_offset);
1315 data_offset += 0x1000;
1316 if (!IS_VALID_BLOCK (h, data_offset)) {
1318 fprintf (stderr, "hivex_value_value: returning EFAULT because data "
1319 "offset is not a valid block (0x%zx)\n",
1326 /* Check that the declared size isn't larger than the block its in.
1328 * XXX Some apparently valid registries are seen to have this,
1329 * so turn this into a warning and substitute the smaller length
1332 size_t blen = block_len (h, data_offset, NULL);
1333 if (len > blen - 4 /* subtract 4 for block header */) {
1335 fprintf (stderr, "hivex_value_value: warning: declared data length "
1336 "is longer than the block it is in "
1337 "(data 0x%zx, data len %zu, block len %zu)\n",
1338 data_offset, len, blen);
1341 /* Return the smaller length to the caller too. */
1346 char *data = h->addr + data_offset + 4;
1347 memcpy (ret, data, len);
/* Convert 'len' bytes of UTF-16LE text at 'input' to a newly allocated,
 * nul-terminated UTF-8 string.  The caller frees the result.
 *
 * Registry data is little endian, so the source encoding is named
 * explicitly as "UTF-16LE"; plain "UTF-16" leaves the byte order to
 * the iconv implementation when no byte order mark is present.
 *
 * Returns NULL on failure with errno set by iconv_open, malloc or
 * iconv (eg. EILSEQ, EINVAL), or E2BIG if the output buffer cannot be
 * grown any further.
 */
static char *
windows_utf16_to_utf8 (/* const */ char *input, size_t len)
{
  iconv_t ic = iconv_open ("UTF-8", "UTF-16LE");
  if (ic == (iconv_t) -1)
    return NULL;

  /* Mostly UTF-8 will be smaller, so this is a good initial guess. */
  size_t outalloc = len;

  for (;;) {
    char *out = malloc (outalloc + 1);
    if (out == NULL) {
      int err = errno;
      iconv_close (ic);
      errno = err;
      return NULL;
    }

    /* iconv(3) advances the pointers and decrements the counters, so
     * work on copies and restart from the beginning on every attempt.
     */
    char *inp = input;
    char *outp = out;
    size_t inlen = len;
    size_t outlen = outalloc;

    size_t r = iconv (ic, &inp, &inlen, &outp, &outlen);
    if (r != (size_t) -1) {
      *outp = '\0';
      iconv_close (ic);
      return out;
    }

    /* Save errno before free/iconv_close can disturb it. */
    int err = errno;
    free (out);

    if (err != E2BIG) {
      /* Some conversion failure, eg. EILSEQ, EINVAL. */
      iconv_close (ic);
      errno = err;
      return NULL;
    }

    /* Try again with a larger output buffer.  Give up if doubling
     * does not actually grow it (overflow, or a zero-sized start).
     */
    size_t prev = outalloc;
    outalloc *= 2;
    if (outalloc <= prev) {
      iconv_close (ic);
      errno = err;
      return NULL;
    }

    /* Put the descriptor back in its initial state before retrying. */
    iconv (ic, NULL, NULL, NULL, NULL);
  }
}
1408 hivex_value_string (hive_h *h, hive_value_h value)
1412 char *data = hivex_value_value (h, value, &t, &len);
1417 if (t != hive_t_string && t != hive_t_expand_string && t != hive_t_link) {
1423 /* Deal with the case where Windows has allocated a large buffer
1424 * full of random junk, and only the first few bytes of the buffer
1425 * contain a genuine UTF-16 string.
1427 * In this case, iconv would try to process the junk bytes as UTF-16
1428 * and inevitably find an illegal sequence (EILSEQ). Instead, stop
1429 * after we find the first \0\0.
1431 * (Found by Hilko Bengen in a fresh Windows XP SOFTWARE hive).
1433 size_t slen = utf16_string_len_in_bytes_max (data, len);
1437 char *ret = windows_utf16_to_utf8 (data, len);
/* Free a NULL-terminated array of heap-allocated strings, and the
 * array itself.  Passing NULL is allowed and does nothing.
 */
static void
free_strings (char **argv)
{
  char **cur;

  if (argv == NULL)
    return;

  for (cur = argv; *cur != NULL; ++cur)
    free (*cur);

  free (argv);
}
1457 /* Get the length of a UTF-16 format string. Handle the string as
1458 * pairs of bytes, looking for the first \0\0 pair. Only read up to
1459 * 'len' maximum bytes.
static size_t
utf16_string_len_in_bytes_max (const char *str, size_t len)
{
  size_t pos;

  /* Step through whole 2-byte code units only; a trailing odd byte is
   * never examined.  Stop at the first all-zero unit.
   */
  for (pos = 0; len - pos >= 2; pos += 2) {
    if (str[pos] == 0 && str[pos + 1] == 0)
      break;
  }

  return pos;
}
1475 /* http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx */
1477 hivex_value_multiple_strings (hive_h *h, hive_value_h value)
1481 char *data = hivex_value_value (h, value, &t, &len);
1486 if (t != hive_t_multiple_strings) {
1492 size_t nr_strings = 0;
1493 char **ret = malloc ((1 + nr_strings) * sizeof (char *));
1503 while (p < data + len &&
1504 (plen = utf16_string_len_in_bytes_max (p, data + len - p)) > 0) {
1506 char **ret2 = realloc (ret, (1 + nr_strings) * sizeof (char *));
1514 ret[nr_strings-1] = windows_utf16_to_utf8 (p, plen);
1515 ret[nr_strings] = NULL;
1516 if (ret[nr_strings-1] == NULL) {
1522 p += plen + 2 /* skip over UTF-16 \0\0 at the end of this string */;
1530 hivex_value_dword (hive_h *h, hive_value_h value)
1534 char *data = hivex_value_value (h, value, &t, &len);
1539 if ((t != hive_t_dword && t != hive_t_dword_be) || len != 4) {
1545 int32_t ret = *(int32_t*)data;
1547 if (t == hive_t_dword) /* little endian */
1548 ret = le32toh (ret);
1550 ret = be32toh (ret);
1556 hivex_value_qword (hive_h *h, hive_value_h value)
1560 char *data = hivex_value_value (h, value, &t, &len);
1565 if (t != hive_t_qword || len != 8) {
1571 int64_t ret = *(int64_t*)data;
1573 ret = le64toh (ret); /* always little endian */
1578 /*----------------------------------------------------------------------
1583 hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len,
1584 void *opaque, int flags)
1586 return hivex_visit_node (h, hivex_root (h), visitor, len, opaque, flags);
1589 static int hivex__visit_node (hive_h *h, hive_node_h node,
1590 const struct hivex_visitor *vtor,
1591 char *unvisited, void *opaque, int flags);
1594 hivex_visit_node (hive_h *h, hive_node_h node,
1595 const struct hivex_visitor *visitor, size_t len, void *opaque,
1598 struct hivex_visitor vtor;
1599 memset (&vtor, 0, sizeof vtor);
1601 /* Note that len might be larger *or smaller* than the expected size. */
1602 size_t copysize = len <= sizeof vtor ? len : sizeof vtor;
1603 memcpy (&vtor, visitor, copysize);
1605 /* This bitmap records unvisited nodes, so we don't loop if the
1606 * registry contains cycles.
1608 char *unvisited = malloc (1 + h->size / 32);
1609 if (unvisited == NULL)
1611 memcpy (unvisited, h->bitmap, 1 + h->size / 32);
1613 int r = hivex__visit_node (h, node, &vtor, unvisited, opaque, flags);
1619 hivex__visit_node (hive_h *h, hive_node_h node,
1620 const struct hivex_visitor *vtor, char *unvisited,
1621 void *opaque, int flags)
1623 int skip_bad = flags & HIVEX_VISIT_SKIP_BAD;
1625 hive_value_h *values = NULL;
1626 hive_node_h *children = NULL;
1632 /* Return -1 on all callback errors. However on internal errors,
1633 * check if skip_bad is set and suppress those errors if so.
1637 if (!BITMAP_TST (unvisited, node)) {
1639 fprintf (stderr, "hivex__visit_node: contains cycle:"
1640 " visited node 0x%zx already\n",
1644 return skip_bad ? 0 : -1;
1646 BITMAP_CLR (unvisited, node);
1648 name = hivex_node_name (h, node);
1649 if (!name) return skip_bad ? 0 : -1;
1650 if (vtor->node_start && vtor->node_start (h, opaque, node, name) == -1)
1653 values = hivex_node_values (h, node);
1655 ret = skip_bad ? 0 : -1;
1659 for (i = 0; values[i] != 0; ++i) {
1663 if (hivex_value_type (h, values[i], &t, &len) == -1) {
1664 ret = skip_bad ? 0 : -1;
1668 key = hivex_value_key (h, values[i]);
1670 ret = skip_bad ? 0 : -1;
1674 if (vtor->value_any) {
1675 str = hivex_value_value (h, values[i], &t, &len);
1677 ret = skip_bad ? 0 : -1;
1680 if (vtor->value_any (h, opaque, node, values[i], t, len, key, str) == -1)
1682 free (str); str = NULL;
1687 str = hivex_value_value (h, values[i], &t, &len);
1689 ret = skip_bad ? 0 : -1;
1692 if (t != hive_t_none) {
1693 ret = skip_bad ? 0 : -1;
1696 if (vtor->value_none &&
1697 vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1)
1699 free (str); str = NULL;
1703 case hive_t_expand_string:
1705 str = hivex_value_string (h, values[i]);
1707 if (errno != EILSEQ && errno != EINVAL) {
1708 ret = skip_bad ? 0 : -1;
1711 if (vtor->value_string_invalid_utf16) {
1712 str = hivex_value_value (h, values[i], &t, &len);
1713 if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i],
1714 t, len, key, str) == -1)
1716 free (str); str = NULL;
1720 if (vtor->value_string &&
1721 vtor->value_string (h, opaque, node, values[i],
1722 t, len, key, str) == -1)
1724 free (str); str = NULL;
1728 case hive_t_dword_be: {
1729 int32_t i32 = hivex_value_dword (h, values[i]);
1730 if (vtor->value_dword &&
1731 vtor->value_dword (h, opaque, node, values[i],
1732 t, len, key, i32) == -1)
1737 case hive_t_qword: {
1738 int64_t i64 = hivex_value_qword (h, values[i]);
1739 if (vtor->value_qword &&
1740 vtor->value_qword (h, opaque, node, values[i],
1741 t, len, key, i64) == -1)
1747 str = hivex_value_value (h, values[i], &t, &len);
1749 ret = skip_bad ? 0 : -1;
1752 if (t != hive_t_binary) {
1753 ret = skip_bad ? 0 : -1;
1756 if (vtor->value_binary &&
1757 vtor->value_binary (h, opaque, node, values[i],
1758 t, len, key, str) == -1)
1760 free (str); str = NULL;
1763 case hive_t_multiple_strings:
1764 strs = hivex_value_multiple_strings (h, values[i]);
1766 if (errno != EILSEQ && errno != EINVAL) {
1767 ret = skip_bad ? 0 : -1;
1770 if (vtor->value_string_invalid_utf16) {
1771 str = hivex_value_value (h, values[i], &t, &len);
1772 if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i],
1773 t, len, key, str) == -1)
1775 free (str); str = NULL;
1779 if (vtor->value_multiple_strings &&
1780 vtor->value_multiple_strings (h, opaque, node, values[i],
1781 t, len, key, strs) == -1)
1783 free_strings (strs); strs = NULL;
1786 case hive_t_resource_list:
1787 case hive_t_full_resource_description:
1788 case hive_t_resource_requirements_list:
1790 str = hivex_value_value (h, values[i], &t, &len);
1792 ret = skip_bad ? 0 : -1;
1795 if (vtor->value_other &&
1796 vtor->value_other (h, opaque, node, values[i],
1797 t, len, key, str) == -1)
1799 free (str); str = NULL;
1804 free (key); key = NULL;
1807 children = hivex_node_children (h, node);
1808 if (children == NULL) {
1809 ret = skip_bad ? 0 : -1;
1813 for (i = 0; children[i] != 0; ++i) {
1815 fprintf (stderr, "hivex__visit_node: %s: visiting subkey %d (0x%zx)\n",
1816 name, i, children[i]);
1818 if (hivex__visit_node (h, children[i], vtor, unvisited, opaque, flags) == -1)
1822 if (vtor->node_end && vtor->node_end (h, opaque, node, name) == -1)
1833 free_strings (strs);
1837 /*----------------------------------------------------------------------
1841 /* Allocate an hbin (page), extending the malloc'd space if necessary,
1842 * and updating the hive handle fields (but NOT the hive disk header
1843 * -- the hive disk header is updated when we commit). This function
1844 * also extends the bitmap if necessary.
1846 * 'allocation_hint' is the size of the block allocation we would like
1847 * to make. Normally registry blocks are very small (avg 50 bytes)
1848 * and are contained in standard-sized pages (4KB), but the registry
1849 * can support blocks which are larger than a standard page, in which
1850 * case it creates a page of 8KB, 12KB etc.
1853 * > 0 : offset of first usable byte of new page (after page header)
1854 * 0 : error (errno set)
1857 allocate_page (hive_h *h, size_t allocation_hint)
1859 /* In almost all cases this will be 1. */
1860 size_t nr_4k_pages =
1861 1 + (allocation_hint + sizeof (struct ntreg_hbin_page) - 1) / 4096;
1862 assert (nr_4k_pages >= 1);
1864 /* 'extend' is the number of bytes to extend the file by. Note that
1865 * hives found in the wild often contain slack between 'endpages'
1866 * and the actual end of the file, so we don't always need to make
1869 ssize_t extend = h->endpages + nr_4k_pages * 4096 - h->size;
1871 if (h->msglvl >= 2) {
1872 fprintf (stderr, "allocate_page: current endpages = 0x%zx,"
1873 " current size = 0x%zx\n",
1874 h->endpages, h->size);
1875 fprintf (stderr, "allocate_page: extending file by %zd bytes"
1876 " (<= 0 if no extension)\n",
1881 size_t oldsize = h->size;
1882 size_t newsize = h->size + extend;
1883 char *newaddr = realloc (h->addr, newsize);
1884 if (newaddr == NULL)
1887 size_t oldbitmapsize = 1 + oldsize / 32;
1888 size_t newbitmapsize = 1 + newsize / 32;
1889 char *newbitmap = realloc (h->bitmap, newbitmapsize);
1890 if (newbitmap == NULL) {
1897 h->bitmap = newbitmap;
1899 memset (h->addr + oldsize, 0, newsize - oldsize);
1900 memset (h->bitmap + oldbitmapsize, 0, newbitmapsize - oldbitmapsize);
1903 size_t offset = h->endpages;
1904 h->endpages += nr_4k_pages * 4096;
1907 fprintf (stderr, "allocate_page: new endpages = 0x%zx, new size = 0x%zx\n",
1908 h->endpages, h->size);
1910 /* Write the hbin header. */
1911 struct ntreg_hbin_page *page =
1912 (struct ntreg_hbin_page *) (h->addr + offset);
1913 page->magic[0] = 'h';
1914 page->magic[1] = 'b';
1915 page->magic[2] = 'i';
1916 page->magic[3] = 'n';
1917 page->offset_first = htole32 (offset - 0x1000);
1918 page->page_size = htole32 (nr_4k_pages * 4096);
1919 memset (page->unknown, 0, sizeof (page->unknown));
1922 fprintf (stderr, "allocate_page: new page at 0x%zx\n", offset);
1924 /* Offset of first usable byte after the header. */
1925 return offset + sizeof (struct ntreg_hbin_page);
1928 /* Allocate a single block, first allocating an hbin (page) at the end
1929 * of the current file if necessary. NB. To keep the implementation
1930 * simple and more likely to be correct, we do not reuse existing free
1933 * seg_len is the size of the block (this INCLUDES the block header).
1934 * The header of the block is initialized to -seg_len (negative to
1935 * indicate used). id[2] is the block ID (type), eg. "nk" for nk-
1936 * record. The block bitmap is updated to show this block as valid.
1937 * The rest of the contents of the block will be zero.
1939 * **NB** Because allocate_block may reallocate the memory, all
1940 * pointers into the memory become potentially invalid. I really
1941 * love writing in C, can't you tell?
1944 * > 0 : offset of new block
1945 * 0 : error (errno set)
1948 allocate_block (hive_h *h, size_t seg_len, const char id[2])
1956 /* The caller probably forgot to include the header. Note that
1957 * value lists have no ID field, so seg_len == 4 would be possible
1958 * for them, albeit unusual.
1961 fprintf (stderr, "allocate_block: refusing too small allocation (%zu),"
1962 " returning ERANGE\n", seg_len);
1967 /* Refuse really large allocations. */
1968 if (seg_len > HIVEX_MAX_ALLOCATION) {
1970 fprintf (stderr, "allocate_block: refusing large allocation (%zu),"
1971 " returning ERANGE\n", seg_len);
1976 /* Round up allocation to multiple of 8 bytes. All blocks must be
1977 * on an 8 byte boundary.
1979 seg_len = (seg_len + 7) & ~7;
1981 /* Allocate a new page if necessary. */
1982 if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
1983 size_t newendblocks = allocate_page (h, seg_len);
1984 if (newendblocks == 0)
1986 h->endblocks = newendblocks;
1989 size_t offset = h->endblocks;
1992 fprintf (stderr, "allocate_block: new block at 0x%zx, size %zu\n",
1995 struct ntreg_hbin_block *blockhdr =
1996 (struct ntreg_hbin_block *) (h->addr + offset);
1998 memset (blockhdr, 0, seg_len);
2000 blockhdr->seg_len = htole32 (- (int32_t) seg_len);
2001 if (id[0] && id[1] && seg_len >= sizeof (struct ntreg_hbin_block)) {
2002 blockhdr->id[0] = id[0];
2003 blockhdr->id[1] = id[1];
2006 BITMAP_SET (h->bitmap, offset);
2008 h->endblocks += seg_len;
2010 /* If there is space after the last block in the last page, then we
2011 * have to put a dummy free block header here to mark the rest of
2014 ssize_t rem = h->endpages - h->endblocks;
2017 fprintf (stderr, "allocate_block: marking remainder of page free"
2018 " starting at 0x%zx, size %zd\n", h->endblocks, rem);
2022 blockhdr = (struct ntreg_hbin_block *) (h->addr + h->endblocks);
2023 blockhdr->seg_len = htole32 ((int32_t) rem);
2029 /* 'offset' must point to a valid, used block. This function marks
2030 * the block unused (by updating the seg_len field) and invalidates
2031 * the bitmap. It does NOT do this recursively, so to avoid creating
2032 * unreachable used blocks, callers may have to recurse over the hive
2033 * structures. Also callers must ensure there are no references to
2034 * this block from other parts of the hive.
2037 mark_block_unused (hive_h *h, size_t offset)
2039 assert (h->writable);
2040 assert (IS_VALID_BLOCK (h, offset));
2043 fprintf (stderr, "mark_block_unused: marking 0x%zx unused\n", offset);
2045 struct ntreg_hbin_block *blockhdr =
2046 (struct ntreg_hbin_block *) (h->addr + offset);
2048 size_t seg_len = block_len (h, offset, NULL);
2049 blockhdr->seg_len = htole32 (seg_len);
2051 BITMAP_CLR (h->bitmap, offset);
2054 /* Delete all existing values at this node. */
2056 delete_values (hive_h *h, hive_node_h node)
2058 assert (h->writable);
2060 hive_value_h *values;
2062 if (get_values (h, node, &values, &blocks) == -1)
2066 for (i = 0; blocks[i] != 0; ++i)
2067 mark_block_unused (h, blocks[i]);
2071 for (i = 0; values[i] != 0; ++i) {
2072 struct ntreg_vk_record *vk =
2073 (struct ntreg_vk_record *) (h->addr + values[i]);
2077 len = le32toh (vk->data_len);
2078 is_inline = !!(len & 0x80000000); /* top bit indicates is inline */
2081 if (!is_inline) { /* non-inline, so remove data block */
2082 size_t data_offset = le32toh (vk->data_offset);
2083 data_offset += 0x1000;
2084 mark_block_unused (h, data_offset);
2087 /* remove vk record */
2088 mark_block_unused (h, values[i]);
2093 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
2094 nk->nr_values = htole32 (0);
2095 nk->vallist = htole32 (0xffffffff);
2101 hivex_commit (hive_h *h, const char *filename, int flags)
2113 filename = filename ? : h->filename;
2114 int fd = open (filename, O_WRONLY|O_CREAT|O_TRUNC|O_NOCTTY, 0666);
2118 /* Update the header fields. */
2119 uint32_t sequence = le32toh (h->hdr->sequence1);
2121 h->hdr->sequence1 = htole32 (sequence);
2122 h->hdr->sequence2 = htole32 (sequence);
2123 /* XXX Ought to update h->hdr->last_modified. */
2124 h->hdr->blocks = htole32 (h->endpages - 0x1000);
2126 /* Recompute header checksum. */
2127 uint32_t sum = header_checksum (h);
2128 h->hdr->csum = htole32 (sum);
2131 fprintf (stderr, "hivex_commit: new header checksum: 0x%x\n", sum);
2133 if (full_write (fd, h->addr, h->size) != h->size) {
2140 if (close (fd) == -1)
/* Calculate the hash for a lf or lh record offset.
 *
 * 'type' selects the algorithm by its first two characters and need
 * not be NUL-terminated beyond them.  'ret' receives up to 4 bytes:
 *
 * "lf": the first (at most 4) bytes of 'name', copied verbatim; the
 *       remaining bytes of 'ret' are left untouched.
 * else: a 32 bit hash (h = h * 37 + toupper (c) over every character
 *       of 'name'), stored little endian.
 */
static void
calc_hash (const char *type, const char *name, char *ret)
{
  size_t len = strlen (name);

  if (STRPREFIX (type, "lf"))
    /* Old-style, not used in current registries. */
    memcpy (ret, name, len < 4 ? len : 4);
  else {
    /* New-style for lh-records. */
    size_t i, c;
    uint32_t h = 0;
    for (i = 0; i < len; ++i) {
      c = c_toupper (name[i]);
      h *= 37;
      h += c;
    }

    /* 'ret' is a plain char buffer with no alignment guarantee, so
     * copy the bytes instead of storing through a cast pointer.
     */
    uint32_t le = htole32 (h);
    memcpy (ret, &le, sizeof le);
  }
}
2169 /* Create a completely new lh-record containing just the single node. */
2171 new_lh_record (hive_h *h, const char *name, hive_node_h node)
2173 static const char id[2] = { 'l', 'h' };
2174 size_t seg_len = sizeof (struct ntreg_lf_record);
2175 size_t offset = allocate_block (h, seg_len, id);
2179 struct ntreg_lf_record *lh = (struct ntreg_lf_record *) (h->addr + offset);
2180 lh->nr_keys = htole16 (1);
2181 lh->keys[0].offset = htole32 (node - 0x1000);
2182 calc_hash ("lh", name, lh->keys[0].hash);
2187 /* Insert node into existing lf/lh-record at position.
2188 * This allocates a new record and marks the old one as unused.
2191 insert_lf_record (hive_h *h, size_t old_offs, size_t posn,
2192 const char *name, hive_node_h node)
2194 assert (IS_VALID_BLOCK (h, old_offs));
2196 /* Work around C stupidity.
2197 * http://www.redhat.com/archives/libguestfs/2010-February/msg00056.html
2199 int test = BLOCK_ID_EQ (h, old_offs, "lf") || BLOCK_ID_EQ (h, old_offs, "lh");
2202 struct ntreg_lf_record *old_lf =
2203 (struct ntreg_lf_record *) (h->addr + old_offs);
2204 size_t nr_keys = le16toh (old_lf->nr_keys);
2206 nr_keys++; /* in new record ... */
2208 size_t seg_len = sizeof (struct ntreg_lf_record) + (nr_keys-1) * 8;
2210 /* Copy the old_lf->id in case it moves during allocate_block. */
2212 memcpy (id, old_lf->id, sizeof id);
2214 size_t new_offs = allocate_block (h, seg_len, id);
2218 /* old_lf could have been invalidated by allocate_block. */
2219 old_lf = (struct ntreg_lf_record *) (h->addr + old_offs);
2221 struct ntreg_lf_record *new_lf =
2222 (struct ntreg_lf_record *) (h->addr + new_offs);
2223 new_lf->nr_keys = htole16 (nr_keys);
2225 /* Copy the keys until we reach posn, insert the new key there, then
2226 * copy the remaining keys.
2229 for (i = 0; i < posn; ++i)
2230 new_lf->keys[i] = old_lf->keys[i];
2232 new_lf->keys[i].offset = htole32 (node - 0x1000);
2233 calc_hash (new_lf->id, name, new_lf->keys[i].hash);
2235 for (i = posn+1; i < nr_keys; ++i)
2236 new_lf->keys[i] = old_lf->keys[i-1];
2238 /* Old block is unused, return new block. */
2239 mark_block_unused (h, old_offs);
2243 /* Compare name with name in nk-record. */
2245 compare_name_with_nk_name (hive_h *h, const char *name, hive_node_h nk_offs)
2247 assert (IS_VALID_BLOCK (h, nk_offs));
2248 assert (BLOCK_ID_EQ (h, nk_offs, "nk"));
2250 /* Name in nk is not necessarily nul-terminated. */
2251 char *nname = hivex_node_name (h, nk_offs);
2253 /* Unfortunately we don't have a way to return errors here. */
2255 perror ("compare_name_with_nk_name");
2259 int r = strcasecmp (name, nname);
2266 hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name)
2273 if (!IS_VALID_BLOCK (h, parent) || !BLOCK_ID_EQ (h, parent, "nk")) {
2278 if (name == NULL || strlen (name) == 0) {
2283 if (hivex_node_get_child (h, parent, name) != 0) {
2288 /* Create the new nk-record. */
2289 static const char nk_id[2] = { 'n', 'k' };
2290 size_t seg_len = sizeof (struct ntreg_nk_record) + strlen (name);
2291 hive_node_h node = allocate_block (h, seg_len, nk_id);
2296 fprintf (stderr, "hivex_node_add_child: allocated new nk-record"
2297 " for child at 0x%zx\n", node);
2299 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
2300 nk->flags = htole16 (0x0020); /* key is ASCII. */
2301 nk->parent = htole32 (parent - 0x1000);
2302 nk->subkey_lf = htole32 (0xffffffff);
2303 nk->subkey_lf_volatile = htole32 (0xffffffff);
2304 nk->vallist = htole32 (0xffffffff);
2305 nk->classname = htole32 (0xffffffff);
2306 nk->name_len = htole16 (strlen (name));
2307 strcpy (nk->name, name);
2309 /* Inherit parent sk. */
2310 struct ntreg_nk_record *parent_nk =
2311 (struct ntreg_nk_record *) (h->addr + parent);
2312 size_t parent_sk_offset = le32toh (parent_nk->sk);
2313 parent_sk_offset += 0x1000;
2314 if (!IS_VALID_BLOCK (h, parent_sk_offset) ||
2315 !BLOCK_ID_EQ (h, parent_sk_offset, "sk")) {
2317 fprintf (stderr, "hivex_node_add_child: returning EFAULT"
2318 " because parent sk is not a valid block (%zu)\n",
2323 struct ntreg_sk_record *sk =
2324 (struct ntreg_sk_record *) (h->addr + parent_sk_offset);
2325 sk->refcount = htole32 (le32toh (sk->refcount) + 1);
2326 nk->sk = htole32 (parent_sk_offset - 0x1000);
2328 /* Inherit parent timestamp. */
2329 nk->timestamp = parent_nk->timestamp;
2331 /* What I found out the hard way (not documented anywhere): the
2332 * subkeys in lh-records must be kept sorted. If you just add a
2333 * subkey in a non-sorted position (eg. just add it at the end) then
2334 * Windows won't see the subkey _and_ Windows will corrupt the hive
2335 * itself when it modifies or saves it.
2337 * So use get_children() to get a list of intermediate
2338 * lf/lh-records. get_children() returns these in reading order
2339 * (which is sorted), so we look for the lf/lh-records in sequence
2340 * until we find the key name just after the one we are inserting,
2341 * and we insert the subkey just before it.
2343 * The only other case is the no-subkeys case, where we have to
2344 * create a brand new lh-record.
2346 hive_node_h *unused;
2349 if (get_children (h, parent, &unused, &blocks, 0) == -1)
2354 size_t nr_subkeys_in_parent_nk = le32toh (parent_nk->nr_subkeys);
2355 if (nr_subkeys_in_parent_nk == 0) { /* No subkeys case. */
2356 /* Free up any existing intermediate blocks. */
2357 for (i = 0; blocks[i] != 0; ++i)
2358 mark_block_unused (h, blocks[i]);
2359 size_t lh_offs = new_lh_record (h, name, node);
2365 /* Recalculate pointers that could have been invalidated by
2366 * previous call to allocate_block (via new_lh_record).
2368 nk = (struct ntreg_nk_record *) (h->addr + node);
2369 parent_nk = (struct ntreg_nk_record *) (h->addr + parent);
2372 fprintf (stderr, "hivex_node_add_child: no keys, allocated new"
2373 " lh-record at 0x%zx\n", lh_offs);
2375 parent_nk->subkey_lf = htole32 (lh_offs - 0x1000);
2377 else { /* Insert subkeys case. */
2378 size_t old_offs = 0, new_offs = 0;
2379 struct ntreg_lf_record *old_lf = NULL;
2381 /* Find lf/lh key name just after the one we are inserting. */
2382 for (i = 0; blocks[i] != 0; ++i) {
2383 if (BLOCK_ID_EQ (h, blocks[i], "lf") ||
2384 BLOCK_ID_EQ (h, blocks[i], "lh")) {
2385 old_offs = blocks[i];
2386 old_lf = (struct ntreg_lf_record *) (h->addr + old_offs);
2387 for (j = 0; j < le16toh (old_lf->nr_keys); ++j) {
2388 hive_node_h nk_offs = le32toh (old_lf->keys[j].offset);
2390 if (compare_name_with_nk_name (h, name, nk_offs) < 0)
2396 /* Insert it at the end.
2397 * old_offs points to the last lf record, set j.
2399 assert (old_offs != 0); /* should never happen if nr_subkeys > 0 */
2400 j = le16toh (old_lf->nr_keys);
2405 fprintf (stderr, "hivex_node_add_child: insert key in existing"
2406 " lh-record at 0x%zx, posn %zu\n", old_offs, j);
2408 new_offs = insert_lf_record (h, old_offs, j, name, node);
2409 if (new_offs == 0) {
2414 /* Recalculate pointers that could have been invalidated by
2415 * previous call to allocate_block (via insert_lf_record).
2417 nk = (struct ntreg_nk_record *) (h->addr + node);
2418 parent_nk = (struct ntreg_nk_record *) (h->addr + parent);
2421 fprintf (stderr, "hivex_node_add_child: new lh-record at 0x%zx\n",
2424 /* If the lf/lh-record was directly referenced by the parent nk,
2425 * then update the parent nk.
2427 if (le32toh (parent_nk->subkey_lf) + 0x1000 == old_offs)
2428 parent_nk->subkey_lf = htole32 (new_offs - 0x1000);
2429 /* Else we have to look for the intermediate ri-record and update
2433 for (i = 0; blocks[i] != 0; ++i) {
2434 if (BLOCK_ID_EQ (h, blocks[i], "ri")) {
2435 struct ntreg_ri_record *ri =
2436 (struct ntreg_ri_record *) (h->addr + blocks[i]);
2437 for (j = 0; j < le16toh (ri->nr_offsets); ++j)
2438 if (le32toh (ri->offset[j] + 0x1000) == old_offs) {
2439 ri->offset[j] = htole32 (new_offs - 0x1000);
2445 /* Not found .. This is an internal error. */
2447 fprintf (stderr, "hivex_node_add_child: returning ENOTSUP"
2448 " because could not find ri->lf link\n");
2460 /* Update nr_subkeys in parent nk. */
2461 nr_subkeys_in_parent_nk++;
2462 parent_nk->nr_subkeys = htole32 (nr_subkeys_in_parent_nk);
2464 /* Update max_subkey_name_len in parent nk. */
2465 uint16_t max = le16toh (parent_nk->max_subkey_name_len);
2466 if (max < strlen (name) * 2) /* *2 because "recoded" in UTF16-LE. */
2467 parent_nk->max_subkey_name_len = htole16 (strlen (name) * 2);
2472 /* Decrement the refcount of an sk-record, and if it reaches zero,
2473 * unlink it from the chain and delete it.
2476 delete_sk (hive_h *h, size_t sk_offset)
2478 if (!IS_VALID_BLOCK (h, sk_offset) || !BLOCK_ID_EQ (h, sk_offset, "sk")) {
2480 fprintf (stderr, "delete_sk: not an sk record: 0x%zx\n", sk_offset);
2485 struct ntreg_sk_record *sk = (struct ntreg_sk_record *) (h->addr + sk_offset);
2487 if (sk->refcount == 0) {
2489 fprintf (stderr, "delete_sk: sk record already has refcount 0: 0x%zx\n",
2497 if (sk->refcount == 0) {
2498 size_t sk_prev_offset = sk->sk_prev;
2499 sk_prev_offset += 0x1000;
2501 size_t sk_next_offset = sk->sk_next;
2502 sk_next_offset += 0x1000;
2504 /* Update sk_prev/sk_next SKs, unless they both point back to this
2505 * cell in which case we are deleting the last SK.
2507 if (sk_prev_offset != sk_offset && sk_next_offset != sk_offset) {
2508 struct ntreg_sk_record *sk_prev =
2509 (struct ntreg_sk_record *) (h->addr + sk_prev_offset);
2510 struct ntreg_sk_record *sk_next =
2511 (struct ntreg_sk_record *) (h->addr + sk_next_offset);
2513 sk_prev->sk_next = htole32 (sk_next_offset - 0x1000);
2514 sk_next->sk_prev = htole32 (sk_prev_offset - 0x1000);
2517 /* Refcount is zero so really delete this block. */
2518 mark_block_unused (h, sk_offset);
2524 /* Callback from hivex_node_delete_child which is called to delete a
2525 * node AFTER its subnodes have been visited. The subnodes have been
2526 * deleted but we still have to delete any lf/lh/li/ri records and the
2527 * value list block and values, followed by deleting the node itself.
2530 delete_node (hive_h *h, void *opaque, hive_node_h node, const char *name)
2532 /* Get the intermediate blocks. The subkeys have already been
2533 * deleted by this point, so tell get_children() not to check for
2534 * validity of the nk-records.
2536 hive_node_h *unused;
2538 if (get_children (h, node, &unused, &blocks, GET_CHILDREN_NO_CHECK_NK) == -1)
2542 /* We don't care what's in these intermediate blocks, so we can just
2543 * delete them unconditionally.
2546 for (i = 0; blocks[i] != 0; ++i)
2547 mark_block_unused (h, blocks[i]);
2551 /* Delete the values in the node. */
2552 if (delete_values (h, node) == -1)
2555 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
2557 /* If the NK references an SK, delete it. */
2558 size_t sk_offs = le32toh (nk->sk);
2559 if (sk_offs != 0xffffffff) {
2561 if (delete_sk (h, sk_offs) == -1)
2563 nk->sk = htole32 (0xffffffff);
2566 /* If the NK references a classname, delete it. */
2567 size_t cl_offs = le32toh (nk->classname);
2568 if (cl_offs != 0xffffffff) {
2570 mark_block_unused (h, cl_offs);
2571 nk->classname = htole32 (0xffffffff);
2574 /* Delete the node itself. */
2575 mark_block_unused (h, node);
2581 hivex_node_delete_child (hive_h *h, hive_node_h node)
2588 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
2593 if (node == hivex_root (h)) {
2595 fprintf (stderr, "hivex_node_delete_child: cannot delete root node\n");
2600 hive_node_h parent = hivex_node_parent (h, node);
2604 /* Delete node and all its children and values recursively. */
2605 static const struct hivex_visitor visitor = { .node_end = delete_node };
2606 if (hivex_visit_node (h, node, &visitor, sizeof visitor, NULL, 0) == -1)
2609 /* Delete the link from parent to child. We need to find the lf/lh
2610 * record which contains the offset and remove the offset from that
2611 * record, then decrement the element count in that record, and
2612 * decrement the overall number of subkeys stored in the parent
2615 hive_node_h *unused;
2617 if (get_children (h, parent, &unused, &blocks, GET_CHILDREN_NO_CHECK_NK)== -1)
2622 for (i = 0; blocks[i] != 0; ++i) {
2623 struct ntreg_hbin_block *block =
2624 (struct ntreg_hbin_block *) (h->addr + blocks[i]);
2626 if (block->id[0] == 'l' && (block->id[1] == 'f' || block->id[1] == 'h')) {
2627 struct ntreg_lf_record *lf = (struct ntreg_lf_record *) block;
2629 size_t nr_subkeys_in_lf = le16toh (lf->nr_keys);
2631 for (j = 0; j < nr_subkeys_in_lf; ++j)
2632 if (le32toh (lf->keys[j].offset) + 0x1000 == node) {
2633 for (; j < nr_subkeys_in_lf - 1; ++j)
2634 memcpy (&lf->keys[j], &lf->keys[j+1], sizeof (lf->keys[j]));
2635 lf->nr_keys = htole16 (nr_subkeys_in_lf - 1);
2641 fprintf (stderr, "hivex_node_delete_child: could not find parent"
2642 " to child link\n");
2647 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + parent);
2648 size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys);
2649 nk->nr_subkeys = htole32 (nr_subkeys_in_nk - 1);
2652 fprintf (stderr, "hivex_node_delete_child: updating nr_subkeys"
2653 " in parent 0x%zx to %zu\n", parent, nr_subkeys_in_nk);
2659 hivex_node_set_values (hive_h *h, hive_node_h node,
2660 size_t nr_values, const hive_set_value *values,
2668 if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
2673 /* Delete all existing values. */
2674 if (delete_values (h, node) == -1)
2680 /* Allocate value list node. Value lists have no id field. */
2681 static const char nul_id[2] = { 0, 0 };
2683 sizeof (struct ntreg_value_list) + (nr_values - 1) * sizeof (uint32_t);
2684 size_t vallist_offs = allocate_block (h, seg_len, nul_id);
2685 if (vallist_offs == 0)
2688 struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
2689 nk->nr_values = htole32 (nr_values);
2690 nk->vallist = htole32 (vallist_offs - 0x1000);
2692 struct ntreg_value_list *vallist =
2693 (struct ntreg_value_list *) (h->addr + vallist_offs);
2696 for (i = 0; i < nr_values; ++i) {
2697 /* Allocate vk record to store this (key, value) pair. */
2698 static const char vk_id[2] = { 'v', 'k' };
2699 seg_len = sizeof (struct ntreg_vk_record) + strlen (values[i].key);
2700 size_t vk_offs = allocate_block (h, seg_len, vk_id);
2704 /* Recalculate pointers that could have been invalidated by
2705 * previous call to allocate_block.
2707 nk = (struct ntreg_nk_record *) (h->addr + node);
2708 vallist = (struct ntreg_value_list *) (h->addr + vallist_offs);
2710 vallist->offset[i] = htole32 (vk_offs - 0x1000);
2712 struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + vk_offs);
2713 size_t name_len = strlen (values[i].key);
2714 vk->name_len = htole16 (name_len);
2715 strcpy (vk->name, values[i].key);
2716 vk->data_type = htole32 (values[i].t);
2717 uint32_t len = values[i].len;
2718 if (len <= 4) /* store it inline => set MSB flag */
2720 vk->data_len = htole32 (len);
2721 vk->flags = name_len == 0 ? 0 : 1;
2723 if (values[i].len <= 4) /* store it inline */
2724 memcpy (&vk->data_offset, values[i].value, values[i].len);
2726 size_t offs = allocate_block (h, values[i].len + 4, nul_id);
2730 /* Recalculate pointers that could have been invalidated by
2731 * previous call to allocate_block.
2733 nk = (struct ntreg_nk_record *) (h->addr + node);
2734 vallist = (struct ntreg_value_list *) (h->addr + vallist_offs);
2735 vk = (struct ntreg_vk_record *) (h->addr + vk_offs);
2737 memcpy (h->addr + offs + 4, values[i].value, values[i].len);
2738 vk->data_offset = htole32 (offs - 0x1000);
2741 if (name_len * 2 > le32toh (nk->max_vk_name_len))
2742 /* * 2 for UTF16-LE "reencoding" */
2743 nk->max_vk_name_len = htole32 (name_len * 2);
2744 if (values[i].len > le32toh (nk->max_vk_data_len))
2745 nk->max_vk_data_len = htole32 (values[i].len);
2752 hivex_node_set_value (hive_h *h, hive_node_h node,
2753 const hive_set_value *val, int flags)
2755 hive_value_h *prev_values = hivex_node_values (h, node);
2756 if (prev_values == NULL)
2761 size_t nr_values = 0;
2762 for (hive_value_h *itr = prev_values; *itr != 0; ++itr)
2765 hive_set_value *values = malloc ((nr_values + 1) * (sizeof (hive_set_value)));
2767 goto leave_prev_values;
2770 int idx_of_val = -1;
2771 hive_value_h *prev_val;
2772 for (prev_val = prev_values; *prev_val != 0; ++prev_val) {
2776 hive_set_value *value = &values[prev_val - prev_values];
2778 char *valval = hivex_value_value (h, *prev_val, &t, &len);
2779 if (valval == NULL) goto leave_partial;
2782 value->value = valval;
2786 char *valkey = hivex_value_key (h, *prev_val);
2787 if (valkey == NULL) goto leave_partial;
2790 value->key = valkey;
2792 if (STRCASEEQ (valkey, val->key))
2793 idx_of_val = prev_val - prev_values;
2796 if (idx_of_val > -1) {
2797 free (values[idx_of_val].key);
2798 free (values[idx_of_val].value);
2800 idx_of_val = nr_values;
2804 hive_set_value *value = &values[idx_of_val];
2805 *value = (hive_set_value){
2806 .key = strdup (val->key),
2807 .value = malloc (val->len),
2812 if (value->key == NULL || value->value == NULL) goto leave_partial;
2813 memcpy (value->value, val->value, val->len);
2815 retval = hivex_node_set_values (h, node, nr_values, values, 0);
2818 for (int i = 0; i < alloc_ct; i += 2) {
2819 free (values[i / 2].value);
2820 if (i + 1 < alloc_ct && values[i / 2].key != NULL)
2821 free (values[i / 2].key);