1 (* Windows Registry reverse-engineering tool.
2 * Copyright (C) 2010 Red Hat Inc.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 * For existing information on the registry format, please refer
19 * to the following documents. Note they are both incomplete
20 * and inaccurate in some respects.
22 * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf
23 * http://pogostick.net/~pnh/ntpasswd/WinReg.txt
30 open Visualizer_NT_time
33 if Array.length Sys.argv <> 2 then (
34 eprintf "Error: missing argument.
35 Usage: %s hivefile > out
37 'hivefile' is the input hive file from a Windows machine
38 'out' is an output file where we will write all the keys,
39 values etc for extended debugging purposes.
40 Errors, inconsistencies and unexpected fields in the hive file
41 are written to stderr.
42 " Sys.executable_name;
46 let filename = Sys.argv.(1)
47 let basename = Filename.basename filename
50 let bits = bitstring_of_file filename
52 (* Split into header + data at the 4KB boundary. *)
53 let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
55 (* Define a persistent pattern which matches the header fields. By
56 * using persistent patterns, we can reuse them later in the
59 let bitmatch header_fields =
60 { "regf" : 4*8 : string;
61 seq1 : 4*8 : littleendian;
62 seq2 : 4*8 : littleendian;
64 : littleendian, bind (nt_to_time_t last_modified);
65 major : 4*8 : littleendian;
66 minor : 4*8 : littleendian;
68 (* "Type". Contains 0. *)
69 unknown1 : 4*8 : littleendian;
71 (* "Format". Contains 1. *)
72 unknown2 : 4*8 : littleendian;
75 : littleendian, bind (get_offset root_key);
77 : littleendian, bind (get_offset end_pages);
79 (* "Cluster". Contains 1. *)
80 unknown3 : 4*8 : littleendian;
82 filename : 64*8 : string;
84 (* All three GUIDs here confirmed in Windows 7 registries. In
85 * Windows <= 2003 these GUID fields seem to contain junk.
87 * If you write zeroes to the GUID fields, load and unload in Win7
88 * REGEDIT, then Windows 7 writes some random GUIDs.
90 * Also (on Win7) unknownguid1 == unknownguid2. unknownguid3 is
93 unknownguid1 : 16*8 : bitstring;
94 unknownguid2 : 16*8 : bitstring;
96 (* Wrote zero to unknown4, loaded and unloaded it in Win7 REGEDIT,
97 * and it still contained zero. In existing registries it seems to
98 * contain random junk.
100 unknown4 : 4*8 : littleendian;
101 unknownguid3 : 16*8 : bitstring;
103 (* If you write zero to unknown5, load and unload it in REGEDIT,
104 * Windows 7 puts the string "rmtm" here. Existing registries also
105 * seen containing this string. However on older Windows it can
108 unknown5 : 4*8 : string;
110 (* This seems to contain junk from other parts of the registry. I
111 * wrote zeroes here, loaded and unloaded it in Win7 REGEDIT, and
112 * it still contained zeroes.
114 unknown6 : 340*8 : bitstring;
116 : littleendian, save_offset_to (crc_offset),
117 check (assert (crc_offset = 0x1fc * 8); true);
118 unknown7 : (0x1000-0x200)*8 : bitstring }
120 let fprintf_header chan bits =
122 | { :header_fields } ->
124 "HD %6ld %6ld %s %ld.%ld %08lx %08lx %s %s %08lx %s %s %s %08lx %s %s %s %08lx %s\n"
125 seq1 seq2 (print_time last_modified) major minor
127 (print_offset root_key) (print_offset end_pages)
128 unknown3 (print_utf16 filename)
129 (print_guid unknownguid1) (print_guid unknownguid2)
130 unknown4 (print_guid unknownguid3) unknown5
131 (print_bitstring unknown6)
132 csum (print_bitstring unknown7)
134 (* Parse the header and check it. *)
135 let root_key, end_pages =
137 | { :header_fields } ->
138 fprintf_header stdout header;
141 eprintf "HD hive file major <> 1 (major.minor = %ld.%ld)\n"
144 eprintf "HD hive file sequence numbers should match (%ld <> %ld)\n"
146 if unknown1 <> 0_l then
147 eprintf "HD unknown1 field <> 0 (%08lx)\n" unknown1;
148 if unknown2 <> 1_l then
149 eprintf "HD unknown2 field <> 1 (%08lx)\n" unknown2;
150 if unknown3 <> 1_l then
151 eprintf "HD unknown3 field <> 1 (%08lx)\n" unknown3;
152 if not (equals unknownguid1 unknownguid2) then
153 eprintf "HD unknownguid1 <> unknownguid2 (%s, %s)\n"
154 (print_guid unknownguid1) (print_guid unknownguid2);
155 (* We think this is junk.
156 if unknown4 <> 0_l then
157 eprintf "HD unknown4 field <> 0 (%08lx)\n" unknown4;
159 if unknown5 <> "rmtm" && unknown5 <> "\000\000\000\000" then
160 eprintf "HD unknown5 field <> \"rmtm\" & <> zeroes (%s)\n" unknown5;
161 (* We think this is junk.
162 if not (is_zero_bitstring unknown6) then
163 eprintf "HD unknown6 area is not zero (%s)\n"
164 (print_bitstring unknown6);
166 if not (is_zero_bitstring unknown7) then
167 eprintf "HD unknown7 area is not zero (%s)\n"
168 (print_bitstring unknown7);
172 failwithf "%s: this doesn't look like a registry hive file\n" basename
174 (* Define persistent patterns to match page and block fields. *)
175 let bitmatch page_fields =
176 { "hbin" : 4*8 : string;
178 : littleendian, bind (get_offset page_offset);
180 : littleendian, check (Int32.rem page_size 4096_l = 0_l),
181 bind (Int32.to_int page_size);
183 (* In the first hbin in the file these fields contain something.
184 * In subsequent hbins these fields are all zero.
186 * From existing hives (first hbin only):
188 * unknown1 unknown2 unknown5
189 * 00 00 00 00 00 00 00 00 9C 77 3B 02 6A 7D CA 01 00 00 00 00
190 * 00 00 00 00 00 00 00 00 50 3A 15 07 B5 9B CA 01 00 00 00 00
191 * 00 00 00 00 00 00 00 00 57 86 90 D4 9A 58 CA 01 00 00 00 00
192 * 00 00 00 00 00 00 00 00 52 3F 90 9D CF 7C CA 01 00 00 00 00
193 * 00 00 00 00 00 00 00 00 E8 86 C1 17 BD 06 CA 01 00 00 00 00
194 * 00 00 00 00 00 00 00 00 4A 77 CE 7A CF 7C CA 01 00 00 00 00
195 * 00 00 00 00 00 00 00 00 E4 EA 23 FF 69 7D CA 01 00 00 00 00
196 * 00 00 00 00 00 00 00 00 50 13 BA 8D A2 9A CA 01 00 00 00 00
197 * 00 00 00 00 00 00 00 00 0E 07 93 13 BD 06 CA 01 00 00 00 00
198 * 00 00 00 00 00 00 00 00 9D 55 D0 B3 99 58 CA 01 00 00 00 00
199 * 00 00 00 00 00 00 00 00 46 AC FF 8B CF 7C CA 01 00 00 00 00
200 * 00 00 00 00 00 00 00 00 80 29 2D 02 6A 7D CA 01 00 00 00 00
201 * 00 00 00 00 00 00 00 00 90 8D 36 07 B5 9B CA 01 00 00 00 00
202 * 00 00 00 00 00 00 00 00 5C 9B 8B B8 6A 06 CA 01 00 00 00 00
203 * 00 00 00 00 00 00 00 00 85 9F BB 99 9A 58 CA 01 00 00 00 00
204 * 00 00 00 00 00 00 00 00 BE 3D 21 02 6A 7D CA 01 00 00 00 00
205 * 00 00 00 00 00 00 00 00 70 53 09 07 B5 9B CA 01 00 00 00 00
206 * 00 00 00 00 00 00 00 00 5B 62 42 B6 9A 58 CA 01 00 00 00 00
207 * 01 00 00 00 00 00 00 00 B2 46 9B 9E CF 7C CA 01 00 00 00 00
208 * 01 00 00 00 00 00 00 00 CA 88 EE 1A BD 06 CA 01 00 00 00 00
210 * From the above we worked out that fields 3 and 4 are an NT
211 * timestamp, which seems to be "last modified" (when REGEDIT
212 * unloads a hive it updates this timestamp even if nothing
215 unknown1 : 4*8 : littleendian; (* usually zero, occasionally 1 *)
216 unknown2 : 4*8 : littleendian; (* always zero? *)
219 bind (if page_offset = 0 then nt_to_time_t last_modified
221 assert (last_modified = 0_L);
225 (* The "B.D." document said this field contains the page size, but
226 * this is not true. This misinformation has been copied to the
227 * sentinelchicken documentation too.
229 unknown5 : 4*8 : littleendian; (* always zero? *)
231 (* Now the blocks in this page follow. *)
232 blocks : (page_size - 32) * 8 : bitstring;
234 rest : -1 : bitstring }
236 let fprintf_page chan bits =
238 | { :page_fields } ->
239 fprintf chan "HB %s %08x %08lx %08lx %s %08lx\n"
240 (print_offset page_offset)
241 page_size unknown1 unknown2
242 (if page_offset = 0 then print_time last_modified
243 else string_of_float last_modified) unknown5
245 let bitmatch block_fields =
247 : littleendian, bind (Int32.to_int seg_len);
248 block_data : (abs seg_len - 4) * 8 : bitstring;
249 rest : -1 : bitstring }
251 let fprintf_block chan block_offset bits =
253 | { :block_fields } ->
254 fprintf chan "BL %s %s %d\n"
255 (print_offset block_offset)
256 (if seg_len < 0 then "used" else "free")
257 (if seg_len < 0 then -seg_len else seg_len)
259 (* Iterate over the pages and blocks. In the process we will examine
260 * each page (hbin) header. Also we will build block_list which is a
261 * list of (block offset, length, used flag, data).
263 let block_list = ref []
265 let rec loop_over_pages data data_offset =
266 if data_offset < end_pages then (
268 | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
270 | { :page_fields } ->
271 fprintf_page stdout data;
273 assert (page_offset = data_offset);
275 if data_offset = 0 then ( (* first hbin only *)
276 if unknown1 <> 0_l then
277 eprintf "HB %s unknown1 field <> 0 (%08lx)\n"
278 (print_offset page_offset) unknown1;
279 if unknown2 <> 0_l then
280 eprintf "HB %s unknown2 field <> 0 (%08lx)\n"
281 (print_offset page_offset) unknown2;
282 if unknown5 <> 0_l then
283 eprintf "HB %s unknown5 field <> 0 (%08lx)\n"
284 (print_offset page_offset) unknown5
285 ) else ( (* subsequent hbins *)
286 if unknown1 <> 0_l || unknown2 <> 0_l || unknown5 <> 0_l then
287 eprintf "HB %s unknown fields <> 0 (%08lx %08lx %08lx)\n"
288 (print_offset page_offset)
289 unknown1 unknown2 unknown5;
290 if last_modified <> 0. then
291 eprintf "HB %s last_modified <> 0. (%g)\n"
292 (print_offset page_offset) last_modified
295 (* Loop over the blocks in this page. *)
296 loop_over_blocks blocks (data_offset + 32);
298 (* Loop over rest of the pages. *)
299 loop_over_pages rest (data_offset + page_size)
302 failwithf "%s: invalid hbin at offset %s\n"
303 basename (print_offset data_offset)
305 (* Reached the end of the official hbins in this file, BUT the
306 * file can be larger than this and might contain stuff. What
307 * does it contain after the hbins? We think just junk, but
310 if not (is_zero_bitstring data) then (
311 eprintf "Junk in file after end of pages:\n";
312 let rec loop data data_offset =
314 | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
315 | { :page_fields } ->
316 eprintf "\tjunk hbin %s 0x%08x\n"
317 (print_offset data_offset) page_size;
318 loop rest (data_offset + page_size);
320 eprintf "\tother junk %s %s\n"
321 (print_offset data_offset) (print_bitstring data)
323 loop data data_offset
326 and loop_over_blocks blocks block_offset =
328 | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
330 | { :block_fields } ->
331 assert (block_offset mod 8 = 0);
333 fprintf_block stdout block_offset blocks;
336 if seg_len < 0 then true, -seg_len else false, seg_len in
338 let block = block_offset, (seg_len, used, block_data) in
339 block_list := block :: !block_list;
341 (* Loop over the rest of the blocks in this page. *)
342 loop_over_blocks rest (block_offset + seg_len)
345 failwithf "%s: invalid block near offset %s\n"
346 basename (print_offset block_offset)
348 loop_over_pages data 0
350 (* Turn the block_list into a map so we can quickly look up a block
353 let block_list = !block_list
356 fun map (block_offset, block) -> IntMap.add block_offset block map
357 ) IntMap.empty block_list
358 let lookup fn offset =
360 let (_, used, _) as block = IntMap.find offset block_map in
362 failwithf "%s: %s: lookup: free block %s referenced from hive tree"
363 basename fn (print_offset offset);
366 failwithf "%s: %s: lookup: unknown block %s referenced from hive tree"
367 basename fn (print_offset offset)
369 (* Use this to mark blocks that we've visited. If the hive contains
370 * no unreferenced blocks, then by the end this should just contain
373 let mark_visited, is_not_visited, unvisited_blocks =
374 let v = ref block_map in
375 let mark_visited offset = v := IntMap.remove offset !v
376 and is_not_visited offset = IntMap.mem offset !v
377 and unvisited_blocks () = !v in
378 mark_visited, is_not_visited, unvisited_blocks
380 (* Define persistent patterns to match nk-records, vk-records and
381 * sk-records, which are the record types that we especially want to
382 * analyze later. Other blocks types (eg. value lists, lf-records)
383 * have no "spare space" so everything is known about them and we don't
386 let bitmatch nk_fields =
387 { "nk" : 2*8 : string;
388 (* Flags stored in the file as a little endian word, hence the
392 predefinedhandle : 1; keynameascii : 1; symlinkkey : 1;
393 cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1;
394 unknownflag8000 : 1; unknownflag4000 : 1;
395 unknownflag2000 : 1; unknownflag1000 : 1;
396 unknownflag0800 : 1; unknownflag0400 : 1;
397 virtualstore : 1; virttarget : 1;
398 timestamp : 64 : littleendian, bind (nt_to_time_t timestamp);
399 unknown1 : 4*8 : littleendian;
400 parent : 4*8 : littleendian, bind (get_offset parent);
401 nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys);
402 nr_subkeys_vol : 4*8;
403 subkeys : 4*8 : littleendian, bind (get_offset subkeys);
405 nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values);
406 vallist : 4*8 : littleendian, bind (get_offset vallist);
407 sk : 4*8 : littleendian, bind (get_offset sk);
408 classname : 4*8 : littleendian, bind (get_offset classname);
409 (* sentinelchicken.com says this is a single 32 bit field
410 * containing maximum number of bytes in a subkey name, however
411 * that does not seem to be correct. We think it is several
412 * fields, the first being the maximum number of bytes in the
413 * UTF16-LE encoded version of the subkey names, (since subkey
414 * names are usually ASCII, that would be max length of names * 2).
415 * This is a historical maximum, so it can be greater than the
416 * current maximum name field.
418 * The remaining fields are often non-zero, but the purpose is
421 * In the hives we examined the other fields had values as
423 * userflags: 0, 2, 0xa, 0xe
424 * virtcontrolflags: 0, 1
427 max_subkey_name_len : 2*8 : littleendian;
428 unknown2_userflags : 4;
429 unknown2_virtcontrolflags : 4;
432 (* sentinelchicken.com says: maximum subkey CLASSNAME length,
433 * however that does not seem to be correct. In hives I looked
434 * at, it has value 0, 0xc, 0x10, 0x18, 0x1a, 0x28.
436 unknown3 : 4*8 : littleendian;
437 (* sentinelchicken.com says: maximum number of bytes in a value
438 * name, however that does not seem to be correct. We think it is
439 * the maximum number of bytes in the UTF16-LE encoded version of
440 * the value names (since value names are usually ASCII, that would
441 * be max length of names * 2). This is a historical maximum, so
442 * it can be greater than the current maximum name field.
444 max_vk_name_len : 4*8 : littleendian, bind (Int32.to_int max_vk_name_len);
445 (* sentinelchicken.com says: maximum value data size, and this
446 * agrees with my observations. It is the largest data size (not
447 * seg_len, but vk.data_len) for any value in this key. We think
448 * that this field is a historical max, so eg if a maximally sized
449 * value is deleted then this field is not reduced. Certainly
450 * max_vk_data_len >= the measured maximum in all the hives that we
453 max_vk_data_len : 4*8 : littleendian, bind (Int32.to_int max_vk_data_len);
454 unknown6 : 4*8 : littleendian;
455 name_len : 2*8 : littleendian;
456 classname_len : 2*8 : littleendian;
457 name : name_len * 8 : string }
459 let fprintf_nk chan nk =
460 let (_, _, bits) = lookup "fprintf_nk" nk in
464 "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s %08lx %s %d %ld %s %08lx %d %s %s %s %d %x %x %x %08lx %d %d %08lx %d %d %s\n"
466 (if unknownflag8000 then "8" else ".")
467 (if unknownflag4000 then "4" else ".")
468 (if unknownflag2000 then "2" else ".")
469 (if unknownflag1000 then "1" else ".")
470 (if unknownflag0800 then "8" else ".")
471 (if unknownflag0400 then "4" else ".")
472 (if virtualstore then "s" else ".")
473 (if virttarget then "t" else ".")
474 (if virtmirrored then "m" else ".")
475 (if predefinedhandle then "P" else ".")
476 (if keynameascii then "A" else ".")
477 (if symlinkkey then "S" else ".")
478 (if cannotbedeleted then "N" else ".")
479 (if isroot then "R" else ".")
480 (if ismountpoint then "M" else ".")
481 (if isvolatile then "V" else ".")
482 (print_time timestamp)
483 unknown1 (print_offset parent) nr_subkeys nr_subkeys_vol
484 (print_offset subkeys) subkeys_vol
485 nr_values (print_offset vallist)
486 (print_offset sk) (print_offset classname)
488 unknown2_userflags unknown2_virtcontrolflags unknown2_debug
489 unknown3 max_vk_name_len max_vk_data_len unknown6
490 name_len classname_len name
492 type data_t = Inline of bitstring | Offset of int
493 let bitmatch vk_fields =
494 { "vk" : 2*8 : string;
495 name_len : 2*8 : littleendian;
496 (* Top bit set means that the data is stored inline. In that case
497 * the data length must be <= 4. The length can also be 0 (or
498 * 0x80000000) if the data type is NONE.
501 : littleendian, bind (
502 let is_inline = Int32.logand data_len 0x8000_0000_l = 0x8000_0000_l in
503 let data_len = Int32.to_int (Int32.logand data_len 0x7fff_ffff_l) in
504 if is_inline then assert (data_len <= 4) else assert (data_len > 4);
507 (* The data itself depends on the type field.
509 * For REG_SZ type, the data always seems to be NUL-terminated, which
510 * means because these strings are often UTF-16LE, that the string will
511 * end with \0\0 bytes. The termination bytes are included in data_len.
513 * For REG_MULTI_SZ, see
514 * http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx
518 let is_inline, data_len = data_len in
520 Inline (takebits (data_len*8) data)
523 bitmatch data with { offset : 4*8 : littleendian } -> offset in
524 let offset = get_offset offset in
528 t : 4*8 : littleendian, bind (Int32.to_int t);
529 (* Flags, stored as a little-endian word: *)
531 nameisascii : 1; (* Clear for default [zero-length] name, always set
532 * otherwise in registries that we found. Perhaps this
533 * is really "nameisdefault" flag?
536 (* Unknown field, usually contains something. *)
537 unknown3 : 2*8 : littleendian;
538 name : name_len * 8 : string }
540 let fprintf_vk chan vk =
541 let (_, _, bits) = lookup "fprintf_vk" vk in
546 | Inline data -> data
548 let (_, _, bits) = lookup "fprintf_vk (data)" offset in
550 let is_inline, data_len = data_len in
551 fprintf chan "VK %s %s %s %d %s%s %s %08x %s %08x %08x\n"
553 name (if is_inline then "inline" else "-") data_len
556 | Offset offset -> "["^print_offset offset^"]")
557 (print_bitstring real_data)
559 unknown1 (if nameisascii then "A" else "L")
562 let bitmatch sk_fields =
563 { "sk" : 2*8 : string;
564 unknown1 : 2*8 : littleendian;
565 sk_next : 4*8 : littleendian, bind (get_offset sk_next);
566 sk_prev : 4*8 : littleendian, bind (get_offset sk_prev);
567 refcount : 4*8 : littleendian, bind (Int32.to_int refcount);
568 sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len);
569 sec_desc : sec_len * 8 : bitstring }
571 let fprintf_sk chan sk =
572 let (_, _, bits) = lookup "fprintf_sk" sk in
575 fprintf chan "SK %s %04x %s %s %d %d\n"
576 (print_offset sk) unknown1
577 (print_offset sk_next) (print_offset sk_prev)
579 (* print_bitstring sec_desc -- suppress this *)
581 (* Store lists of records we encounter (lists of offsets). *)
582 let nk_records = ref []
583 and vk_records = ref []
584 and sk_records = ref []
586 (* Functions to visit each block, starting at the root. Each block
587 * that we visit is printed.
589 let rec visit_nk ?(nk_is_root = false) nk =
590 let (_, _, bits) = lookup "visit_nk" nk in
594 fprintf_nk stdout nk;
596 nk_records := nk :: !nk_records;
598 (* Check the isroot flag is only set on the root node. *)
599 assert (isroot = nk_is_root);
601 if unknownflag8000 then
602 eprintf "NK %s unknownflag8000 is set\n" (print_offset nk);
603 if unknownflag4000 then
604 eprintf "NK %s unknownflag4000 is set\n" (print_offset nk);
605 if unknownflag2000 then
606 eprintf "NK %s unknownflag2000 is set\n" (print_offset nk);
607 if unknownflag1000 then
608 eprintf "NK %s unknownflag1000 is set\n" (print_offset nk);
609 if unknownflag0800 then
610 eprintf "NK %s unknownflag0800 is set\n" (print_offset nk);
611 if unknownflag0400 then
612 eprintf "NK %s unknownflag0400 is set\n" (print_offset nk);
613 if unknown1 <> 0_l then
614 eprintf "NK %s unknown1 <> 0 (%08lx)\n" (print_offset nk) unknown1;
615 if unknown2_userflags <> 0 then
616 eprintf "NK %s unknown2_userflags <> 0 (%x)\n"
617 (print_offset nk) unknown2_userflags;
618 if unknown2_virtcontrolflags <> 0 then
619 eprintf "NK %s unknown2_virtcontrolflags <> 0 (%x)\n"
620 (print_offset nk) unknown2_virtcontrolflags;
621 if unknown2_debug <> 0 then
622 eprintf "NK %s unknown2_debug <> 0 (%x)\n"
623 (print_offset nk) unknown2_debug;
624 if unknown3 <> 0_l then
625 eprintf "NK %s unknown3 <> 0 (%08lx)\n" (print_offset nk) unknown3;
626 if unknown6 <> 0_l then
627 eprintf "NK %s unknown6 <> 0 (%08lx)\n" (print_offset nk) unknown6;
629 (* -- common, assume it's not an error
630 if classname = -1 then
631 eprintf "NK %s has no classname\n" (print_offset nk);
632 if classname_len = 0 then
633 eprintf "NK %s has zero-length classname\n" (print_offset nk);
636 eprintf "NK %s has no sk-record\n" (print_offset nk);
638 eprintf "NK %s has zero-length name\n" (print_offset nk);
640 (* Visit the values first at this node. *)
641 let max_data_len, max_name_len =
642 if vallist <> -1 then
643 visit_vallist nr_values vallist
647 if max_vk_data_len < max_data_len then
648 eprintf "NK %s nk.max_vk_data_len (%d) < actual max data_len (%d)\n"
649 (print_offset nk) max_vk_data_len max_data_len;
651 if max_vk_name_len < max_name_len * 2 then
652 eprintf "NK %s nk.max_vk_name_len (%d) < actual max name_len * 2 (%d)\n"
653 (print_offset nk) max_vk_name_len (max_name_len * 2);
655 (* Visit the subkeys of this node. *)
656 if subkeys <> -1 then (
657 let counted, max_name_len, _ = visit_subkeys subkeys in
659 if counted <> nr_subkeys then
660 failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n"
661 basename nr_subkeys counted (print_offset subkeys);
663 if max_subkey_name_len < max_name_len * 2 then
664 eprintf "NK %s nk.max_subkey_name_len (%d) < actual max name_len * 2 (%d)\n"
665 (print_offset nk) max_subkey_name_len (max_name_len * 2);
668 (* Visit the sk-record and classname. *)
671 if classname <> -1 then
672 visit_classname classname classname_len;
675 failwithf "%s: invalid nk block at offset %s\n"
676 basename (print_offset nk)
679 and visit_vallist nr_values vallist =
680 let (seg_len, _, bits) = lookup "visit_vallist" vallist in
681 mark_visited vallist;
682 printf "VL %s %d %d\n" (print_offset vallist) nr_values seg_len;
683 visit_values_in_vallist nr_values vallist bits
685 and visit_values_in_vallist nr_values vallist bits =
686 if nr_values > 0 then (
688 | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
689 assert (nr_values = 0);
692 | { value : 4*8 : littleendian, bind (get_offset value);
693 rest : -1 : bitstring } ->
694 let data_len, name_len = visit_vk value in
695 let max_data_len, max_name_len =
696 visit_values_in_vallist (nr_values-1) vallist rest in
697 max max_data_len data_len, max max_name_len name_len
700 failwithf "%s: invalid offset in value list at %s\n"
701 basename (print_offset vallist)
705 let (_, _, bits) = lookup "visit_vk" vk in
710 fprintf_vk stdout vk;
712 let is_inline, data_len = data_len in
714 if unknown1 <> 0 then
715 eprintf "VK %s unknown1 flags set (%02x)\n"
716 (print_offset vk) unknown1;
717 if unknown2 <> 0 then
718 eprintf "VK %s unknown2 flags set (%02x)\n"
719 (print_offset vk) unknown2;
720 if unknown3 <> 0 then
721 eprintf "VK %s unknown3 flags set (%04x)\n"
722 (print_offset vk) unknown3;
724 (* Note this is common for default [ie. zero-length] key names. *)
725 if not nameisascii && name_len > 0 then
726 eprintf "VK %s has non-ASCII name flag set (name is %s)\n"
727 (print_offset vk) (print_binary_string name);
729 vk_records := vk :: !vk_records;
733 let _ = lookup "visit_vk (data)" offset in
740 failwithf "%s: invalid vk block at offset %s\n"
741 basename (print_offset vk)
744 (* Visits subkeys, recursing through intermediate lf/lh/ri structures,
745 * and returns the number of subkeys actually seen.
747 and visit_subkeys subkeys =
748 let (_, _, bits) = lookup "visit_subkeys" subkeys in
749 mark_visited subkeys;
751 | { "lf" : 2*8 : string;
752 len : 2*8 : littleendian; (* number of subkeys of this node *)
753 rest : len*8*8 : bitstring } ->
754 printf "LF %s %d\n" (print_offset subkeys) len;
755 visit_subkeys_in_lf_list false subkeys len rest
757 | { "lh" : 2*8 : string;
758 len : 2*8 : littleendian; (* number of subkeys of this node *)
759 rest : len*8*8 : bitstring } ->
760 printf "LF %s %d\n" (print_offset subkeys) len;
761 visit_subkeys_in_lf_list true subkeys len rest
763 | { "ri" : 2*8 : string;
764 len : 2*8 : littleendian;
765 rest : len*4*8 : bitstring } ->
766 printf "RI %s %d\n" (print_offset subkeys) len;
767 visit_subkeys_in_ri_list subkeys len rest
769 (* In theory you can have an li-record here, but we've never
773 | { "nk" : 2*8 : string } ->
775 let name, name_len = name_of_nk subkeys in
779 failwithf "%s: invalid subkey node found at %s\n"
780 basename (print_offset subkeys)
783 and visit_subkeys_in_lf_list newstyle_hash subkeys_top len bits =
786 | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
790 | { offset : 4*8 : littleendian, bind (get_offset offset);
791 hash : 4*8 : bitstring;
792 rest : -1 : bitstring } ->
793 let c1, name_len1, name = visit_subkeys offset in
795 check_hash offset newstyle_hash hash name;
797 let c2, name_len2, _ =
798 visit_subkeys_in_lf_list newstyle_hash subkeys_top (len-1) rest in
799 c1 + c2, max name_len1 name_len2, ""
802 failwithf "%s: invalid subkey in lf/lh list at %s\n"
803 basename (print_offset subkeys_top)
806 and visit_subkeys_in_ri_list subkeys_top len bits =
809 | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
813 | { offset : 4*8 : littleendian, bind (get_offset offset);
814 rest : -1 : bitstring } ->
815 let c1, name_len1, _ = visit_subkeys offset in
816 let c2, name_len2, _ =
817 visit_subkeys_in_ri_list subkeys_top (len-1) rest in
818 c1 + c2, max name_len1 name_len2, ""
821 failwithf "%s: invalid subkey in ri list at %s\n"
822 basename (print_offset subkeys_top)
825 and check_hash offset newstyle_hash hash name =
826 if not newstyle_hash then (
827 (* Old-style lf record hash the first four bytes of the name
830 let len = String.length name in
833 bitstring_of_string (String.sub name 0 4)
835 let zeroes = zeroes_bitstring ((4-len)*8) in
836 concat [bitstring_of_string name; zeroes]
838 if not (equals hash name_bits) then
839 eprintf "LF incorrect hash for name %s, expected %s, actual %s\n"
840 name (print_bitstring name_bits) (print_bitstring hash)
842 (* New-style lh record has a proper hash. *)
843 let actual = bitmatch hash with { hash : 4*8 : littleendian } -> hash in
847 h := Int32.mul !h 37_l;
848 h := Int32.add !h (Int32.of_int (Char.code (Char.uppercase c)))
851 eprintf "LH incorrect hash for name %s, expected 0x%08lx, actual 0x%08lx\n"
856 let (_, _, bits) = lookup "name_of_nk" nk in
858 | { :nk_fields } -> name, name_len
861 let (_, _, bits) = lookup "visit_sk" sk in
862 if is_not_visited sk then (
866 fprintf_sk stdout sk;
868 if unknown1 <> 0 then
869 eprintf "SK %s unknown1 <> 0 (%04x)\n" (print_offset sk) unknown1;
871 sk_records := sk :: !sk_records
874 failwithf "%s: invalid sk-record at %s\n"
875 basename (print_offset sk)
879 and visit_classname classname classname_len =
880 let (seg_len, _, bits) = lookup "visit_classname" classname in
881 mark_visited classname;
882 assert (seg_len >= classname_len);
883 printf "CL %s %s\n" (print_offset classname) (print_bitstring bits)
886 visit_nk ~nk_is_root:true root_key
888 (* These are immutable now. *)
889 let nk_records = !nk_records
890 let vk_records = !vk_records
891 let sk_records = !sk_records
893 (* So we can rapidly tell what is an nk/vk/sk offset. *)
895 List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty nk_records
897 List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty vk_records
899 List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty sk_records
901 (* Now after visiting all the blocks, are there any used blocks which
902 * are unvisited? If there are any then that would indicate either (a)
903 * that the hive contains unreferenced blocks, or (b) that there are
904 * referenced blocks that we did not visit because we don't have a full
905 * understanding of the hive format.
907 * Windows 7 registries often contain a few of these -- not clear
908 * how serious they are, but don't fail here.
911 let unvisited = unvisited_blocks () in
915 | (_, false, _) -> () (* ignore unused blocks *)
916 | (seg_len, true, _) ->
917 eprintf "used block %s (length %d) is not referenced\n"
918 (print_offset offset) seg_len
921 (* Check the SKs are:
922 * (a) linked into a single circular list through the sk_prev/sk_next
924 * (b) refcounts are correct
927 if List.length sk_records > 0 then (
928 let sk0 = List.hd sk_records in (* start at any arbitrary sk *)
929 (* This loop follows the chain of sk pointers until we arrive
930 * back at the original, checking prev/next are consistent.
932 let rec loop visited prevsk sk =
934 if not (IntSet.mem sk sk_set) then
935 eprintf "SK %s not an sk-record (faulty sk_next somewhere)\n"
938 let _, _, bits = lookup "loop sk circular list" sk in
941 if sk_prev <> prevsk then
942 eprintf "SK %s sk_prev != previous sk (%s, %s)\n"
944 (print_offset sk_prev) (print_offset prevsk);
945 if IntSet.mem sk visited then
946 eprintf "SK %s already visited (bad circular list)\n"
948 let visited = IntSet.add sk visited in
949 loop visited sk sk_next
953 let _, _, bits = lookup "start sk circular list" sk0 in
956 loop IntSet.empty sk_prev sk0
959 (* For every nk-record, if it references an sk-record count that,
960 * then check this matches the refcounts in the sk-records
963 let refcounts = Counter.create () in
966 let _, _, bits = lookup "sk refcounter (nk)" nk in
969 Counter.incr refcounts sk
975 let _, _, bits = lookup "sk refcounter (sk)" sk in
978 let actual = Counter.get refcounts sk in
979 if actual <> refcount then
980 eprintf "SK %s incorrect refcount (actual %d, in file %d)\n"
981 (print_offset sk) actual refcount