--- /dev/null
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the following documents. Note they are both incomplete
+ * and inaccurate in some respects.
+ *
+ * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf
+ * http://pogostick.net/~pnh/ntpasswd/WinReg.txt
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+open Visualizer_utils
+open Visualizer_NT_time
+
+let () =
+ if Array.length Sys.argv <> 2 then (
+ eprintf "Error: missing argument.
+Usage: %s hivefile > out
+where
+ 'hivefile' is the input hive file from a Windows machine
+ 'out' is an output file where we will write all the keys,
+ values etc for extended debugging purposes.
+Errors, inconsistencies and unexpected fields in the hive file
+are written to stderr.
+" Sys.executable_name;
+ exit 1
+ )
+
+let filename = Sys.argv.(1)
+let basename = Filename.basename filename
+
+(* Load the file. *)
+let bits = bitstring_of_file filename
+
+(* Split into header + data at the 4KB boundary. *)
+let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
+
+(* Define a persistent pattern which matches the header fields. By
+ * using persistent patterns, we can reuse them later in the
+ * program.
+ *)
+let bitmatch header_fields =
+ { "regf" : 4*8 : string;
+ seq1 : 4*8 : littleendian;
+ seq2 : 4*8 : littleendian;
+ last_modified : 64
+ : littleendian, bind (nt_to_time_t last_modified);
+ major : 4*8 : littleendian;
+ minor : 4*8 : littleendian;
+
+ (* "Type". Contains 0. *)
+ unknown1 : 4*8 : littleendian;
+
+ (* "Format". Contains 1. *)
+ unknown2 : 4*8 : littleendian;
+
+ root_key : 4*8
+ : littleendian, bind (get_offset root_key);
+ end_pages : 4*8
+ : littleendian, bind (get_offset end_pages);
+
+ (* "Cluster". Contains 1. *)
+ unknown3 : 4*8 : littleendian;
+
+ filename : 64*8 : string;
+
+ (* All three GUIDs here confirmed in Windows 7 registries. In
+ * Windows <= 2003 these GUID fields seem to contain junk.
+ *
+ * If you write zeroes to the GUID fields, load and unload in Win7
+ * REGEDIT, then Windows 7 writes some random GUIDs.
+ *
+ * Also (on Win7) unknownguid1 == unknownguid2. unknownguid3 is
+ * different.
+ *)
+ unknownguid1 : 16*8 : bitstring;
+ unknownguid2 : 16*8 : bitstring;
+
+ (* Wrote zero to unknown4, loaded and unloaded it in Win7 REGEDIT,
+ * and it still contained zero. In existing registries it seems to
+ * contain random junk.
+ *)
+ unknown4 : 4*8 : littleendian;
+ unknownguid3 : 16*8 : bitstring;
+
+ (* If you write zero to unknown5, load and unload it in REGEDIT,
+ * Windows 7 puts the string "rmtm" here. Existing registries also
+ * seen containing this string. However on older Windows it can
+ * be all zeroes.
+ *)
+ unknown5 : 4*8 : string;
+
+ (* This seems to contain junk from other parts of the registry. I
+ * wrote zeroes here, loaded and unloaded it in Win7 REGEDIT, and
+ * it still contained zeroes.
+ *)
+ unknown6 : 340*8 : bitstring;
+ csum : 4*8
+ : littleendian, save_offset_to (crc_offset),
+ check (assert (crc_offset = 0x1fc * 8); true);
+ unknown7 : (0x1000-0x200)*8 : bitstring }
+
+let fprintf_header chan bits =
+ bitmatch bits with
+ | { :header_fields } ->
+ fprintf chan
+ "HD %6ld %6ld %s %ld.%ld %08lx %08lx %s %s %08lx %s %s %s %08lx %s %s %s %08lx %s\n"
+ seq1 seq2 (print_time last_modified) major minor
+ unknown1 unknown2
+ (print_offset root_key) (print_offset end_pages)
+ unknown3 (print_utf16 filename)
+ (print_guid unknownguid1) (print_guid unknownguid2)
+ unknown4 (print_guid unknownguid3) unknown5
+ (print_bitstring unknown6)
+ csum (print_bitstring unknown7)
+
+(* Parse the header and check it. *)
+let root_key, end_pages =
+ bitmatch header with
+ | { :header_fields } ->
+ fprintf_header stdout header;
+
+ if major <> 1_l then
+ eprintf "HD hive file major <> 1 (major.minor = %ld.%ld)\n"
+ major minor;
+ if seq1 <> seq2 then
+ eprintf "HD hive file sequence numbers should match (%ld <> %ld)\n"
+ seq1 seq2;
+ if unknown1 <> 0_l then
+ eprintf "HD unknown1 field <> 0 (%08lx)\n" unknown1;
+ if unknown2 <> 1_l then
+ eprintf "HD unknown2 field <> 1 (%08lx)\n" unknown2;
+ if unknown3 <> 1_l then
+ eprintf "HD unknown3 field <> 1 (%08lx)\n" unknown3;
+ if not (equals unknownguid1 unknownguid2) then
+ eprintf "HD unknownguid1 <> unknownguid2 (%s, %s)\n"
+ (print_guid unknownguid1) (print_guid unknownguid2);
+ (* We think this is junk.
+ if unknown4 <> 0_l then
+ eprintf "HD unknown4 field <> 0 (%08lx)\n" unknown4;
+ *)
+ if unknown5 <> "rmtm" && unknown5 <> "\000\000\000\000" then
+ eprintf "HD unknown5 field <> \"rmtm\" & <> zeroes (%s)\n" unknown5;
+ (* We think this is junk.
+ if not (is_zero_bitstring unknown6) then
+ eprintf "HD unknown6 area is not zero (%s)\n"
+ (print_bitstring unknown6);
+ *)
+ if not (is_zero_bitstring unknown7) then
+ eprintf "HD unknown7 area is not zero (%s)\n"
+ (print_bitstring unknown7);
+
+ root_key, end_pages
+ | {_} ->
+ failwithf "%s: this doesn't look like a registry hive file\n" basename
+
+(* Define persistent patterns to match page and block fields. *)
+let bitmatch page_fields =
+ { "hbin" : 4*8 : string;
+ page_offset : 4*8
+ : littleendian, bind (get_offset page_offset);
+ page_size : 4*8
+ : littleendian, check (Int32.rem page_size 4096_l = 0_l),
+ bind (Int32.to_int page_size);
+
+ (* In the first hbin in the file these fields contain something.
+ * In subsequent hbins these fields are all zero.
+ *
+ * From existing hives (first hbin only):
+ *
+ * unknown1 unknown2 unknown5
+ * 00 00 00 00 00 00 00 00 9C 77 3B 02 6A 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 50 3A 15 07 B5 9B CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 57 86 90 D4 9A 58 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 52 3F 90 9D CF 7C CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 E8 86 C1 17 BD 06 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 4A 77 CE 7A CF 7C CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 E4 EA 23 FF 69 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 50 13 BA 8D A2 9A CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 0E 07 93 13 BD 06 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 9D 55 D0 B3 99 58 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 46 AC FF 8B CF 7C CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 80 29 2D 02 6A 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 90 8D 36 07 B5 9B CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 5C 9B 8B B8 6A 06 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 85 9F BB 99 9A 58 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 BE 3D 21 02 6A 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 70 53 09 07 B5 9B CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 5B 62 42 B6 9A 58 CA 01 00 00 00 00
+ * 01 00 00 00 00 00 00 00 B2 46 9B 9E CF 7C CA 01 00 00 00 00
+ * 01 00 00 00 00 00 00 00 CA 88 EE 1A BD 06 CA 01 00 00 00 00
+ *
+ * From the above we worked out that fields 3 and 4 are an NT
+ * timestamp, which seems to be "last modified" (when REGEDIT
+ * unloads a hive it updates this timestamp even if nothing
+ * has been changed).
+ *)
+ unknown1 : 4*8 : littleendian; (* usually zero, occasionally 1 *)
+ unknown2 : 4*8 : littleendian; (* always zero? *)
+ last_modified : 64
+ : littleendian,
+ bind (if page_offset = 0 then nt_to_time_t last_modified
+ else (
+ assert (last_modified = 0_L);
+ 0.
+ )
+ );
+ (* The "B.D." document said this field contains the page size, but
+ * this is not true. This misinformation has been copied to the
+ * sentinelchicken documentation too.
+ *)
+ unknown5 : 4*8 : littleendian; (* always zero? *)
+
+ (* Now the blocks in this page follow. *)
+ blocks : (page_size - 32) * 8 : bitstring;
+
+ rest : -1 : bitstring }
+
+let fprintf_page chan bits =
+ bitmatch bits with
+ | { :page_fields } ->
+ fprintf chan "HB %s %08x %08lx %08lx %s %08lx\n"
+ (print_offset page_offset)
+ page_size unknown1 unknown2
+ (if page_offset = 0 then print_time last_modified
+ else string_of_float last_modified) unknown5
+
+let bitmatch block_fields =
+ { seg_len : 4*8
+ : littleendian, bind (Int32.to_int seg_len);
+ block_data : (abs seg_len - 4) * 8 : bitstring;
+ rest : -1 : bitstring }
+
+let fprintf_block chan block_offset bits =
+ bitmatch bits with
+ | { :block_fields } ->
+ fprintf chan "BL %s %s %d\n"
+ (print_offset block_offset)
+ (if seg_len < 0 then "used" else "free")
+ (if seg_len < 0 then -seg_len else seg_len)
+
+(* Iterate over the pages and blocks. In the process we will examine
+ * each page (hbin) header. Also we will build block_list which is a
+ * list of (block offset, length, used flag, data).
+ *)
+let block_list = ref []
+let () =
+ let rec loop_over_pages data data_offset =
+ if data_offset < end_pages then (
+ bitmatch data with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
+
+ | { :page_fields } ->
+ fprintf_page stdout data;
+
+ assert (page_offset = data_offset);
+
+ if data_offset = 0 then ( (* first hbin only *)
+ if unknown1 <> 0_l then
+ eprintf "HB %s unknown1 field <> 0 (%08lx)\n"
+ (print_offset page_offset) unknown1;
+ if unknown2 <> 0_l then
+ eprintf "HB %s unknown2 field <> 0 (%08lx)\n"
+ (print_offset page_offset) unknown2;
+ if unknown5 <> 0_l then
+ eprintf "HB %s unknown5 field <> 0 (%08lx)\n"
+ (print_offset page_offset) unknown5
+ ) else ( (* subsequent hbins *)
+ if unknown1 <> 0_l || unknown2 <> 0_l || unknown5 <> 0_l then
+ eprintf "HB %s unknown fields <> 0 (%08lx %08lx %08lx)\n"
+ (print_offset page_offset)
+ unknown1 unknown2 unknown5;
+ if last_modified <> 0. then
+ eprintf "HB %s last_modified <> 0. (%g)\n"
+ (print_offset page_offset) last_modified
+ );
+
+ (* Loop over the blocks in this page. *)
+ loop_over_blocks blocks (data_offset + 32);
+
+ (* Loop over rest of the pages. *)
+ loop_over_pages rest (data_offset + page_size)
+
+ | {_} ->
+ failwithf "%s: invalid hbin at offset %s\n"
+ basename (print_offset data_offset)
+ ) else (
+ (* Reached the end of the official hbins in this file, BUT the
+ * file can be larger than this and might contain stuff. What
+ * does it contain after the hbins? We think just junk, but
+ * we're not sure.
+ *)
+ if not (is_zero_bitstring data) then (
+ eprintf "Junk in file after end of pages:\n";
+ let rec loop data data_offset =
+ bitmatch data with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
+ | { :page_fields } ->
+ eprintf "\tjunk hbin %s 0x%08x\n"
+ (print_offset data_offset) page_size;
+ loop rest (data_offset + page_size);
+ | { _ } ->
+ eprintf "\tother junk %s %s\n"
+ (print_offset data_offset) (print_bitstring data)
+ in
+ loop data data_offset
+ )
+ )
+ and loop_over_blocks blocks block_offset =
+ bitmatch blocks with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
+
+ | { :block_fields } ->
+ assert (block_offset mod 8 = 0);
+
+ fprintf_block stdout block_offset blocks;
+
+ let used, seg_len =
+ if seg_len < 0 then true, -seg_len else false, seg_len in
+
+ let block = block_offset, (seg_len, used, block_data) in
+ block_list := block :: !block_list;
+
+ (* Loop over the rest of the blocks in this page. *)
+ loop_over_blocks rest (block_offset + seg_len)
+
+ | {_} ->
+ failwithf "%s: invalid block near offset %s\n"
+ basename (print_offset block_offset)
+ in
+ loop_over_pages data 0
+
+(* Turn the block_list into a map so we can quickly look up a block
+ * from its offset.
+ *)
+let block_list = !block_list
+let block_map =
+ List.fold_left (
+ fun map (block_offset, block) -> IntMap.add block_offset block map
+ ) IntMap.empty block_list
+let lookup fn offset =
+ try
+ let (_, used, _) as block = IntMap.find offset block_map in
+ if not used then
+ failwithf "%s: %s: lookup: free block %s referenced from hive tree"
+ basename fn (print_offset offset);
+ block
+ with Not_found ->
+ failwithf "%s: %s: lookup: unknown block %s referenced from hive tree"
+ basename fn (print_offset offset)
+
+(* Use this to mark blocks that we've visited. If the hive contains
+ * no unreferenced blocks, then by the end this should just contain
+ * free blocks.
+ *)
+let mark_visited, is_not_visited, unvisited_blocks =
+ let v = ref block_map in
+ let mark_visited offset = v := IntMap.remove offset !v
+ and is_not_visited offset = IntMap.mem offset !v
+ and unvisited_blocks () = !v in
+ mark_visited, is_not_visited, unvisited_blocks
+
+(* Define persistent patterns to match nk-records, vk-records and
+ * sk-records, which are the record types that we especially want to
+ * analyze later. Other blocks types (eg. value lists, lf-records)
+ * have no "spare space" so everything is known about them and we don't
+ * store these.
+ *)
+let bitmatch nk_fields =
+ { "nk" : 2*8 : string;
+ (* Flags stored in the file as a little endian word, hence the
+ * unusual ordering:
+ *)
+ virtmirrored : 1;
+ predefinedhandle : 1; keynameascii : 1; symlinkkey : 1;
+ cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1;
+ unknownflag8000 : 1; unknownflag4000 : 1;
+ unknownflag2000 : 1; unknownflag1000 : 1;
+ unknownflag0800 : 1; unknownflag0400 : 1;
+ virtualstore : 1; virttarget : 1;
+ timestamp : 64 : littleendian, bind (nt_to_time_t timestamp);
+ unknown1 : 4*8 : littleendian;
+ parent : 4*8 : littleendian, bind (get_offset parent);
+ nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys);
+ nr_subkeys_vol : 4*8;
+ subkeys : 4*8 : littleendian, bind (get_offset subkeys);
+ subkeys_vol : 4*8;
+ nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values);
+ vallist : 4*8 : littleendian, bind (get_offset vallist);
+ sk : 4*8 : littleendian, bind (get_offset sk);
+ classname : 4*8 : littleendian, bind (get_offset classname);
+ (* sentinelchicken.com says this is a single 32 bit field
+ * containing maximum number of bytes in a subkey name, however
+ * that does not seem to be correct. We think it is two 16 bit
+ * fields, the first being the maximum number of bytes in the
+ * UTF16-LE encoded version of the subkey names, (since subkey
+ * names are usually ASCII, that would be max length of names * 2).
+ * This is a historical maximum, so it can be greater than the
+ * current maximum name field.
+ *
+ * The second field is often non-zero, but the purpose is unknown.
+ * In the hives we examined it had values 0, 1, 0x20, 0x21, 0xa0,
+ * 0xa1, 0xe1, suggesting some sort of flags.
+ *)
+ max_subkey_name_len : 2*8 : littleendian;
+ unknown2 : 2*8 : littleendian;
+ (* sentinelchicken.com says: maximum subkey CLASSNAME length,
+ * however that does not seem to be correct. In hives I looked
+ * at, it has value 0, 0xc, 0x10, 0x18, 0x1a, 0x28.
+ *)
+ unknown3 : 4*8 : littleendian;
+ (* sentinelchicken.com says: maximum number of bytes in a value
+ * name, however that does not seem to be correct. We think it is
+ * the maximum number of bytes in the UTF16-LE encoded version of
+ * the value names (since value names are usually ASCII, that would
+ * be max length of names * 2). This is a historical maximum, so
+ * it can be greater than the current maximum name field.
+ *)
+ max_vk_name_len : 4*8 : littleendian, bind (Int32.to_int max_vk_name_len);
+ (* sentinelchicken.com says: maximum value data size, and this
+ * agrees with my observations. It is the largest data size (not
+ * seg_len, but vk.data_len) for any value in this key. We think
+ * that this field is a historical max, so eg if a maximally sized
+ * value is deleted then this field is not reduced. Certainly
+ * max_vk_data_len >= the measured maximum in all the hives that we
+ * have observed.
+ *)
+ max_vk_data_len : 4*8 : littleendian, bind (Int32.to_int max_vk_data_len);
+ unknown6 : 4*8 : littleendian;
+ name_len : 2*8 : littleendian;
+ classname_len : 2*8 : littleendian;
+ name : name_len * 8 : string }
+
+let fprintf_nk chan nk =
+ let (_, _, bits) = lookup "fprintf_nk" nk in
+ bitmatch bits with
+ | { :nk_fields } ->
+ fprintf chan
+ "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s %08lx %s %d %ld %s %08lx %d %s %s %s %d %04x %08lx %d %d %08lx %d %d %s\n"
+ (print_offset nk)
+ (if unknownflag8000 then "8" else ".")
+ (if unknownflag4000 then "4" else ".")
+ (if unknownflag2000 then "2" else ".")
+ (if unknownflag1000 then "1" else ".")
+ (if unknownflag0800 then "8" else ".")
+ (if unknownflag0400 then "4" else ".")
+ (if virtualstore then "s" else ".")
+ (if virttarget then "t" else ".")
+ (if virtmirrored then "m" else ".")
+ (if predefinedhandle then "P" else ".")
+ (if keynameascii then "A" else ".")
+ (if symlinkkey then "S" else ".")
+ (if cannotbedeleted then "N" else ".")
+ (if isroot then "R" else ".")
+ (if ismountpoint then "M" else ".")
+ (if isvolatile then "V" else ".")
+ (print_time timestamp)
+ unknown1 (print_offset parent) nr_subkeys nr_subkeys_vol
+ (print_offset subkeys) subkeys_vol
+ nr_values (print_offset vallist)
+ (print_offset sk) (print_offset classname)
+ max_subkey_name_len unknown2 unknown3
+ max_vk_name_len max_vk_data_len unknown6
+ name_len classname_len name
+
+type data_t = Inline of bitstring | Offset of int
+let bitmatch vk_fields =
+ { "vk" : 2*8 : string;
+ name_len : 2*8 : littleendian;
+ (* No one documents the important fact that data_len can have the
+ * top bit set (randomly or is it meaningful?). The length can
+ * also be 0 (or 0x80000000) if the data type is NONE.
+ *)
+ data_len : 4*8
+ : littleendian, bind (
+ let data_len = Int32.logand data_len 0x7fff_ffff_l in
+ Int32.to_int data_len
+ );
+ (* Inline data if len <= 4, offset otherwise.
+ *
+ * The data itself depends on the type field.
+ *
+ * For REG_SZ type, the data always seems to be NUL-terminated, which
+ * means because these strings are often UTF-16LE, that the string will
+ * end with \0\0 bytes. The termination bytes are included in data_len.
+ *
+ * For REG_MULTI_SZ, see
+ * http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx
+ *)
+ data : 4*8
+ : bitstring, bind (
+ if data_len <= 4 then
+ Inline (takebits (data_len*8) data)
+ else (
+ let offset =
+ bitmatch data with { offset : 4*8 : littleendian } -> offset in
+ let offset = get_offset offset in
+ Offset offset
+ )
+ );
+ t : 4*8 : littleendian, bind (Int32.to_int t);
+ (* Flags, stored as a little-endian word: *)
+ unknown1 : 7;
+ nameisascii : 1; (* Clear for default [zero-length] name, always set
+ * otherwise in registries that we found. Perhaps this
+ * is really "nameisdefault" flag?
+ *)
+ unknown2 : 8;
+ (* Unknown field, usually contains something. *)
+ unknown3 : 2*8 : littleendian;
+ name : name_len * 8 : string }
+
+let fprintf_vk chan vk =
+ let (_, _, bits) = lookup "fprintf_vk" vk in
+ bitmatch bits with
+ | { :vk_fields } ->
+ let real_data =
+ match data with
+ | Inline data -> data
+ | Offset offset ->
+ let (_, _, bits) = lookup "fprintf_vk (data)" offset in
+ bits in
+ fprintf chan "VK %s %s %d %s%s %s %08x %s %08x %08x\n"
+ (print_offset vk)
+ name data_len
+ (match data with
+ | Inline _ -> ""
+ | Offset offset -> "["^print_offset offset^"]")
+ (print_bitstring real_data)
+ (print_vk_type t)
+ unknown1 (if nameisascii then "A" else "L")
+ unknown2 unknown3
+
+let bitmatch sk_fields =
+ { "sk" : 2*8 : string;
+ unknown1 : 2*8 : littleendian;
+ sk_next : 4*8 : littleendian, bind (get_offset sk_next);
+ sk_prev : 4*8 : littleendian, bind (get_offset sk_prev);
+ refcount : 4*8 : littleendian, bind (Int32.to_int refcount);
+ sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len);
+ sec_desc : sec_len * 8 : bitstring }
+
+let fprintf_sk chan sk =
+ let (_, _, bits) = lookup "fprintf_sk" sk in
+ bitmatch bits with
+ | { :sk_fields } ->
+ fprintf chan "SK %s %04x %s %s %d %d\n"
+ (print_offset sk) unknown1
+ (print_offset sk_next) (print_offset sk_prev)
+ refcount sec_len
+ (* print_bitstring sec_desc -- suppress this *)
+
+(* Store lists of records we encounter (lists of offsets). *)
+let nk_records = ref []
+and vk_records = ref []
+and sk_records = ref []
+
+(* Functions to visit each block, starting at the root. Each block
+ * that we visit is printed.
+ *)
+let rec visit_nk ?(nk_is_root = false) nk =
+ let (_, _, bits) = lookup "visit_nk" nk in
+ mark_visited nk;
+ (bitmatch bits with
+ | { :nk_fields } ->
+ fprintf_nk stdout nk;
+
+ nk_records := nk :: !nk_records;
+
+ (* Check the isroot flag is only set on the root node. *)
+ assert (isroot = nk_is_root);
+
+ if unknownflag8000 then
+ eprintf "NK %s unknownflag8000 is set\n" (print_offset nk);
+ if unknownflag4000 then
+ eprintf "NK %s unknownflag4000 is set\n" (print_offset nk);
+ if unknownflag2000 then
+ eprintf "NK %s unknownflag2000 is set\n" (print_offset nk);
+ if unknownflag1000 then
+ eprintf "NK %s unknownflag1000 is set\n" (print_offset nk);
+ if unknownflag0800 then
+ eprintf "NK %s unknownflag0800 is set\n" (print_offset nk);
+ if unknownflag0400 then
+ eprintf "NK %s unknownflag0400 is set\n" (print_offset nk);
+ if unknown1 <> 0_l then
+ eprintf "NK %s unknown1 <> 0 (%08lx)\n" (print_offset nk) unknown1;
+ if unknown2 <> 0 then
+ eprintf "NK %s unknown2 <> 0 (%04x)\n" (print_offset nk) unknown2;
+ if unknown3 <> 0_l then
+ eprintf "NK %s unknown3 <> 0 (%08lx)\n" (print_offset nk) unknown3;
+ if unknown6 <> 0_l then
+ eprintf "NK %s unknown6 <> 0 (%08lx)\n" (print_offset nk) unknown6;
+
+ (* -- common, assume it's not an error
+ if classname = -1 then
+ eprintf "NK %s has no classname\n" (print_offset nk);
+ if classname_len = 0 then
+ eprintf "NK %s has zero-length classname\n" (print_offset nk);
+ *)
+ if sk = -1 then
+ eprintf "NK %s has no sk-record\n" (print_offset nk);
+ if name_len = 0 then
+ eprintf "NK %s has zero-length name\n" (print_offset nk);
+
+ (* Visit the values first at this node. *)
+ let max_data_len, max_name_len =
+ if vallist <> -1 then
+ visit_vallist nr_values vallist
+ else
+ 0, 0 in
+
+ if max_vk_data_len < max_data_len then
+ eprintf "NK %s nk.max_vk_data_len (%d) < actual max data_len (%d)\n"
+ (print_offset nk) max_vk_data_len max_data_len;
+
+ if max_vk_name_len < max_name_len * 2 then
+ eprintf "NK %s nk.max_vk_name_len (%d) < actual max name_len * 2 (%d)\n"
+ (print_offset nk) max_vk_name_len (max_name_len * 2);
+
+ (* Visit the subkeys of this node. *)
+ if subkeys <> -1 then (
+ let counted, max_name_len = visit_subkeys subkeys in
+
+ if counted <> nr_subkeys then
+ failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n"
+ basename nr_subkeys counted (print_offset subkeys);
+
+ if max_subkey_name_len < max_name_len * 2 then
+ eprintf "NK %s nk.max_subkey_name_len (%d) < actual max name_len * 2 (%d)\n"
+ (print_offset nk) max_subkey_name_len (max_name_len * 2);
+ );
+
+ (* Visit the sk-record and classname. *)
+ if sk <> -1 then
+ visit_sk sk;
+ if classname <> -1 then
+ visit_classname classname classname_len;
+
+ | {_} ->
+ failwithf "%s: invalid nk block at offset %s\n"
+ basename (print_offset nk)
+ )
+
+and visit_vallist nr_values vallist =
+ let (seg_len, _, bits) = lookup "visit_vallist" vallist in
+ mark_visited vallist;
+ printf "VL %s %d %d\n" (print_offset vallist) nr_values seg_len;
+ visit_values_in_vallist nr_values vallist bits
+
+and visit_values_in_vallist nr_values vallist bits =
+ if nr_values > 0 then (
+ bitmatch bits with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+ assert (nr_values = 0);
+ 0, 0
+
+ | { value : 4*8 : littleendian, bind (get_offset value);
+ rest : -1 : bitstring } ->
+ let data_len, name_len = visit_vk value in
+ let max_data_len, max_name_len =
+ visit_values_in_vallist (nr_values-1) vallist rest in
+ max max_data_len data_len, max max_name_len name_len
+
+ | {_} ->
+ failwithf "%s: invalid offset in value list at %s\n"
+ basename (print_offset vallist)
+ ) else 0, 0
+
+and visit_vk vk =
+ let (_, _, bits) = lookup "visit_vk" vk in
+ mark_visited vk;
+
+ (bitmatch bits with
+ | { :vk_fields } ->
+ fprintf_vk stdout vk;
+
+ if unknown1 <> 0 then
+ eprintf "VK %s unknown1 flags set (%02x)\n"
+ (print_offset vk) unknown1;
+ if unknown2 <> 0 then
+ eprintf "VK %s unknown2 flags set (%02x)\n"
+ (print_offset vk) unknown2;
+ if unknown3 <> 0 then
+ eprintf "VK %s unknown3 flags set (%04x)\n"
+ (print_offset vk) unknown3;
+
+ (* Note this is common for default [ie. zero-length] key names. *)
+ if not nameisascii && name_len > 0 then
+ eprintf "VK %s has non-ASCII name flag set (name is %s)\n"
+ (print_offset vk) (print_binary_string name);
+
+ vk_records := vk :: !vk_records;
+ (match data with
+ | Inline data -> ()
+ | Offset offset ->
+ let _ = lookup "visit_vk (data)" offset in
+ mark_visited offset
+ );
+
+ data_len, name_len
+
+ | {_} ->
+ failwithf "%s: invalid vk block at offset %s\n"
+ basename (print_offset vk)
+ )
+
+(* Visits subkeys, recursing through intermediate lf/lh/ri structures,
+ * and returns the number of subkeys actually seen.
+ *)
+and visit_subkeys subkeys =
+ let (_, _, bits) = lookup "visit_subkeys" subkeys in
+ mark_visited subkeys;
+ (bitmatch bits with
+ | { ("lf"|"lh") : 2*8 : string;
+ len : 2*8 : littleendian; (* number of subkeys of this node *)
+ rest : len*8*8 : bitstring } ->
+ printf "LF %s %d\n" (print_offset subkeys) len;
+ visit_subkeys_in_lf_list subkeys len rest
+
+ | { "ri" : 2*8 : string;
+ len : 2*8 : littleendian;
+ rest : len*4*8 : bitstring } ->
+ printf "RI %s %d\n" (print_offset subkeys) len;
+ visit_subkeys_in_ri_list subkeys len rest
+
+ (* In theory you can have an li-record here, but we've never
+ * seen one.
+ *)
+
+ | { "nk" : 2*8 : string } ->
+ visit_nk subkeys;
+ let name_len = name_len_of_nk subkeys in
+ 1, name_len
+
+ | {_} ->
+ failwithf "%s: invalid subkey node found at %s\n"
+ basename (print_offset subkeys)
+ )
+
+and visit_subkeys_in_lf_list subkeys_top len bits =
+ if len > 0 then (
+ bitmatch bits with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+ assert (len = 0);
+ 0, 0
+
+ | { offset : 4*8 : littleendian, bind (get_offset offset);
+ _ (* hash *) : 4*8 : bitstring;
+ rest : -1 : bitstring } ->
+ let c1, name_len1 = visit_subkeys offset in
+ let c2, name_len2 = visit_subkeys_in_lf_list subkeys_top (len-1) rest in
+ c1 + c2, max name_len1 name_len2
+
+ | {_} ->
+ failwithf "%s: invalid subkey in lf/lh list at %s\n"
+ basename (print_offset subkeys_top)
+ ) else 0, 0
+
+and visit_subkeys_in_ri_list subkeys_top len bits =
+ if len > 0 then (
+ bitmatch bits with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+ assert (len = 0);
+ 0, 0
+
+ | { offset : 4*8 : littleendian, bind (get_offset offset);
+ rest : -1 : bitstring } ->
+ let c1, name_len1 = visit_subkeys offset in
+ let c2, name_len2 = visit_subkeys_in_ri_list subkeys_top (len-1) rest in
+ c1 + c2, max name_len1 name_len2
+
+ | {_} ->
+ failwithf "%s: invalid subkey in ri list at %s\n"
+ basename (print_offset subkeys_top)
+ ) else 0, 0
+
+and name_len_of_nk nk =
+ let (_, _, bits) = lookup "name_len_of_nk" nk in
+ bitmatch bits with
+ | { :nk_fields } -> name_len
+
+and visit_sk sk =
+ let (_, _, bits) = lookup "visit_sk" sk in
+ if is_not_visited sk then (
+ mark_visited sk;
+ (bitmatch bits with
+ | { :sk_fields } ->
+ fprintf_sk stdout sk;
+
+ if unknown1 <> 0 then
+ eprintf "SK %s unknown1 <> 0 (%04x)\n" (print_offset sk) unknown1;
+
+ sk_records := sk :: !sk_records
+
+ | {_} ->
+ failwithf "%s: invalid sk-record at %s\n"
+ basename (print_offset sk)
+ )
+ )
+
+and visit_classname classname classname_len =
+ let (seg_len, _, bits) = lookup "visit_classname" classname in
+ mark_visited classname;
+ assert (seg_len >= classname_len);
+ printf "CL %s %s\n" (print_offset classname) (print_bitstring bits)
+
+let () =
+ visit_nk ~nk_is_root:true root_key
+
+(* These are immutable now. *)
+let nk_records = !nk_records
+let vk_records = !vk_records
+let sk_records = !sk_records
+
+(* So we can rapidly tell what is an nk/vk/sk offset. *)
+let nk_set =
+ List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty nk_records
+let vk_set =
+ List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty vk_records
+let sk_set =
+ List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty sk_records
+
+(* Now after visiting all the blocks, are there any used blocks which
+ * are unvisited? If there are any then that would indicate either (a)
+ * that the hive contains unreferenced blocks, or (b) that there are
+ * referenced blocks that we did not visit because we don't have a full
+ * understanding of the hive format.
+ *
+ * Windows 7 registries often contain a few of these -- not clear
+ * how serious they are, but don't fail here.
+ *)
+let () =
+ let unvisited = unvisited_blocks () in
+ IntMap.iter (
+ fun offset block ->
+ match block with
+ | (_, false, _) -> () (* ignore unused blocks *)
+ | (seg_len, true, _) ->
+ eprintf "used block %s (length %d) is not referenced\n"
+ (print_offset offset) seg_len
+ ) unvisited
+
+(* Check the SKs are:
+ * (a) linked into a single circular list through the sk_prev/sk_next
+ * pointers
+ * (b) refcounts are correct
+ *)
+let () =
+ if List.length sk_records > 0 then (
+ let sk0 = List.hd sk_records in (* start at any arbitrary sk *)
+ (* This loop follows the chain of sk pointers until we arrive
+ * back at the original, checking prev/next are consistent.
+ *)
+ let rec loop visited prevsk sk =
+ if sk <> sk0 then (
+ if not (IntSet.mem sk sk_set) then
+ eprintf "SK %s not an sk-record (faulty sk_next somewhere)\n"
+ (print_offset sk)
+ else (
+ let _, _, bits = lookup "loop sk circular list" sk in
+ bitmatch bits with
+ | { :sk_fields } ->
+ if sk_prev <> prevsk then
+ eprintf "SK %s sk_prev != previous sk (%s, %s)\n"
+ (print_offset sk)
+ (print_offset sk_prev) (print_offset prevsk);
+ if IntSet.mem sk visited then
+ eprintf "SK %s already visited (bad circular list)\n"
+ (print_offset sk);
+ let visited = IntSet.add sk visited in
+ loop visited sk sk_next
+ )
+ )
+ in
+ let _, _, bits = lookup "start sk circular list" sk0 in
+ (bitmatch bits with
+ | { :sk_fields } ->
+ loop IntSet.empty sk_prev sk0
+ );
+
+ (* For every nk-record, if it references an sk-record count that,
+ * then check this matches the refcounts in the sk-records
+ * themselves.
+ *)
+ let refcounts = Counter.create () in
+ List.iter (
+ fun nk ->
+ let _, _, bits = lookup "sk refcounter (nk)" nk in
+ (bitmatch bits with
+ | { :nk_fields } ->
+ Counter.incr refcounts sk
+ )
+ ) nk_records;
+
+ List.iter (
+ fun sk ->
+ let _, _, bits = lookup "sk refcounter (sk)" sk in
+ (bitmatch bits with
+ | { :sk_fields } ->
+ let actual = Counter.get refcounts sk in
+ if actual <> refcount then
+ eprintf "SK %s incorrect refcount (actual %d, in file %d)\n"
+ (print_offset sk) actual refcount
+ )
+ ) sk_records
+ )