(* Windows Registry reverse-engineering tool. * Copyright (C) 2010 Red Hat Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * For existing information on the registry format, please refer * to the following documents. Note they are both incomplete * and inaccurate in some respects. * * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf * http://pogostick.net/~pnh/ntpasswd/WinReg.txt *) open Bitstring open ExtString open Printf open Visualizer_utils open Visualizer_NT_time let () = if Array.length Sys.argv <> 2 then ( eprintf "Error: missing argument. Usage: %s hivefile > out where 'hivefile' is the input hive file from a Windows machine 'out' is an output file where we will write all the keys, values etc for extended debugging purposes. Errors, inconsistencies and unexpected fields in the hive file are written to stderr. " Sys.executable_name; exit 1 ) let filename = Sys.argv.(1) let basename = Filename.basename filename (* Load the file. *) let bits = bitstring_of_file filename (* Split into header + data at the 4KB boundary. *) let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits (* Define a persistent pattern which matches the header fields. By * using persistent patterns, we can reuse them later in the * program. *) let bitmatch header_fields = { "regf" : 4*8 : string; seq1 : 4*8 : littleendian; seq2 : 4*8 : littleendian; last_modified : 64 : littleendian, bind (nt_to_time_t last_modified); major : 4*8 : littleendian; minor : 4*8 : littleendian; (* "Type". Contains 0. *) unknown1 : 4*8 : littleendian; (* "Format". Contains 1. *) unknown2 : 4*8 : littleendian; root_key : 4*8 : littleendian, bind (get_offset root_key); end_pages : 4*8 : littleendian, bind (get_offset end_pages); (* "Cluster". Contains 1. *) unknown3 : 4*8 : littleendian; filename : 64*8 : string; (* All three GUIDs here confirmed in Windows 7 registries. In * Windows <= 2003 these GUID fields seem to contain junk. * * If you write zeroes to the GUID fields, load and unload in Win7 * REGEDIT, then Windows 7 writes some random GUIDs. * * Also (on Win7) unknownguid1 == unknownguid2. unknownguid3 is * different. *) unknownguid1 : 16*8 : bitstring; unknownguid2 : 16*8 : bitstring; (* Wrote zero to unknown4, loaded and unloaded it in Win7 REGEDIT, * and it still contained zero. In existing registries it seems to * contain random junk. *) unknown4 : 4*8 : littleendian; unknownguid3 : 16*8 : bitstring; (* If you write zero to unknown5, load and unload it in REGEDIT, * Windows 7 puts the string "rmtm" here. Existing registries also * seen containing this string. However on older Windows it can * be all zeroes. *) unknown5 : 4*8 : string; (* This seems to contain junk from other parts of the registry. I * wrote zeroes here, loaded and unloaded it in Win7 REGEDIT, and * it still contained zeroes. *) unknown6 : 340*8 : bitstring; csum : 4*8 : littleendian, save_offset_to (crc_offset), check (assert (crc_offset = 0x1fc * 8); true); unknown7 : (0x1000-0x200)*8 : bitstring } let fprintf_header chan bits = bitmatch bits with | { :header_fields } -> fprintf chan "HD %6ld %6ld %s %ld.%ld %08lx %08lx %s %s %08lx %s %s %s %08lx %s %s %s %08lx %s\n" seq1 seq2 (print_time last_modified) major minor unknown1 unknown2 (print_offset root_key) (print_offset end_pages) unknown3 (print_utf16 filename) (print_guid unknownguid1) (print_guid unknownguid2) unknown4 (print_guid unknownguid3) unknown5 (print_bitstring unknown6) csum (print_bitstring unknown7) (* Parse the header and check it. *) let root_key, end_pages = bitmatch header with | { :header_fields } -> fprintf_header stdout header; if major <> 1_l then eprintf "HD hive file major <> 1 (major.minor = %ld.%ld)\n" major minor; if seq1 <> seq2 then eprintf "HD hive file sequence numbers should match (%ld <> %ld)\n" seq1 seq2; if unknown1 <> 0_l then eprintf "HD unknown1 field <> 0 (%08lx)\n" unknown1; if unknown2 <> 1_l then eprintf "HD unknown2 field <> 1 (%08lx)\n" unknown2; if unknown3 <> 1_l then eprintf "HD unknown3 field <> 1 (%08lx)\n" unknown3; if not (equals unknownguid1 unknownguid2) then eprintf "HD unknownguid1 <> unknownguid2 (%s, %s)\n" (print_guid unknownguid1) (print_guid unknownguid2); (* We think this is junk. if unknown4 <> 0_l then eprintf "HD unknown4 field <> 0 (%08lx)\n" unknown4; *) if unknown5 <> "rmtm" && unknown5 <> "\000\000\000\000" then eprintf "HD unknown5 field <> \"rmtm\" & <> zeroes (%s)\n" unknown5; (* We think this is junk. if not (is_zero_bitstring unknown6) then eprintf "HD unknown6 area is not zero (%s)\n" (print_bitstring unknown6); *) if not (is_zero_bitstring unknown7) then eprintf "HD unknown7 area is not zero (%s)\n" (print_bitstring unknown7); root_key, end_pages | {_} -> failwithf "%s: this doesn't look like a registry hive file\n" basename (* Define persistent patterns to match page and block fields. *) let bitmatch page_fields = { "hbin" : 4*8 : string; page_offset : 4*8 : littleendian, bind (get_offset page_offset); page_size : 4*8 : littleendian, check (Int32.rem page_size 4096_l = 0_l), bind (Int32.to_int page_size); (* In the first hbin in the file these fields contain something. * In subsequent hbins these fields are all zero. * * From existing hives (first hbin only): * * unknown1 unknown2 unknown5 * 00 00 00 00 00 00 00 00 9C 77 3B 02 6A 7D CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 50 3A 15 07 B5 9B CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 57 86 90 D4 9A 58 CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 52 3F 90 9D CF 7C CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 E8 86 C1 17 BD 06 CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 4A 77 CE 7A CF 7C CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 E4 EA 23 FF 69 7D CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 50 13 BA 8D A2 9A CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 0E 07 93 13 BD 06 CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 9D 55 D0 B3 99 58 CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 46 AC FF 8B CF 7C CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 80 29 2D 02 6A 7D CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 90 8D 36 07 B5 9B CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 5C 9B 8B B8 6A 06 CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 85 9F BB 99 9A 58 CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 BE 3D 21 02 6A 7D CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 70 53 09 07 B5 9B CA 01 00 00 00 00 * 00 00 00 00 00 00 00 00 5B 62 42 B6 9A 58 CA 01 00 00 00 00 * 01 00 00 00 00 00 00 00 B2 46 9B 9E CF 7C CA 01 00 00 00 00 * 01 00 00 00 00 00 00 00 CA 88 EE 1A BD 06 CA 01 00 00 00 00 * * From the above we worked out that fields 3 and 4 are an NT * timestamp, which seems to be "last modified" (when REGEDIT * unloads a hive it updates this timestamp even if nothing * has been changed). *) unknown1 : 4*8 : littleendian; (* usually zero, occasionally 1 *) unknown2 : 4*8 : littleendian; (* always zero? *) last_modified : 64 : littleendian, bind (if page_offset = 0 then nt_to_time_t last_modified else ( assert (last_modified = 0_L); 0. ) ); (* The "B.D." document said this field contains the page size, but * this is not true. This misinformation has been copied to the * sentinelchicken documentation too. *) unknown5 : 4*8 : littleendian; (* always zero? *) (* Now the blocks in this page follow. *) blocks : (page_size - 32) * 8 : bitstring; rest : -1 : bitstring } let fprintf_page chan bits = bitmatch bits with | { :page_fields } -> fprintf chan "HB %s %08x %08lx %08lx %s %08lx\n" (print_offset page_offset) page_size unknown1 unknown2 (if page_offset = 0 then print_time last_modified else string_of_float last_modified) unknown5 let bitmatch block_fields = { seg_len : 4*8 : littleendian, bind (Int32.to_int seg_len); block_data : (abs seg_len - 4) * 8 : bitstring; rest : -1 : bitstring } let fprintf_block chan block_offset bits = bitmatch bits with | { :block_fields } -> fprintf chan "BL %s %s %d\n" (print_offset block_offset) (if seg_len < 0 then "used" else "free") (if seg_len < 0 then -seg_len else seg_len) (* Iterate over the pages and blocks. In the process we will examine * each page (hbin) header. Also we will build block_list which is a * list of (block offset, length, used flag, data). *) let block_list = ref [] let () = let rec loop_over_pages data data_offset = if data_offset < end_pages then ( bitmatch data with | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () | { :page_fields } -> fprintf_page stdout data; assert (page_offset = data_offset); if data_offset = 0 then ( (* first hbin only *) if unknown1 <> 0_l then eprintf "HB %s unknown1 field <> 0 (%08lx)\n" (print_offset page_offset) unknown1; if unknown2 <> 0_l then eprintf "HB %s unknown2 field <> 0 (%08lx)\n" (print_offset page_offset) unknown2; if unknown5 <> 0_l then eprintf "HB %s unknown5 field <> 0 (%08lx)\n" (print_offset page_offset) unknown5 ) else ( (* subsequent hbins *) if unknown1 <> 0_l || unknown2 <> 0_l || unknown5 <> 0_l then eprintf "HB %s unknown fields <> 0 (%08lx %08lx %08lx)\n" (print_offset page_offset) unknown1 unknown2 unknown5; if last_modified <> 0. then eprintf "HB %s last_modified <> 0. (%g)\n" (print_offset page_offset) last_modified ); (* Loop over the blocks in this page. *) loop_over_blocks blocks (data_offset + 32); (* Loop over rest of the pages. *) loop_over_pages rest (data_offset + page_size) | {_} -> failwithf "%s: invalid hbin at offset %s\n" basename (print_offset data_offset) ) else ( (* Reached the end of the official hbins in this file, BUT the * file can be larger than this and might contain stuff. What * does it contain after the hbins? We think just junk, but * we're not sure. *) if not (is_zero_bitstring data) then ( eprintf "Junk in file after end of pages:\n"; let rec loop data data_offset = bitmatch data with | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () | { :page_fields } -> eprintf "\tjunk hbin %s 0x%08x\n" (print_offset data_offset) page_size; loop rest (data_offset + page_size); | { _ } -> eprintf "\tother junk %s %s\n" (print_offset data_offset) (print_bitstring data) in loop data data_offset ) ) and loop_over_blocks blocks block_offset = bitmatch blocks with | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () | { :block_fields } -> assert (block_offset mod 8 = 0); fprintf_block stdout block_offset blocks; let used, seg_len = if seg_len < 0 then true, -seg_len else false, seg_len in let block = block_offset, (seg_len, used, block_data) in block_list := block :: !block_list; (* Loop over the rest of the blocks in this page. *) loop_over_blocks rest (block_offset + seg_len) | {_} -> failwithf "%s: invalid block near offset %s\n" basename (print_offset block_offset) in loop_over_pages data 0 (* Turn the block_list into a map so we can quickly look up a block * from its offset. *) let block_list = !block_list let block_map = List.fold_left ( fun map (block_offset, block) -> IntMap.add block_offset block map ) IntMap.empty block_list let lookup fn offset = try let (_, used, _) as block = IntMap.find offset block_map in if not used then failwithf "%s: %s: lookup: free block %s referenced from hive tree" basename fn (print_offset offset); block with Not_found -> failwithf "%s: %s: lookup: unknown block %s referenced from hive tree" basename fn (print_offset offset) (* Use this to mark blocks that we've visited. If the hive contains * no unreferenced blocks, then by the end this should just contain * free blocks. *) let mark_visited, is_not_visited, unvisited_blocks = let v = ref block_map in let mark_visited offset = v := IntMap.remove offset !v and is_not_visited offset = IntMap.mem offset !v and unvisited_blocks () = !v in mark_visited, is_not_visited, unvisited_blocks (* Define persistent patterns to match nk-records, vk-records and * sk-records, which are the record types that we especially want to * analyze later. Other blocks types (eg. value lists, lf-records) * have no "spare space" so everything is known about them and we don't * store these. *) let bitmatch nk_fields = { "nk" : 2*8 : string; (* Flags stored in the file as a little endian word, hence the * unusual ordering: *) virtmirrored : 1; predefinedhandle : 1; keynameascii : 1; symlinkkey : 1; cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1; unknownflag8000 : 1; unknownflag4000 : 1; unknownflag2000 : 1; unknownflag1000 : 1; unknownflag0800 : 1; unknownflag0400 : 1; virtualstore : 1; virttarget : 1; timestamp : 64 : littleendian, bind (nt_to_time_t timestamp); unknown1 : 4*8 : littleendian; parent : 4*8 : littleendian, bind (get_offset parent); nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys); nr_subkeys_vol : 4*8; subkeys : 4*8 : littleendian, bind (get_offset subkeys); subkeys_vol : 4*8; nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values); vallist : 4*8 : littleendian, bind (get_offset vallist); sk : 4*8 : littleendian, bind (get_offset sk); classname : 4*8 : littleendian, bind (get_offset classname); (* sentinelchicken.com says this is a single 32 bit field * containing maximum number of bytes in a subkey name, however * that does not seem to be correct. We think it is two 16 bit * fields, the first being the maximum number of bytes in the * UTF16-LE encoded version of the subkey names, (since subkey * names are usually ASCII, that would be max length of names * 2). * This is a historical maximum, so it can be greater than the * current maximum name field. * * The second field is often non-zero, but the purpose is unknown. * In the hives we examined it had values 0, 1, 0x20, 0x21, 0xa0, * 0xa1, 0xe1, suggesting some sort of flags. *) max_subkey_name_len : 2*8 : littleendian; unknown2 : 2*8 : littleendian; (* sentinelchicken.com says: maximum subkey CLASSNAME length, * however that does not seem to be correct. In hives I looked * at, it has value 0, 0xc, 0x10, 0x18, 0x1a, 0x28. *) unknown3 : 4*8 : littleendian; (* sentinelchicken.com says: maximum number of bytes in a value * name, however that does not seem to be correct. We think it is * the maximum number of bytes in the UTF16-LE encoded version of * the value names (since value names are usually ASCII, that would * be max length of names * 2). This is a historical maximum, so * it can be greater than the current maximum name field. *) max_vk_name_len : 4*8 : littleendian, bind (Int32.to_int max_vk_name_len); (* sentinelchicken.com says: maximum value data size, and this * agrees with my observations. It is the largest data size (not * seg_len, but vk.data_len) for any value in this key. We think * that this field is a historical max, so eg if a maximally sized * value is deleted then this field is not reduced. Certainly * max_vk_data_len >= the measured maximum in all the hives that we * have observed. *) max_vk_data_len : 4*8 : littleendian, bind (Int32.to_int max_vk_data_len); unknown6 : 4*8 : littleendian; name_len : 2*8 : littleendian; classname_len : 2*8 : littleendian; name : name_len * 8 : string } let fprintf_nk chan nk = let (_, _, bits) = lookup "fprintf_nk" nk in bitmatch bits with | { :nk_fields } -> fprintf chan "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s %08lx %s %d %ld %s %08lx %d %s %s %s %d %04x %08lx %d %d %08lx %d %d %s\n" (print_offset nk) (if unknownflag8000 then "8" else ".") (if unknownflag4000 then "4" else ".") (if unknownflag2000 then "2" else ".") (if unknownflag1000 then "1" else ".") (if unknownflag0800 then "8" else ".") (if unknownflag0400 then "4" else ".") (if virtualstore then "s" else ".") (if virttarget then "t" else ".") (if virtmirrored then "m" else ".") (if predefinedhandle then "P" else ".") (if keynameascii then "A" else ".") (if symlinkkey then "S" else ".") (if cannotbedeleted then "N" else ".") (if isroot then "R" else ".") (if ismountpoint then "M" else ".") (if isvolatile then "V" else ".") (print_time timestamp) unknown1 (print_offset parent) nr_subkeys nr_subkeys_vol (print_offset subkeys) subkeys_vol nr_values (print_offset vallist) (print_offset sk) (print_offset classname) max_subkey_name_len unknown2 unknown3 max_vk_name_len max_vk_data_len unknown6 name_len classname_len name type data_t = Inline of bitstring | Offset of int let bitmatch vk_fields = { "vk" : 2*8 : string; name_len : 2*8 : littleendian; (* No one documents the important fact that data_len can have the * top bit set (randomly or is it meaningful?). The length can * also be 0 (or 0x80000000) if the data type is NONE. *) data_len : 4*8 : littleendian, bind ( let data_len = Int32.logand data_len 0x7fff_ffff_l in Int32.to_int data_len ); (* Inline data if len <= 4, offset otherwise. * * The data itself depends on the type field. * * For REG_SZ type, the data always seems to be NUL-terminated, which * means because these strings are often UTF-16LE, that the string will * end with \0\0 bytes. The termination bytes are included in data_len. * * For REG_MULTI_SZ, see * http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx *) data : 4*8 : bitstring, bind ( if data_len <= 4 then Inline (takebits (data_len*8) data) else ( let offset = bitmatch data with { offset : 4*8 : littleendian } -> offset in let offset = get_offset offset in Offset offset ) ); t : 4*8 : littleendian, bind (Int32.to_int t); (* Flags, stored as a little-endian word: *) unknown1 : 7; nameisascii : 1; (* Clear for default [zero-length] name, always set * otherwise in registries that we found. Perhaps this * is really "nameisdefault" flag? *) unknown2 : 8; (* Unknown field, usually contains something. *) unknown3 : 2*8 : littleendian; name : name_len * 8 : string } let fprintf_vk chan vk = let (_, _, bits) = lookup "fprintf_vk" vk in bitmatch bits with | { :vk_fields } -> let real_data = match data with | Inline data -> data | Offset offset -> let (_, _, bits) = lookup "fprintf_vk (data)" offset in bits in fprintf chan "VK %s %s %d %s%s %s %08x %s %08x %08x\n" (print_offset vk) name data_len (match data with | Inline _ -> "" | Offset offset -> "["^print_offset offset^"]") (print_bitstring real_data) (print_vk_type t) unknown1 (if nameisascii then "A" else "L") unknown2 unknown3 let bitmatch sk_fields = { "sk" : 2*8 : string; unknown1 : 2*8 : littleendian; sk_next : 4*8 : littleendian, bind (get_offset sk_next); sk_prev : 4*8 : littleendian, bind (get_offset sk_prev); refcount : 4*8 : littleendian, bind (Int32.to_int refcount); sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len); sec_desc : sec_len * 8 : bitstring } let fprintf_sk chan sk = let (_, _, bits) = lookup "fprintf_sk" sk in bitmatch bits with | { :sk_fields } -> fprintf chan "SK %s %04x %s %s %d %d\n" (print_offset sk) unknown1 (print_offset sk_next) (print_offset sk_prev) refcount sec_len (* print_bitstring sec_desc -- suppress this *) (* Store lists of records we encounter (lists of offsets). *) let nk_records = ref [] and vk_records = ref [] and sk_records = ref [] (* Functions to visit each block, starting at the root. Each block * that we visit is printed. *) let rec visit_nk ?(nk_is_root = false) nk = let (_, _, bits) = lookup "visit_nk" nk in mark_visited nk; (bitmatch bits with | { :nk_fields } -> fprintf_nk stdout nk; nk_records := nk :: !nk_records; (* Check the isroot flag is only set on the root node. *) assert (isroot = nk_is_root); if unknownflag8000 then eprintf "NK %s unknownflag8000 is set\n" (print_offset nk); if unknownflag4000 then eprintf "NK %s unknownflag4000 is set\n" (print_offset nk); if unknownflag2000 then eprintf "NK %s unknownflag2000 is set\n" (print_offset nk); if unknownflag1000 then eprintf "NK %s unknownflag1000 is set\n" (print_offset nk); if unknownflag0800 then eprintf "NK %s unknownflag0800 is set\n" (print_offset nk); if unknownflag0400 then eprintf "NK %s unknownflag0400 is set\n" (print_offset nk); if unknown1 <> 0_l then eprintf "NK %s unknown1 <> 0 (%08lx)\n" (print_offset nk) unknown1; if unknown2 <> 0 then eprintf "NK %s unknown2 <> 0 (%04x)\n" (print_offset nk) unknown2; if unknown3 <> 0_l then eprintf "NK %s unknown3 <> 0 (%08lx)\n" (print_offset nk) unknown3; if unknown6 <> 0_l then eprintf "NK %s unknown6 <> 0 (%08lx)\n" (print_offset nk) unknown6; (* -- common, assume it's not an error if classname = -1 then eprintf "NK %s has no classname\n" (print_offset nk); if classname_len = 0 then eprintf "NK %s has zero-length classname\n" (print_offset nk); *) if sk = -1 then eprintf "NK %s has no sk-record\n" (print_offset nk); if name_len = 0 then eprintf "NK %s has zero-length name\n" (print_offset nk); (* Visit the values first at this node. *) let max_data_len, max_name_len = if vallist <> -1 then visit_vallist nr_values vallist else 0, 0 in if max_vk_data_len < max_data_len then eprintf "NK %s nk.max_vk_data_len (%d) < actual max data_len (%d)\n" (print_offset nk) max_vk_data_len max_data_len; if max_vk_name_len < max_name_len * 2 then eprintf "NK %s nk.max_vk_name_len (%d) < actual max name_len * 2 (%d)\n" (print_offset nk) max_vk_name_len (max_name_len * 2); (* Visit the subkeys of this node. *) if subkeys <> -1 then ( let counted, max_name_len = visit_subkeys subkeys in if counted <> nr_subkeys then failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n" basename nr_subkeys counted (print_offset subkeys); if max_subkey_name_len < max_name_len * 2 then eprintf "NK %s nk.max_subkey_name_len (%d) < actual max name_len * 2 (%d)\n" (print_offset nk) max_subkey_name_len (max_name_len * 2); ); (* Visit the sk-record and classname. *) if sk <> -1 then visit_sk sk; if classname <> -1 then visit_classname classname classname_len; | {_} -> failwithf "%s: invalid nk block at offset %s\n" basename (print_offset nk) ) and visit_vallist nr_values vallist = let (seg_len, _, bits) = lookup "visit_vallist" vallist in mark_visited vallist; printf "VL %s %d %d\n" (print_offset vallist) nr_values seg_len; visit_values_in_vallist nr_values vallist bits and visit_values_in_vallist nr_values vallist bits = if nr_values > 0 then ( bitmatch bits with | { rest : -1 : bitstring } when bitstring_length rest = 0 -> assert (nr_values = 0); 0, 0 | { value : 4*8 : littleendian, bind (get_offset value); rest : -1 : bitstring } -> let data_len, name_len = visit_vk value in let max_data_len, max_name_len = visit_values_in_vallist (nr_values-1) vallist rest in max max_data_len data_len, max max_name_len name_len | {_} -> failwithf "%s: invalid offset in value list at %s\n" basename (print_offset vallist) ) else 0, 0 and visit_vk vk = let (_, _, bits) = lookup "visit_vk" vk in mark_visited vk; (bitmatch bits with | { :vk_fields } -> fprintf_vk stdout vk; if unknown1 <> 0 then eprintf "VK %s unknown1 flags set (%02x)\n" (print_offset vk) unknown1; if unknown2 <> 0 then eprintf "VK %s unknown2 flags set (%02x)\n" (print_offset vk) unknown2; if unknown3 <> 0 then eprintf "VK %s unknown3 flags set (%04x)\n" (print_offset vk) unknown3; (* Note this is common for default [ie. zero-length] key names. *) if not nameisascii && name_len > 0 then eprintf "VK %s has non-ASCII name flag set (name is %s)\n" (print_offset vk) (print_binary_string name); vk_records := vk :: !vk_records; (match data with | Inline data -> () | Offset offset -> let _ = lookup "visit_vk (data)" offset in mark_visited offset ); data_len, name_len | {_} -> failwithf "%s: invalid vk block at offset %s\n" basename (print_offset vk) ) (* Visits subkeys, recursing through intermediate lf/lh/ri structures, * and returns the number of subkeys actually seen. *) and visit_subkeys subkeys = let (_, _, bits) = lookup "visit_subkeys" subkeys in mark_visited subkeys; (bitmatch bits with | { ("lf"|"lh") : 2*8 : string; len : 2*8 : littleendian; (* number of subkeys of this node *) rest : len*8*8 : bitstring } -> printf "LF %s %d\n" (print_offset subkeys) len; visit_subkeys_in_lf_list subkeys len rest | { "ri" : 2*8 : string; len : 2*8 : littleendian; rest : len*4*8 : bitstring } -> printf "RI %s %d\n" (print_offset subkeys) len; visit_subkeys_in_ri_list subkeys len rest (* In theory you can have an li-record here, but we've never * seen one. *) | { "nk" : 2*8 : string } -> visit_nk subkeys; let name_len = name_len_of_nk subkeys in 1, name_len | {_} -> failwithf "%s: invalid subkey node found at %s\n" basename (print_offset subkeys) ) and visit_subkeys_in_lf_list subkeys_top len bits = if len > 0 then ( bitmatch bits with | { rest : -1 : bitstring } when bitstring_length rest = 0 -> assert (len = 0); 0, 0 | { offset : 4*8 : littleendian, bind (get_offset offset); _ (* hash *) : 4*8 : bitstring; rest : -1 : bitstring } -> let c1, name_len1 = visit_subkeys offset in let c2, name_len2 = visit_subkeys_in_lf_list subkeys_top (len-1) rest in c1 + c2, max name_len1 name_len2 | {_} -> failwithf "%s: invalid subkey in lf/lh list at %s\n" basename (print_offset subkeys_top) ) else 0, 0 and visit_subkeys_in_ri_list subkeys_top len bits = if len > 0 then ( bitmatch bits with | { rest : -1 : bitstring } when bitstring_length rest = 0 -> assert (len = 0); 0, 0 | { offset : 4*8 : littleendian, bind (get_offset offset); rest : -1 : bitstring } -> let c1, name_len1 = visit_subkeys offset in let c2, name_len2 = visit_subkeys_in_ri_list subkeys_top (len-1) rest in c1 + c2, max name_len1 name_len2 | {_} -> failwithf "%s: invalid subkey in ri list at %s\n" basename (print_offset subkeys_top) ) else 0, 0 and name_len_of_nk nk = let (_, _, bits) = lookup "name_len_of_nk" nk in bitmatch bits with | { :nk_fields } -> name_len and visit_sk sk = let (_, _, bits) = lookup "visit_sk" sk in if is_not_visited sk then ( mark_visited sk; (bitmatch bits with | { :sk_fields } -> fprintf_sk stdout sk; if unknown1 <> 0 then eprintf "SK %s unknown1 <> 0 (%04x)\n" (print_offset sk) unknown1; sk_records := sk :: !sk_records | {_} -> failwithf "%s: invalid sk-record at %s\n" basename (print_offset sk) ) ) and visit_classname classname classname_len = let (seg_len, _, bits) = lookup "visit_classname" classname in mark_visited classname; assert (seg_len >= classname_len); printf "CL %s %s\n" (print_offset classname) (print_bitstring bits) let () = visit_nk ~nk_is_root:true root_key (* These are immutable now. *) let nk_records = !nk_records let vk_records = !vk_records let sk_records = !sk_records (* So we can rapidly tell what is an nk/vk/sk offset. *) let nk_set = List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty nk_records let vk_set = List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty vk_records let sk_set = List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty sk_records (* Now after visiting all the blocks, are there any used blocks which * are unvisited? If there are any then that would indicate either (a) * that the hive contains unreferenced blocks, or (b) that there are * referenced blocks that we did not visit because we don't have a full * understanding of the hive format. * * Windows 7 registries often contain a few of these -- not clear * how serious they are, but don't fail here. *) let () = let unvisited = unvisited_blocks () in IntMap.iter ( fun offset block -> match block with | (_, false, _) -> () (* ignore unused blocks *) | (seg_len, true, _) -> eprintf "used block %s (length %d) is not referenced\n" (print_offset offset) seg_len ) unvisited (* Check the SKs are: * (a) linked into a single circular list through the sk_prev/sk_next * pointers * (b) refcounts are correct *) let () = if List.length sk_records > 0 then ( let sk0 = List.hd sk_records in (* start at any arbitrary sk *) (* This loop follows the chain of sk pointers until we arrive * back at the original, checking prev/next are consistent. *) let rec loop visited prevsk sk = if sk <> sk0 then ( if not (IntSet.mem sk sk_set) then eprintf "SK %s not an sk-record (faulty sk_next somewhere)\n" (print_offset sk) else ( let _, _, bits = lookup "loop sk circular list" sk in bitmatch bits with | { :sk_fields } -> if sk_prev <> prevsk then eprintf "SK %s sk_prev != previous sk (%s, %s)\n" (print_offset sk) (print_offset sk_prev) (print_offset prevsk); if IntSet.mem sk visited then eprintf "SK %s already visited (bad circular list)\n" (print_offset sk); let visited = IntSet.add sk visited in loop visited sk sk_next ) ) in let _, _, bits = lookup "start sk circular list" sk0 in (bitmatch bits with | { :sk_fields } -> loop IntSet.empty sk_prev sk0 ); (* For every nk-record, if it references an sk-record count that, * then check this matches the refcounts in the sk-records * themselves. *) let refcounts = Counter.create () in List.iter ( fun nk -> let _, _, bits = lookup "sk refcounter (nk)" nk in (bitmatch bits with | { :nk_fields } -> Counter.incr refcounts sk ) ) nk_records; List.iter ( fun sk -> let _, _, bits = lookup "sk refcounter (sk)" sk in (bitmatch bits with | { :sk_fields } -> let actual = Counter.get refcounts sk in if actual <> refcount then eprintf "SK %s incorrect refcount (actual %d, in file %d)\n" (print_offset sk) actual refcount ) ) sk_records )