From db8a46aa1165479ddf2b551e81a7e96bb06fc942 Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Wed, 3 Feb 2010 17:35:53 +0000 Subject: [PATCH] Tools for analyzing and reverse engineering hive files. This commit is not of general interest. It contains the tools which I used to reverse engineer the hive format and to test changes. Keeping these with the rest of the code is useful in case in future we encounter a hive file that we fail to modify. Note that the tools are not compiled by default. You have to compile each explicitly with: make -C hivex/tools .opt You will also need ocaml-extlib-devel and ocaml-bitstring-devel. --- .gitignore | 1 + configure.ac | 1 + hivex/Makefile.am | 2 + hivex/tools/Makefile.am | 56 +++ hivex/tools/clearheaderfields.ml | 112 +++++ hivex/tools/counter.ml | 86 ++++ hivex/tools/counter.mli | 69 +++ hivex/tools/fillemptyhbins.ml | 74 +++ hivex/tools/truncatefile.ml | 112 +++++ hivex/tools/visualizer.ml | 923 ++++++++++++++++++++++++++++++++++++++ hivex/tools/visualizer_NT_time.ml | 30 ++ hivex/tools/visualizer_utils.ml | 163 +++++++ 12 files changed, 1629 insertions(+) create mode 100644 hivex/tools/Makefile.am create mode 100644 hivex/tools/clearheaderfields.ml create mode 100644 hivex/tools/counter.ml create mode 100644 hivex/tools/counter.mli create mode 100644 hivex/tools/fillemptyhbins.ml create mode 100644 hivex/tools/truncatefile.ml create mode 100644 hivex/tools/visualizer.ml create mode 100644 hivex/tools/visualizer_NT_time.ml create mode 100644 hivex/tools/visualizer_utils.ml diff --git a/.gitignore b/.gitignore index 49daccd..899e973 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,7 @@ hivex/*.1 hivex/*.3 hivex/hivexsh hivex/hivexml +hivex/tools/*.opt html/guestfish.1.html html/guestfs.3.html html/guestmount.1.html diff --git a/configure.ac b/configure.ac index fda4773..134cebd 100644 --- a/configure.ac +++ b/configure.ac @@ -737,6 +737,7 @@ AC_CONFIG_FILES([Makefile gnulib/lib/Makefile gnulib/tests/Makefile hivex/Makefile + hivex/tools/Makefile fuse/Makefile ocaml/META perl/Makefile.PL]) AC_OUTPUT diff --git a/hivex/Makefile.am b/hivex/Makefile.am index 1adbbd8..5624b16 100644 --- a/hivex/Makefile.am +++ b/hivex/Makefile.am @@ -15,6 +15,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +SUBDIRS = tools + EXTRA_DIST = hivex.pod hivexml.pod hivexget.pod hivexsh.pod LICENSE lib_LTLIBRARIES = libhivex.la diff --git a/hivex/tools/Makefile.am b/hivex/tools/Makefile.am new file mode 100644 index 0000000..bd8e986 --- /dev/null +++ b/hivex/tools/Makefile.am @@ -0,0 +1,56 @@ +# libguestfs +# Copyright (C) 2009 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +# OCaml Windows Registry visualizer. This was used while reverse +# engineering the hive format, and is not normally compiled. If you +# do with to compile it, you'll need ocaml-bitstring-devel and +# ocaml-extlib-devel. Also you'll need a collection of hive files +# from Windows machines to experiment with. +# +# We use '-w y' (disable unused variable warnings) because these +# warnings aren't very reliable with heavily preprocessed code like +# that produced by bitstring. + +EXTRA_DIST = \ + visualizer.ml \ + visualizer_utils.ml \ + visualizer_NT_time.ml \ + clearheaderfields.ml \ + fillemptyhbins.ml \ + truncatefile.ml \ + counter.mli \ + counter.ml + +visualizer.opt: counter.mli counter.ml visualizer_utils.ml visualizer_NT_time.ml visualizer.ml + ocamlfind ocamlopt -w y \ + -package bitstring,bitstring.syntax,extlib \ + -syntax camlp4 -linkpkg $^ -o $@ + +fillemptyhbins.opt: fillemptyhbins.ml + ocamlfind ocamlopt -w y \ + -package bitstring,bitstring.syntax,extlib \ + -syntax camlp4 -linkpkg $^ -o $@ + +clearheaderfields.opt: visualizer_utils.ml clearheaderfields.ml + ocamlfind ocamlopt -w y \ + -package bitstring,bitstring.syntax,extlib \ + -syntax camlp4 -linkpkg $^ -o $@ + +truncatefile.opt: visualizer_utils.ml truncatefile.ml + ocamlfind ocamlopt -w y \ + -package bitstring,bitstring.syntax,extlib \ + -syntax camlp4 -linkpkg $^ -o $@ diff --git a/hivex/tools/clearheaderfields.ml b/hivex/tools/clearheaderfields.ml new file mode 100644 index 0000000..d055553 --- /dev/null +++ b/hivex/tools/clearheaderfields.ml @@ -0,0 +1,112 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +open Bitstring +open ExtString +open Printf +open Visualizer_utils + +let () = + if Array.length Sys.argv <> 2 then ( + eprintf "Error: missing argument. +Usage: %s hivefile +" Sys.executable_name; + exit 1 + ) + +let filename = Sys.argv.(1) + +(* Load the file. *) +let bits = bitstring_of_file filename + +(* Split into header + data at the 4KB boundary. *) +let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits + +(* Read the header fields. *) +let seq, last_modified, major, minor, unknown1, unknown2, + root_key, end_pages, unknown3, fname = + bitmatch header with + | { "regf" : 4*8 : string; + seq1 : 4*8 : littleendian; + seq2 : 4*8 : littleendian; + last_modified : 64 : bitstring; + major : 4*8 : littleendian; + minor : 4*8 : littleendian; + unknown1 : 4*8 : littleendian; + unknown2 : 4*8 : littleendian; + root_key : 4*8 : littleendian; + end_pages : 4*8 : littleendian; + unknown3 : 4*8 : littleendian; + fname : 64*8 : string; + unknownguid1 : 16*8 : bitstring; + unknownguid2 : 16*8 : bitstring; + unknown4 : 4*8 : littleendian; + unknownguid3 : 16*8 : bitstring; + unknown5 : 4*8 : string; + unknown6 : 340*8 : bitstring; + csum : 4*8 + : littleendian, save_offset_to (crc_offset), + check (assert (crc_offset = 0x1fc * 8); true); + unknown7 : (0x1000-0x200)*8 : bitstring } -> + seq1, last_modified, major, minor, unknown1, unknown2, + root_key, end_pages, unknown3, fname + | {_} -> assert false + +(* Create a new header, but with unknown fields cleared. Do it in + * two parts, first creating everything up to the checksum, then + * calculating the checksum and appending checksum and the final + * field. + *) +let header = + let zeroguid = zeroes_bitstring (16*8) in + let before_csum = + BITSTRING { + "regf" : 4*8 : string; + seq : 4*8 : littleendian; + seq : 4*8 : littleendian; + last_modified : 64 : bitstring; + major : 4*8 : littleendian; + minor : 4*8 : littleendian; + unknown1 : 4*8 : littleendian; + unknown2 : 4*8 : littleendian; + root_key : 4*8 : littleendian; + end_pages : 4*8 : littleendian; + unknown3 : 4*8 : littleendian; + fname : 64*8 : string; + zeroguid : 16*8 : bitstring; + zeroguid : 16*8 : bitstring; + 0_l : 4*8 : littleendian; + zeroguid : 16*8 : bitstring; + 0_l : 4*8 : littleendian; + zeroes_bitstring (340*8) : 340*8 : bitstring + } in + assert (bitstring_length before_csum = 0x1fc * 8); + let csum = bitstring_fold_left_int32_le Int32.logxor 0_l before_csum in + let csum_and_after = + BITSTRING { + csum : 4*8 : littleendian; + zeroes_bitstring ((0x1000-0x200)*8) : (0x1000-0x200)*8 : bitstring + } in + let new_header = concat [before_csum; csum_and_after] in + assert (bitstring_length header = bitstring_length new_header); + new_header + +(* Write it. *) +let () = + let file = concat [header; data] in + bitstring_to_file file filename diff --git a/hivex/tools/counter.ml b/hivex/tools/counter.ml new file mode 100644 index 0000000..2e44c65 --- /dev/null +++ b/hivex/tools/counter.ml @@ -0,0 +1,86 @@ +(* Basic counting module. + + Copyright (C) 2006 Merjis Ltd. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*) + +type 'a t = ('a, int ref) Hashtbl.t + +let create () = + Hashtbl.create 13 + +let get_ref counter thing = + try + Hashtbl.find counter thing + with + Not_found -> + let r = ref 0 in + Hashtbl.add counter thing r; + r + +let incr counter thing = + let r = get_ref counter thing in + incr r + +let decr counter thing = + let r = get_ref counter thing in + decr r + +let add counter thing n = + let r = get_ref counter thing in + r := !r + n + +let sub counter thing n = + let r = get_ref counter thing in + r := !r - n + +let set counter thing n = + let r = get_ref counter thing in + r := n + +(* Don't use get_ref, to avoid unnecessarily creating 'ref 0's. *) +let get counter thing = + try + !(Hashtbl.find counter thing) + with + Not_found -> 0 + +(* This is a common pair of operations, worth optimising. *) +let incr_get counter thing = + let r = get_ref counter thing in + Pervasives.incr r; + !r + +let zero = Hashtbl.remove + +let read counter = + let counts = + Hashtbl.fold ( + fun thing r xs -> + let r = !r in + if r <> 0 then (r, thing) :: xs + else xs + ) counter [] in + List.sort (fun (a, _) (b, _) -> compare (b : int) (a : int)) counts + +let length = Hashtbl.length + +let total counter = + let total = ref 0 in + Hashtbl.iter (fun _ r -> total := !total + !r) counter; + !total + +let clear = Hashtbl.clear diff --git a/hivex/tools/counter.mli b/hivex/tools/counter.mli new file mode 100644 index 0000000..87610b5 --- /dev/null +++ b/hivex/tools/counter.mli @@ -0,0 +1,69 @@ +(** Basic counting module. + + Copyright (C) 2006 Merjis Ltd. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*) + +type 'a t +(** Count items of type ['a]. *) + +val create : unit -> 'a t +(** Create a new counter. *) + +val incr : 'a t -> 'a -> unit +(** [incr counter thing] adds one to the count of [thing]s in [counter]. *) + +val decr : 'a t -> 'a -> unit +(** [decr counter thing] subtracts one to the count of [thing]s in [counter]. *) + +val add : 'a t -> 'a -> int -> unit +(** [add counter thing n] adds [n] to the count of [thing]s in [counter]. *) + +val sub : 'a t -> 'a -> int -> unit +(** [sub counter thing n] subtracts [n] to the count of [thing]s in [counter]. *) + +val set : 'a t -> 'a -> int -> unit +(** [set counter thing n] sets the count of [thing]s to [n]. *) + +val get : 'a t -> 'a -> int +(** [get counter thing] returns the count of [thing]s. (Returns 0 for + * [thing]s which have not been added. + *) + +val incr_get : 'a t -> 'a -> int +(** Faster form of {!Counter.incr} followed by {!Counter.get}. *) + +val zero : 'a t -> 'a -> unit +(** [zero counter thing] sets the count of [thing]s to 0. + * See also {!Counter.clear}. + *) + +val read : 'a t -> (int * 'a) list +(** [read counter] reads the frequency of each thing. They are sorted + * with the thing appearing most frequently first. Only things occurring + * non-zero times are returned. + *) + +val length : 'a t -> int +(** Return the number of distinct things. See also {!Counter.total} *) + +val total : 'a t -> int +(** Return the number of things counted (the total number of counts). + * See also {!Counter.length} + *) + +val clear : 'a t -> unit +(** [clear counter] zeroes all counts. *) diff --git a/hivex/tools/fillemptyhbins.ml b/hivex/tools/fillemptyhbins.ml new file mode 100644 index 0000000..14eae96 --- /dev/null +++ b/hivex/tools/fillemptyhbins.ml @@ -0,0 +1,74 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +open Bitstring +open ExtString +open Printf + +let () = + if Array.length Sys.argv <> 3 then ( + eprintf "Error: missing argument. +Usage: %s hivefile startoffset +" Sys.executable_name; + exit 1 + ) + +let filename = Sys.argv.(1) +let offset = int_of_string Sys.argv.(2) + +(* Load the file. *) +let bits = bitstring_of_file filename + +(* Split into header + data at the 4KB boundary. *) +let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits + +(* Overwrite everything after @offset, so ... *) +let nrpages = (bitstring_length data / 8 - offset) / 4096 +let data = takebits (offset * 8) data + +(* Create the empty pages. They're not all the same because each + * page contains its own page_offset. + *) +let pages = + let noblock = + let seg_len = 4096 - 32 in + let zeroes = zeroes_bitstring ((seg_len - 4) * 8) in + BITSTRING { + Int32.of_int seg_len : 4*8 : littleendian; + zeroes : (seg_len - 4) * 8 : bitstring + } in + let zeroes = zeroes_bitstring (20*8) in + let rec loop page_offset i = + if i < nrpages then ( + let page = + BITSTRING { + "hbin" : 4*8 : string; + Int32.of_int page_offset : 4*8 : littleendian; + 4096_l : 4*8 : littleendian; (* page length *) + zeroes : 20*8 : bitstring; + noblock : (4096 - 32) * 8 : bitstring + } in + page :: loop (page_offset + 4096) (i+1) + ) else [] + in + loop offset 0 + +(* Write it. *) +let () = + let file = concat (header :: data :: pages) in + bitstring_to_file file filename diff --git a/hivex/tools/truncatefile.ml b/hivex/tools/truncatefile.ml new file mode 100644 index 0000000..b519f7a --- /dev/null +++ b/hivex/tools/truncatefile.ml @@ -0,0 +1,112 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +open Bitstring +open ExtString +open Printf +open Visualizer_utils + +let () = + if Array.length Sys.argv <> 3 then ( + eprintf "Error: missing argument. +Usage: %s hivefile endpages +" Sys.executable_name; + exit 1 + ) + +let filename = Sys.argv.(1) +let new_end_pages = int_of_string Sys.argv.(2) + +(* Load the file. *) +let bits = bitstring_of_file filename + +(* Split into header + data at the 4KB boundary. *) +let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits + +(* Truncate the file data. *) +let data = takebits (new_end_pages * 8) data + +(* Read the header fields. *) +let seq, last_modified, major, minor, unknown1, unknown2, + root_key, end_pages, unknown3, fname = + bitmatch header with + | { "regf" : 4*8 : string; + seq1 : 4*8 : littleendian; + seq2 : 4*8 : littleendian; + last_modified : 64 : bitstring; + major : 4*8 : littleendian; + minor : 4*8 : littleendian; + unknown1 : 4*8 : littleendian; + unknown2 : 4*8 : littleendian; + root_key : 4*8 : littleendian; + end_pages : 4*8 : littleendian; + unknown3 : 4*8 : littleendian; + fname : 64*8 : string; + unknownguid1 : 16*8 : bitstring; + unknownguid2 : 16*8 : bitstring; + unknown4 : 4*8 : littleendian; + unknownguid3 : 16*8 : bitstring; + unknown5 : 4*8 : string; + unknown6 : 340*8 : bitstring; + csum : 4*8 + : littleendian, save_offset_to (crc_offset), + check (assert (crc_offset = 0x1fc * 8); true); + unknown7 : (0x1000-0x200)*8 : bitstring } -> + seq1, last_modified, major, minor, unknown1, unknown2, + root_key, end_pages, unknown3, fname + | {_} -> assert false + +(* Create a new header, with endpages updated. *) +let header = + let zeroguid = zeroes_bitstring (16*8) in + let before_csum = + BITSTRING { + "regf" : 4*8 : string; + seq : 4*8 : littleendian; + seq : 4*8 : littleendian; + last_modified : 64 : bitstring; + major : 4*8 : littleendian; + minor : 4*8 : littleendian; + unknown1 : 4*8 : littleendian; + unknown2 : 4*8 : littleendian; + root_key : 4*8 : littleendian; + Int32.of_int new_end_pages : 4*8 : littleendian; + unknown3 : 4*8 : littleendian; + fname : 64*8 : string; + zeroguid : 16*8 : bitstring; + zeroguid : 16*8 : bitstring; + 0_l : 4*8 : littleendian; + zeroguid : 16*8 : bitstring; + 0_l : 4*8 : littleendian; + zeroes_bitstring (340*8) : 340*8 : bitstring + } in + assert (bitstring_length before_csum = 0x1fc * 8); + let csum = bitstring_fold_left_int32_le Int32.logxor 0_l before_csum in + let csum_and_after = + BITSTRING { + csum : 4*8 : littleendian; + zeroes_bitstring ((0x1000-0x200)*8) : (0x1000-0x200)*8 : bitstring + } in + let new_header = concat [before_csum; csum_and_after] in + assert (bitstring_length header = bitstring_length new_header); + new_header + +(* Write it. *) +let () = + let file = concat [header; data] in + bitstring_to_file file filename diff --git a/hivex/tools/visualizer.ml b/hivex/tools/visualizer.ml new file mode 100644 index 0000000..da79bee --- /dev/null +++ b/hivex/tools/visualizer.ml @@ -0,0 +1,923 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * For existing information on the registry format, please refer + * to the following documents. Note they are both incomplete + * and inaccurate in some respects. + * + * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf + * http://pogostick.net/~pnh/ntpasswd/WinReg.txt + *) + +open Bitstring +open ExtString +open Printf +open Visualizer_utils +open Visualizer_NT_time + +let () = + if Array.length Sys.argv <> 2 then ( + eprintf "Error: missing argument. +Usage: %s hivefile > out +where + 'hivefile' is the input hive file from a Windows machine + 'out' is an output file where we will write all the keys, + values etc for extended debugging purposes. +Errors, inconsistencies and unexpected fields in the hive file +are written to stderr. +" Sys.executable_name; + exit 1 + ) + +let filename = Sys.argv.(1) +let basename = Filename.basename filename + +(* Load the file. *) +let bits = bitstring_of_file filename + +(* Split into header + data at the 4KB boundary. *) +let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits + +(* Define a persistent pattern which matches the header fields. By + * using persistent patterns, we can reuse them later in the + * program. + *) +let bitmatch header_fields = + { "regf" : 4*8 : string; + seq1 : 4*8 : littleendian; + seq2 : 4*8 : littleendian; + last_modified : 64 + : littleendian, bind (nt_to_time_t last_modified); + major : 4*8 : littleendian; + minor : 4*8 : littleendian; + + (* "Type". Contains 0. *) + unknown1 : 4*8 : littleendian; + + (* "Format". Contains 1. *) + unknown2 : 4*8 : littleendian; + + root_key : 4*8 + : littleendian, bind (get_offset root_key); + end_pages : 4*8 + : littleendian, bind (get_offset end_pages); + + (* "Cluster". Contains 1. *) + unknown3 : 4*8 : littleendian; + + filename : 64*8 : string; + + (* All three GUIDs here confirmed in Windows 7 registries. In + * Windows <= 2003 these GUID fields seem to contain junk. + * + * If you write zeroes to the GUID fields, load and unload in Win7 + * REGEDIT, then Windows 7 writes some random GUIDs. + * + * Also (on Win7) unknownguid1 == unknownguid2. unknownguid3 is + * different. + *) + unknownguid1 : 16*8 : bitstring; + unknownguid2 : 16*8 : bitstring; + + (* Wrote zero to unknown4, loaded and unloaded it in Win7 REGEDIT, + * and it still contained zero. In existing registries it seems to + * contain random junk. + *) + unknown4 : 4*8 : littleendian; + unknownguid3 : 16*8 : bitstring; + + (* If you write zero to unknown5, load and unload it in REGEDIT, + * Windows 7 puts the string "rmtm" here. Existing registries also + * seen containing this string. However on older Windows it can + * be all zeroes. + *) + unknown5 : 4*8 : string; + + (* This seems to contain junk from other parts of the registry. I + * wrote zeroes here, loaded and unloaded it in Win7 REGEDIT, and + * it still contained zeroes. + *) + unknown6 : 340*8 : bitstring; + csum : 4*8 + : littleendian, save_offset_to (crc_offset), + check (assert (crc_offset = 0x1fc * 8); true); + unknown7 : (0x1000-0x200)*8 : bitstring } + +let fprintf_header chan bits = + bitmatch bits with + | { :header_fields } -> + fprintf chan + "HD %6ld %6ld %s %ld.%ld %08lx %08lx %s %s %08lx %s %s %s %08lx %s %s %s %08lx %s\n" + seq1 seq2 (print_time last_modified) major minor + unknown1 unknown2 + (print_offset root_key) (print_offset end_pages) + unknown3 (print_utf16 filename) + (print_guid unknownguid1) (print_guid unknownguid2) + unknown4 (print_guid unknownguid3) unknown5 + (print_bitstring unknown6) + csum (print_bitstring unknown7) + +(* Parse the header and check it. *) +let root_key, end_pages = + bitmatch header with + | { :header_fields } -> + fprintf_header stdout header; + + if major <> 1_l then + eprintf "HD hive file major <> 1 (major.minor = %ld.%ld)\n" + major minor; + if seq1 <> seq2 then + eprintf "HD hive file sequence numbers should match (%ld <> %ld)\n" + seq1 seq2; + if unknown1 <> 0_l then + eprintf "HD unknown1 field <> 0 (%08lx)\n" unknown1; + if unknown2 <> 1_l then + eprintf "HD unknown2 field <> 1 (%08lx)\n" unknown2; + if unknown3 <> 1_l then + eprintf "HD unknown3 field <> 1 (%08lx)\n" unknown3; + if not (equals unknownguid1 unknownguid2) then + eprintf "HD unknownguid1 <> unknownguid2 (%s, %s)\n" + (print_guid unknownguid1) (print_guid unknownguid2); + (* We think this is junk. + if unknown4 <> 0_l then + eprintf "HD unknown4 field <> 0 (%08lx)\n" unknown4; + *) + if unknown5 <> "rmtm" && unknown5 <> "\000\000\000\000" then + eprintf "HD unknown5 field <> \"rmtm\" & <> zeroes (%s)\n" unknown5; + (* We think this is junk. + if not (is_zero_bitstring unknown6) then + eprintf "HD unknown6 area is not zero (%s)\n" + (print_bitstring unknown6); + *) + if not (is_zero_bitstring unknown7) then + eprintf "HD unknown7 area is not zero (%s)\n" + (print_bitstring unknown7); + + root_key, end_pages + | {_} -> + failwithf "%s: this doesn't look like a registry hive file\n" basename + +(* Define persistent patterns to match page and block fields. *) +let bitmatch page_fields = + { "hbin" : 4*8 : string; + page_offset : 4*8 + : littleendian, bind (get_offset page_offset); + page_size : 4*8 + : littleendian, check (Int32.rem page_size 4096_l = 0_l), + bind (Int32.to_int page_size); + + (* In the first hbin in the file these fields contain something. + * In subsequent hbins these fields are all zero. + * + * From existing hives (first hbin only): + * + * unknown1 unknown2 unknown5 + * 00 00 00 00 00 00 00 00 9C 77 3B 02 6A 7D CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 50 3A 15 07 B5 9B CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 57 86 90 D4 9A 58 CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 52 3F 90 9D CF 7C CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 E8 86 C1 17 BD 06 CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 4A 77 CE 7A CF 7C CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 E4 EA 23 FF 69 7D CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 50 13 BA 8D A2 9A CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 0E 07 93 13 BD 06 CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 9D 55 D0 B3 99 58 CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 46 AC FF 8B CF 7C CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 80 29 2D 02 6A 7D CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 90 8D 36 07 B5 9B CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 5C 9B 8B B8 6A 06 CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 85 9F BB 99 9A 58 CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 BE 3D 21 02 6A 7D CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 70 53 09 07 B5 9B CA 01 00 00 00 00 + * 00 00 00 00 00 00 00 00 5B 62 42 B6 9A 58 CA 01 00 00 00 00 + * 01 00 00 00 00 00 00 00 B2 46 9B 9E CF 7C CA 01 00 00 00 00 + * 01 00 00 00 00 00 00 00 CA 88 EE 1A BD 06 CA 01 00 00 00 00 + * + * From the above we worked out that fields 3 and 4 are an NT + * timestamp, which seems to be "last modified" (when REGEDIT + * unloads a hive it updates this timestamp even if nothing + * has been changed). + *) + unknown1 : 4*8 : littleendian; (* usually zero, occasionally 1 *) + unknown2 : 4*8 : littleendian; (* always zero? *) + last_modified : 64 + : littleendian, + bind (if page_offset = 0 then nt_to_time_t last_modified + else ( + assert (last_modified = 0_L); + 0. + ) + ); + (* The "B.D." document said this field contains the page size, but + * this is not true. This misinformation has been copied to the + * sentinelchicken documentation too. + *) + unknown5 : 4*8 : littleendian; (* always zero? *) + + (* Now the blocks in this page follow. *) + blocks : (page_size - 32) * 8 : bitstring; + + rest : -1 : bitstring } + +let fprintf_page chan bits = + bitmatch bits with + | { :page_fields } -> + fprintf chan "HB %s %08x %08lx %08lx %s %08lx\n" + (print_offset page_offset) + page_size unknown1 unknown2 + (if page_offset = 0 then print_time last_modified + else string_of_float last_modified) unknown5 + +let bitmatch block_fields = + { seg_len : 4*8 + : littleendian, bind (Int32.to_int seg_len); + block_data : (abs seg_len - 4) * 8 : bitstring; + rest : -1 : bitstring } + +let fprintf_block chan block_offset bits = + bitmatch bits with + | { :block_fields } -> + fprintf chan "BL %s %s %d\n" + (print_offset block_offset) + (if seg_len < 0 then "used" else "free") + (if seg_len < 0 then -seg_len else seg_len) + +(* Iterate over the pages and blocks. In the process we will examine + * each page (hbin) header. Also we will build block_list which is a + * list of (block offset, length, used flag, data). + *) +let block_list = ref [] +let () = + let rec loop_over_pages data data_offset = + if data_offset < end_pages then ( + bitmatch data with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () + + | { :page_fields } -> + fprintf_page stdout data; + + assert (page_offset = data_offset); + + if data_offset = 0 then ( (* first hbin only *) + if unknown1 <> 0_l then + eprintf "HB %s unknown1 field <> 0 (%08lx)\n" + (print_offset page_offset) unknown1; + if unknown2 <> 0_l then + eprintf "HB %s unknown2 field <> 0 (%08lx)\n" + (print_offset page_offset) unknown2; + if unknown5 <> 0_l then + eprintf "HB %s unknown5 field <> 0 (%08lx)\n" + (print_offset page_offset) unknown5 + ) else ( (* subsequent hbins *) + if unknown1 <> 0_l || unknown2 <> 0_l || unknown5 <> 0_l then + eprintf "HB %s unknown fields <> 0 (%08lx %08lx %08lx)\n" + (print_offset page_offset) + unknown1 unknown2 unknown5; + if last_modified <> 0. then + eprintf "HB %s last_modified <> 0. (%g)\n" + (print_offset page_offset) last_modified + ); + + (* Loop over the blocks in this page. *) + loop_over_blocks blocks (data_offset + 32); + + (* Loop over rest of the pages. *) + loop_over_pages rest (data_offset + page_size) + + | {_} -> + failwithf "%s: invalid hbin at offset %s\n" + basename (print_offset data_offset) + ) else ( + (* Reached the end of the official hbins in this file, BUT the + * file can be larger than this and might contain stuff. What + * does it contain after the hbins? We think just junk, but + * we're not sure. + *) + if not (is_zero_bitstring data) then ( + eprintf "Junk in file after end of pages:\n"; + let rec loop data data_offset = + bitmatch data with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () + | { :page_fields } -> + eprintf "\tjunk hbin %s 0x%08x\n" + (print_offset data_offset) page_size; + loop rest (data_offset + page_size); + | { _ } -> + eprintf "\tother junk %s %s\n" + (print_offset data_offset) (print_bitstring data) + in + loop data data_offset + ) + ) + and loop_over_blocks blocks block_offset = + bitmatch blocks with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () + + | { :block_fields } -> + assert (block_offset mod 8 = 0); + + fprintf_block stdout block_offset blocks; + + let used, seg_len = + if seg_len < 0 then true, -seg_len else false, seg_len in + + let block = block_offset, (seg_len, used, block_data) in + block_list := block :: !block_list; + + (* Loop over the rest of the blocks in this page. *) + loop_over_blocks rest (block_offset + seg_len) + + | {_} -> + failwithf "%s: invalid block near offset %s\n" + basename (print_offset block_offset) + in + loop_over_pages data 0 + +(* Turn the block_list into a map so we can quickly look up a block + * from its offset. + *) +let block_list = !block_list +let block_map = + List.fold_left ( + fun map (block_offset, block) -> IntMap.add block_offset block map + ) IntMap.empty block_list +let lookup fn offset = + try + let (_, used, _) as block = IntMap.find offset block_map in + if not used then + failwithf "%s: %s: lookup: free block %s referenced from hive tree" + basename fn (print_offset offset); + block + with Not_found -> + failwithf "%s: %s: lookup: unknown block %s referenced from hive tree" + basename fn (print_offset offset) + +(* Use this to mark blocks that we've visited. If the hive contains + * no unreferenced blocks, then by the end this should just contain + * free blocks. + *) +let mark_visited, is_not_visited, unvisited_blocks = + let v = ref block_map in + let mark_visited offset = v := IntMap.remove offset !v + and is_not_visited offset = IntMap.mem offset !v + and unvisited_blocks () = !v in + mark_visited, is_not_visited, unvisited_blocks + +(* Define persistent patterns to match nk-records, vk-records and + * sk-records, which are the record types that we especially want to + * analyze later. Other blocks types (eg. value lists, lf-records) + * have no "spare space" so everything is known about them and we don't + * store these. + *) +let bitmatch nk_fields = + { "nk" : 2*8 : string; + (* Flags stored in the file as a little endian word, hence the + * unusual ordering: + *) + virtmirrored : 1; + predefinedhandle : 1; keynameascii : 1; symlinkkey : 1; + cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1; + unknownflag8000 : 1; unknownflag4000 : 1; + unknownflag2000 : 1; unknownflag1000 : 1; + unknownflag0800 : 1; unknownflag0400 : 1; + virtualstore : 1; virttarget : 1; + timestamp : 64 : littleendian, bind (nt_to_time_t timestamp); + unknown1 : 4*8 : littleendian; + parent : 4*8 : littleendian, bind (get_offset parent); + nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys); + nr_subkeys_vol : 4*8; + subkeys : 4*8 : littleendian, bind (get_offset subkeys); + subkeys_vol : 4*8; + nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values); + vallist : 4*8 : littleendian, bind (get_offset vallist); + sk : 4*8 : littleendian, bind (get_offset sk); + classname : 4*8 : littleendian, bind (get_offset classname); + (* sentinelchicken.com says this is a single 32 bit field + * containing maximum number of bytes in a subkey name, however + * that does not seem to be correct. We think it is two 16 bit + * fields, the first being the maximum number of bytes in the + * UTF16-LE encoded version of the subkey names, (since subkey + * names are usually ASCII, that would be max length of names * 2). + * This is a historical maximum, so it can be greater than the + * current maximum name field. + * + * The second field is often non-zero, but the purpose is unknown. + * In the hives we examined it had values 0, 1, 0x20, 0x21, 0xa0, + * 0xa1, 0xe1, suggesting some sort of flags. + *) + max_subkey_name_len : 2*8 : littleendian; + unknown2 : 2*8 : littleendian; + (* sentinelchicken.com says: maximum subkey CLASSNAME length, + * however that does not seem to be correct. In hives I looked + * at, it has value 0, 0xc, 0x10, 0x18, 0x1a, 0x28. + *) + unknown3 : 4*8 : littleendian; + (* sentinelchicken.com says: maximum number of bytes in a value + * name, however that does not seem to be correct. We think it is + * the maximum number of bytes in the UTF16-LE encoded version of + * the value names (since value names are usually ASCII, that would + * be max length of names * 2). This is a historical maximum, so + * it can be greater than the current maximum name field. + *) + max_vk_name_len : 4*8 : littleendian, bind (Int32.to_int max_vk_name_len); + (* sentinelchicken.com says: maximum value data size, and this + * agrees with my observations. It is the largest data size (not + * seg_len, but vk.data_len) for any value in this key. We think + * that this field is a historical max, so eg if a maximally sized + * value is deleted then this field is not reduced. Certainly + * max_vk_data_len >= the measured maximum in all the hives that we + * have observed. + *) + max_vk_data_len : 4*8 : littleendian, bind (Int32.to_int max_vk_data_len); + unknown6 : 4*8 : littleendian; + name_len : 2*8 : littleendian; + classname_len : 2*8 : littleendian; + name : name_len * 8 : string } + +let fprintf_nk chan nk = + let (_, _, bits) = lookup "fprintf_nk" nk in + bitmatch bits with + | { :nk_fields } -> + fprintf chan + "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s %08lx %s %d %ld %s %08lx %d %s %s %s %d %04x %08lx %d %d %08lx %d %d %s\n" + (print_offset nk) + (if unknownflag8000 then "8" else ".") + (if unknownflag4000 then "4" else ".") + (if unknownflag2000 then "2" else ".") + (if unknownflag1000 then "1" else ".") + (if unknownflag0800 then "8" else ".") + (if unknownflag0400 then "4" else ".") + (if virtualstore then "s" else ".") + (if virttarget then "t" else ".") + (if virtmirrored then "m" else ".") + (if predefinedhandle then "P" else ".") + (if keynameascii then "A" else ".") + (if symlinkkey then "S" else ".") + (if cannotbedeleted then "N" else ".") + (if isroot then "R" else ".") + (if ismountpoint then "M" else ".") + (if isvolatile then "V" else ".") + (print_time timestamp) + unknown1 (print_offset parent) nr_subkeys nr_subkeys_vol + (print_offset subkeys) subkeys_vol + nr_values (print_offset vallist) + (print_offset sk) (print_offset classname) + max_subkey_name_len unknown2 unknown3 + max_vk_name_len max_vk_data_len unknown6 + name_len classname_len name + +type data_t = Inline of bitstring | Offset of int +let bitmatch vk_fields = + { "vk" : 2*8 : string; + name_len : 2*8 : littleendian; + (* No one documents the important fact that data_len can have the + * top bit set (randomly or is it meaningful?). The length can + * also be 0 (or 0x80000000) if the data type is NONE. + *) + data_len : 4*8 + : littleendian, bind ( + let data_len = Int32.logand data_len 0x7fff_ffff_l in + Int32.to_int data_len + ); + (* Inline data if len <= 4, offset otherwise. + * + * The data itself depends on the type field. + * + * For REG_SZ type, the data always seems to be NUL-terminated, which + * means because these strings are often UTF-16LE, that the string will + * end with \0\0 bytes. The termination bytes are included in data_len. + * + * For REG_MULTI_SZ, see + * http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx + *) + data : 4*8 + : bitstring, bind ( + if data_len <= 4 then + Inline (takebits (data_len*8) data) + else ( + let offset = + bitmatch data with { offset : 4*8 : littleendian } -> offset in + let offset = get_offset offset in + Offset offset + ) + ); + t : 4*8 : littleendian, bind (Int32.to_int t); + (* Flags, stored as a little-endian word: *) + unknown1 : 7; + nameisascii : 1; (* Clear for default [zero-length] name, always set + * otherwise in registries that we found. Perhaps this + * is really "nameisdefault" flag? + *) + unknown2 : 8; + (* Unknown field, usually contains something. *) + unknown3 : 2*8 : littleendian; + name : name_len * 8 : string } + +let fprintf_vk chan vk = + let (_, _, bits) = lookup "fprintf_vk" vk in + bitmatch bits with + | { :vk_fields } -> + let real_data = + match data with + | Inline data -> data + | Offset offset -> + let (_, _, bits) = lookup "fprintf_vk (data)" offset in + bits in + fprintf chan "VK %s %s %d %s%s %s %08x %s %08x %08x\n" + (print_offset vk) + name data_len + (match data with + | Inline _ -> "" + | Offset offset -> "["^print_offset offset^"]") + (print_bitstring real_data) + (print_vk_type t) + unknown1 (if nameisascii then "A" else "L") + unknown2 unknown3 + +let bitmatch sk_fields = + { "sk" : 2*8 : string; + unknown1 : 2*8 : littleendian; + sk_next : 4*8 : littleendian, bind (get_offset sk_next); + sk_prev : 4*8 : littleendian, bind (get_offset sk_prev); + refcount : 4*8 : littleendian, bind (Int32.to_int refcount); + sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len); + sec_desc : sec_len * 8 : bitstring } + +let fprintf_sk chan sk = + let (_, _, bits) = lookup "fprintf_sk" sk in + bitmatch bits with + | { :sk_fields } -> + fprintf chan "SK %s %04x %s %s %d %d\n" + (print_offset sk) unknown1 + (print_offset sk_next) (print_offset sk_prev) + refcount sec_len + (* print_bitstring sec_desc -- suppress this *) + +(* Store lists of records we encounter (lists of offsets). *) +let nk_records = ref [] +and vk_records = ref [] +and sk_records = ref [] + +(* Functions to visit each block, starting at the root. Each block + * that we visit is printed. + *) +let rec visit_nk ?(nk_is_root = false) nk = + let (_, _, bits) = lookup "visit_nk" nk in + mark_visited nk; + (bitmatch bits with + | { :nk_fields } -> + fprintf_nk stdout nk; + + nk_records := nk :: !nk_records; + + (* Check the isroot flag is only set on the root node. *) + assert (isroot = nk_is_root); + + if unknownflag8000 then + eprintf "NK %s unknownflag8000 is set\n" (print_offset nk); + if unknownflag4000 then + eprintf "NK %s unknownflag4000 is set\n" (print_offset nk); + if unknownflag2000 then + eprintf "NK %s unknownflag2000 is set\n" (print_offset nk); + if unknownflag1000 then + eprintf "NK %s unknownflag1000 is set\n" (print_offset nk); + if unknownflag0800 then + eprintf "NK %s unknownflag0800 is set\n" (print_offset nk); + if unknownflag0400 then + eprintf "NK %s unknownflag0400 is set\n" (print_offset nk); + if unknown1 <> 0_l then + eprintf "NK %s unknown1 <> 0 (%08lx)\n" (print_offset nk) unknown1; + if unknown2 <> 0 then + eprintf "NK %s unknown2 <> 0 (%04x)\n" (print_offset nk) unknown2; + if unknown3 <> 0_l then + eprintf "NK %s unknown3 <> 0 (%08lx)\n" (print_offset nk) unknown3; + if unknown6 <> 0_l then + eprintf "NK %s unknown6 <> 0 (%08lx)\n" (print_offset nk) unknown6; + + (* -- common, assume it's not an error + if classname = -1 then + eprintf "NK %s has no classname\n" (print_offset nk); + if classname_len = 0 then + eprintf "NK %s has zero-length classname\n" (print_offset nk); + *) + if sk = -1 then + eprintf "NK %s has no sk-record\n" (print_offset nk); + if name_len = 0 then + eprintf "NK %s has zero-length name\n" (print_offset nk); + + (* Visit the values first at this node. *) + let max_data_len, max_name_len = + if vallist <> -1 then + visit_vallist nr_values vallist + else + 0, 0 in + + if max_vk_data_len < max_data_len then + eprintf "NK %s nk.max_vk_data_len (%d) < actual max data_len (%d)\n" + (print_offset nk) max_vk_data_len max_data_len; + + if max_vk_name_len < max_name_len * 2 then + eprintf "NK %s nk.max_vk_name_len (%d) < actual max name_len * 2 (%d)\n" + (print_offset nk) max_vk_name_len (max_name_len * 2); + + (* Visit the subkeys of this node. *) + if subkeys <> -1 then ( + let counted, max_name_len = visit_subkeys subkeys in + + if counted <> nr_subkeys then + failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n" + basename nr_subkeys counted (print_offset subkeys); + + if max_subkey_name_len < max_name_len * 2 then + eprintf "NK %s nk.max_subkey_name_len (%d) < actual max name_len * 2 (%d)\n" + (print_offset nk) max_subkey_name_len (max_name_len * 2); + ); + + (* Visit the sk-record and classname. *) + if sk <> -1 then + visit_sk sk; + if classname <> -1 then + visit_classname classname classname_len; + + | {_} -> + failwithf "%s: invalid nk block at offset %s\n" + basename (print_offset nk) + ) + +and visit_vallist nr_values vallist = + let (seg_len, _, bits) = lookup "visit_vallist" vallist in + mark_visited vallist; + printf "VL %s %d %d\n" (print_offset vallist) nr_values seg_len; + visit_values_in_vallist nr_values vallist bits + +and visit_values_in_vallist nr_values vallist bits = + if nr_values > 0 then ( + bitmatch bits with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> + assert (nr_values = 0); + 0, 0 + + | { value : 4*8 : littleendian, bind (get_offset value); + rest : -1 : bitstring } -> + let data_len, name_len = visit_vk value in + let max_data_len, max_name_len = + visit_values_in_vallist (nr_values-1) vallist rest in + max max_data_len data_len, max max_name_len name_len + + | {_} -> + failwithf "%s: invalid offset in value list at %s\n" + basename (print_offset vallist) + ) else 0, 0 + +and visit_vk vk = + let (_, _, bits) = lookup "visit_vk" vk in + mark_visited vk; + + (bitmatch bits with + | { :vk_fields } -> + fprintf_vk stdout vk; + + if unknown1 <> 0 then + eprintf "VK %s unknown1 flags set (%02x)\n" + (print_offset vk) unknown1; + if unknown2 <> 0 then + eprintf "VK %s unknown2 flags set (%02x)\n" + (print_offset vk) unknown2; + if unknown3 <> 0 then + eprintf "VK %s unknown3 flags set (%04x)\n" + (print_offset vk) unknown3; + + (* Note this is common for default [ie. zero-length] key names. *) + if not nameisascii && name_len > 0 then + eprintf "VK %s has non-ASCII name flag set (name is %s)\n" + (print_offset vk) (print_binary_string name); + + vk_records := vk :: !vk_records; + (match data with + | Inline data -> () + | Offset offset -> + let _ = lookup "visit_vk (data)" offset in + mark_visited offset + ); + + data_len, name_len + + | {_} -> + failwithf "%s: invalid vk block at offset %s\n" + basename (print_offset vk) + ) + +(* Visits subkeys, recursing through intermediate lf/lh/ri structures, + * and returns the number of subkeys actually seen. + *) +and visit_subkeys subkeys = + let (_, _, bits) = lookup "visit_subkeys" subkeys in + mark_visited subkeys; + (bitmatch bits with + | { ("lf"|"lh") : 2*8 : string; + len : 2*8 : littleendian; (* number of subkeys of this node *) + rest : len*8*8 : bitstring } -> + printf "LF %s %d\n" (print_offset subkeys) len; + visit_subkeys_in_lf_list subkeys len rest + + | { "ri" : 2*8 : string; + len : 2*8 : littleendian; + rest : len*4*8 : bitstring } -> + printf "RI %s %d\n" (print_offset subkeys) len; + visit_subkeys_in_ri_list subkeys len rest + + (* In theory you can have an li-record here, but we've never + * seen one. + *) + + | { "nk" : 2*8 : string } -> + visit_nk subkeys; + let name_len = name_len_of_nk subkeys in + 1, name_len + + | {_} -> + failwithf "%s: invalid subkey node found at %s\n" + basename (print_offset subkeys) + ) + +and visit_subkeys_in_lf_list subkeys_top len bits = + if len > 0 then ( + bitmatch bits with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> + assert (len = 0); + 0, 0 + + | { offset : 4*8 : littleendian, bind (get_offset offset); + _ (* hash *) : 4*8 : bitstring; + rest : -1 : bitstring } -> + let c1, name_len1 = visit_subkeys offset in + let c2, name_len2 = visit_subkeys_in_lf_list subkeys_top (len-1) rest in + c1 + c2, max name_len1 name_len2 + + | {_} -> + failwithf "%s: invalid subkey in lf/lh list at %s\n" + basename (print_offset subkeys_top) + ) else 0, 0 + +and visit_subkeys_in_ri_list subkeys_top len bits = + if len > 0 then ( + bitmatch bits with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> + assert (len = 0); + 0, 0 + + | { offset : 4*8 : littleendian, bind (get_offset offset); + rest : -1 : bitstring } -> + let c1, name_len1 = visit_subkeys offset in + let c2, name_len2 = visit_subkeys_in_ri_list subkeys_top (len-1) rest in + c1 + c2, max name_len1 name_len2 + + | {_} -> + failwithf "%s: invalid subkey in ri list at %s\n" + basename (print_offset subkeys_top) + ) else 0, 0 + +and name_len_of_nk nk = + let (_, _, bits) = lookup "name_len_of_nk" nk in + bitmatch bits with + | { :nk_fields } -> name_len + +and visit_sk sk = + let (_, _, bits) = lookup "visit_sk" sk in + if is_not_visited sk then ( + mark_visited sk; + (bitmatch bits with + | { :sk_fields } -> + fprintf_sk stdout sk; + + if unknown1 <> 0 then + eprintf "SK %s unknown1 <> 0 (%04x)\n" (print_offset sk) unknown1; + + sk_records := sk :: !sk_records + + | {_} -> + failwithf "%s: invalid sk-record at %s\n" + basename (print_offset sk) + ) + ) + +and visit_classname classname classname_len = + let (seg_len, _, bits) = lookup "visit_classname" classname in + mark_visited classname; + assert (seg_len >= classname_len); + printf "CL %s %s\n" (print_offset classname) (print_bitstring bits) + +let () = + visit_nk ~nk_is_root:true root_key + +(* These are immutable now. *) +let nk_records = !nk_records +let vk_records = !vk_records +let sk_records = !sk_records + +(* So we can rapidly tell what is an nk/vk/sk offset. *) +let nk_set = + List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty nk_records +let vk_set = + List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty vk_records +let sk_set = + List.fold_left (fun set offs -> IntSet.add offs set) IntSet.empty sk_records + +(* Now after visiting all the blocks, are there any used blocks which + * are unvisited? If there are any then that would indicate either (a) + * that the hive contains unreferenced blocks, or (b) that there are + * referenced blocks that we did not visit because we don't have a full + * understanding of the hive format. + * + * Windows 7 registries often contain a few of these -- not clear + * how serious they are, but don't fail here. + *) +let () = + let unvisited = unvisited_blocks () in + IntMap.iter ( + fun offset block -> + match block with + | (_, false, _) -> () (* ignore unused blocks *) + | (seg_len, true, _) -> + eprintf "used block %s (length %d) is not referenced\n" + (print_offset offset) seg_len + ) unvisited + +(* Check the SKs are: + * (a) linked into a single circular list through the sk_prev/sk_next + * pointers + * (b) refcounts are correct + *) +let () = + if List.length sk_records > 0 then ( + let sk0 = List.hd sk_records in (* start at any arbitrary sk *) + (* This loop follows the chain of sk pointers until we arrive + * back at the original, checking prev/next are consistent. + *) + let rec loop visited prevsk sk = + if sk <> sk0 then ( + if not (IntSet.mem sk sk_set) then + eprintf "SK %s not an sk-record (faulty sk_next somewhere)\n" + (print_offset sk) + else ( + let _, _, bits = lookup "loop sk circular list" sk in + bitmatch bits with + | { :sk_fields } -> + if sk_prev <> prevsk then + eprintf "SK %s sk_prev != previous sk (%s, %s)\n" + (print_offset sk) + (print_offset sk_prev) (print_offset prevsk); + if IntSet.mem sk visited then + eprintf "SK %s already visited (bad circular list)\n" + (print_offset sk); + let visited = IntSet.add sk visited in + loop visited sk sk_next + ) + ) + in + let _, _, bits = lookup "start sk circular list" sk0 in + (bitmatch bits with + | { :sk_fields } -> + loop IntSet.empty sk_prev sk0 + ); + + (* For every nk-record, if it references an sk-record count that, + * then check this matches the refcounts in the sk-records + * themselves. + *) + let refcounts = Counter.create () in + List.iter ( + fun nk -> + let _, _, bits = lookup "sk refcounter (nk)" nk in + (bitmatch bits with + | { :nk_fields } -> + Counter.incr refcounts sk + ) + ) nk_records; + + List.iter ( + fun sk -> + let _, _, bits = lookup "sk refcounter (sk)" sk in + (bitmatch bits with + | { :sk_fields } -> + let actual = Counter.get refcounts sk in + if actual <> refcount then + eprintf "SK %s incorrect refcount (actual %d, in file %d)\n" + (print_offset sk) actual refcount + ) + ) sk_records + ) diff --git a/hivex/tools/visualizer_NT_time.ml b/hivex/tools/visualizer_NT_time.ml new file mode 100644 index 0000000..a752112 --- /dev/null +++ b/hivex/tools/visualizer_NT_time.ml @@ -0,0 +1,30 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * For existing information on the registry format, please refer + * to the following documents. Note they are both incomplete + * and inaccurate in some respects. + *) + +(* Convert an NT file timestamp to time_t. See: + * http://blogs.msdn.com/oldnewthing/archive/2003/09/05/54806.aspx + * http://support.microsoft.com/kb/167296 + *) +let nt_to_time_t t = + let t = Int64.sub t 116444736000000000L in + let t = Int64.div t 10000000L in + Int64.to_float t diff --git a/hivex/tools/visualizer_utils.ml b/hivex/tools/visualizer_utils.ml new file mode 100644 index 0000000..2f0d6b7 --- /dev/null +++ b/hivex/tools/visualizer_utils.ml @@ -0,0 +1,163 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * For existing information on the registry format, please refer + * to the following documents. Note they are both incomplete + * and inaccurate in some respects. + *) + +open ExtString +open Printf + +let failwithf fs = ksprintf failwith fs + +(* Useful function to convert unknown bitstring fragments into + * printable strings. + *) +let rec print_bitstring bits = + let str = Bitstring.string_of_bitstring bits in + print_binary_string str +and print_binary_string str = + let rec printable = function + | '\x00' -> "\\0" | '\x01' -> "\\1" | '\x02' -> "\\2" | '\x03' -> "\\3" + | '\x04' -> "\\4" | '\x05' -> "\\5" | '\x06' -> "\\6" | '\x07' -> "\\7" + | ('\x08'..'\x31' as c) + | ('\x7f'..'\xff' as c) -> sprintf "\\x%02x" (Char.code c) + | ('\x32'..'\x7e' as c) -> String.make 1 c + and repeat str = function + | n when n <= 0 -> "" + | n -> str ^ repeat str (n-1) + in + let chars = String.explode str in + let rec loop acc = function + | [] -> List.rev acc + | x :: xs -> + let rec loop2 i = function + | y :: ys when x = y -> loop2 (i+1) ys + | ys -> i, ys + in + let count, ys = loop2 1 xs in + let acc = (count, x) :: acc in + loop acc ys + in + let frags = loop [] chars in + let frags = + List.map (function + | (nr, x) when nr <= 4 -> repeat (printable x) nr + | (nr, x) -> sprintf "%s<%d times>" (printable x) nr + ) frags in + "\"" ^ String.concat "" frags ^ "\"" + +(* Convert an offset from the file to an offset. The only special + * thing is that 0xffffffff in the file is used as a kind of "NULL + * pointer". We map these null values to -1. + *) +let get_offset = function + | 0xffffffff_l -> -1 + | i -> Int32.to_int i + +(* Print an offset. *) +let print_offset = function + | -1 -> "NULL" + | i -> sprintf "@%08x" i + +(* Print time. *) +let print_time t = + let tm = Unix.gmtime t in + sprintf "%04d-%02d-%02d %02d:%02d:%02d" + (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday + tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec + +(* Print UTF16LE. *) +let print_utf16 str = + let n = String.length str in + if n land 1 <> 0 then + print_binary_string str + else ( + let rec loop i = + if i < n-1 then ( + let c1 = Char.code (str.[i]) in + let c2 = Char.code (str.[i+1]) in + if c1 <> 0 || c2 <> 0 then ( + (* Well, this doesn't print non-7bit-ASCII ... *) + let c = + if c2 = 0 then String.make 1 (Char.chr c1) + else sprintf "\\u%04d" (c2 * 256 + c1) in + c :: loop (i+2) + ) else [] + ) else [] + in + let frags = loop 0 in + "L\"" ^ String.concat "" frags ^ "\"" + ) + +(* A map of int -> anything. *) +module IntMap = Map.Make (struct type t = int let compare = compare end) + +(* A set of ints. *) +module IntSet = Set.Make (struct type t = int let compare = compare end) + +(* Print registry vk-record type field. *) +let print_vk_type = function + | 0 -> "NONE" + | 1 -> "SZ" + | 2 -> "EXPAND_SZ" + | 3 -> "BINARY" + | 4 -> "DWORD" + | 5 -> "DWORD_BIG_ENDIAN" + | 6 -> "LINK" + | 7 -> "MULTI_SZ" + | 8 -> "RESOURCE_LiST" + | 9 -> "FULL_RESOURCE_DESCRIPTOR" + | 10 -> "RESOURCE_REQUIREMENTS_LIST" + | 11 -> "QWORD" + | i -> sprintf "UNKNOWN_VK_TYPE_%d" i + +(* XXX We should write a more efficient version of this and + * push it into the bitstring library. + *) +let is_zero_bitstring bits = + let len = Bitstring.bitstring_length bits in + let zeroes = Bitstring.zeroes_bitstring len in + 0 = Bitstring.compare bits zeroes + +let is_zero_guid = is_zero_bitstring + +(* http://msdn.microsoft.com/en-us/library/aa373931(VS.85).aspx + * Endianness of GUIDs is not clear from the MSDN documentation, + * so this is just a guess. + *) +let print_guid bits = + bitmatch bits with + | { data1 : 4*8 : littleendian; + data2 : 2*8 : littleendian; + data3 : 2*8 : littleendian; + data4_1 : 2*8 : littleendian; + data4_2 : 6*8 : littleendian } -> + sprintf "%08lX-%04X-%04X-%04X-%012LX" data1 data2 data3 data4_1 data4_2 + | { _ } -> + assert false + +(* Fold over little-endian 32-bit integers in a bitstring. *) +let rec bitstring_fold_left_int32_le f a bits = + bitmatch bits with + | { i : 4*8 : littleendian; + rest : -1 : bitstring } -> + bitstring_fold_left_int32_le f (f a i) rest + | { rest : -1 : bitstring } when Bitstring.bitstring_length rest = 0 -> a + | { _ } -> + invalid_arg "bitstring_fold_left_int32_le: length not a multiple of 32 bits" -- 1.8.3.1