From: Richard Jones Date: Mon, 22 Feb 2010 16:30:12 +0000 (+0000) Subject: Add a generator for generating bindings to other languages. X-Git-Tag: 1.2.0~21 X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=3c53304204c3fc4403875afc3668a8b5f9523921;p=hivex.git Add a generator for generating bindings to other languages. At the moment the generator just generates the C header file and C POD documentation. This just so we can compare the existing hand-written code with the generated code to make sure that our description of the API within the generator is correct. --- diff --git a/.gitignore b/.gitignore index 68450c0..6fb5f37 100644 --- a/.gitignore +++ b/.gitignore @@ -21,10 +21,14 @@ config.sub configure depcomp .deps +generator/.pod2text.data +generator/stamp-generator hivex.pc hivex-*.tar.gz hivex/*.1 hivex/*.3 +hivex/hivex.h +hivex/hivex.pod hivex/hivexsh hivex/hivexml hivex/tools/*.opt diff --git a/autogen.sh b/autogen.sh index a1fd048..0d08b4c 100755 --- a/autogen.sh +++ b/autogen.sh @@ -42,6 +42,12 @@ if [ ! -z "$BUILDDIR" ]; then CONFIGUREDIR=.. fi +# Rerun the generator (requires OCaml interpreter). This is *not* for +# anything that is required at configure-time when configure is run +# from a distribution tarball. From those, nothing ocaml-related is +# required. +./generator/generator.ml + # If no arguments were specified and configure has run before, use the previous # arguments if [ $# == 0 -a -x ./config.status ]; then diff --git a/generator/generator.ml b/generator/generator.ml new file mode 100755 index 0000000..1cbee8c --- /dev/null +++ b/generator/generator.ml @@ -0,0 +1,1358 @@ +#!/usr/bin/env ocaml +(* hivex + * Copyright (C) 2009-2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + *) + +(* This script generates language bindings and some documentation for + * hivex. + * + * After editing this file, run it (./generator/generator.ml) to + * regenerate all the output files. 'make' will rerun this + * automatically when necessary. Note that if you are using a separate + * build directory you must run generator.ml from the _source_ + * directory. + * + * IMPORTANT: This script should NOT print any warnings. If it prints + * warnings, you should treat them as errors. + * + * OCaml tips: (1) In emacs, install tuareg-mode to display and format + * OCaml code correctly. 'vim' comes with a good OCaml editing mode by + * default. (2) Read the resources at http://ocaml-tutorial.org/ + *) + +#load "unix.cma";; +#load "str.cma";; +#directory "+xml-light";; +#load "xml-light.cma";; + +open Unix +open Printf + +type style = ret * args +and ret = + | RErr (* 0 = ok, -1 = error *) + | RHive (* Returns a hive_h or NULL. *) + | RNode (* Returns hive_node_h or 0. *) + | RNodeList (* Returns hive_node_h* or NULL. *) + | RValue (* Returns hive_value_h or 0. *) + | RValueList (* Returns hive_value_h* or NULL. *) + | RString (* Returns char* or NULL. *) + | RStringList (* Returns char** or NULL. *) + | RLenType (* See hivex_value_type. *) + | RLenTypeVal (* See hivex_value_value. *) + | RInt32 (* Returns int32. *) + | RInt64 (* Returns int64. *) + +and args = argt list (* List of parameters. *) + +and argt = (* Note, cannot be NULL/0 unless it + says so explicitly below. *) + | AHive (* hive_h* *) + | ANode of string (* hive_node_h *) + | AValue of string (* hive_value_h *) + | AString of string (* char* *) + | AStringNullable of string (* char* (can be NULL) *) + | AOpenFlags (* HIVEX_OPEN_* flags list. *) + | AUnusedFlags (* Flags arg that is always 0 *) + | ASetValues (* See hivex_node_set_values. *) + +(* Hive types, from: + * https://secure.wikimedia.org/wikipedia/en/wiki/Windows_Registry#Keys_and_values + * + * It's unfortunate that in our original C binding we strayed away from + * the names that Windows uses (eg. REG_SZ for strings). We include + * both our names and the Windows names. + *) +let hive_types = [ + 0, "none", "NONE", + "Just a key without a value"; + 1, "string", "SZ", + "A Windows string (encoding is unknown, but often UTF16-LE)"; + 2, "expand_string", "EXPAND_SZ", + "A Windows string that contains %env% (environment variable expansion)"; + 3, "binary", "BINARY", + "A blob of binary"; + 4, "dword", "DWORD", + "DWORD (32 bit integer), little endian"; + 5, "dword_be", "DWORD_BIG_ENDIAN", + "DWORD (32 bit integer), big endian"; + 6, "link", "LINK", + "Symbolic link to another part of the registry tree"; + 7, "multiple_strings", "MULTI_SZ", + "Multiple Windows strings. See http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx"; + 8, "resource_list", "RESOURCE_LIST", + "Resource list"; + 9, "full_resource_description", "FULL_RESOURCE_DESCRIPTOR", + "Resource descriptor"; + 10, "resource_requirements_list", "RESOURCE_REQUIREMENTS_LIST", + "Resouce requirements list"; + 11, "qword", "QWORD", + "QWORD (64 bit integer), unspecified endianness but usually little endian" +] + +(* Open flags (bitmask passed to AOpenFlags) *) +let open_flags = [ + 1, "VERBOSE", "Verbose messages"; + 2, "DEBUG", "Debug messages"; + 4, "WRITE", "Enable writes to the hive"; +] + +(* The API calls. *) +let functions = [ + "open", (RHive, [AString "filename"; AOpenFlags]), + "open a hive file", + "\ +Opens the hive named C for reading. + +Flags is an ORed list of the open flags (or C<0> if you don't +want to pass any flags). These flags are defined: + +=over 4 + +=item HIVEX_OPEN_VERBOSE + +Verbose messages. + +=item HIVEX_OPEN_DEBUG + +Very verbose messages, suitable for debugging problems in the library +itself. + +This is also selected if the C environment variable +is set to 1. + +=item HIVEX_OPEN_WRITE + +Open the hive for writing. If omitted, the hive is read-only. + +See L. + +=back"; + + "close", (RErr, [AHive]), + "close a hive handle", + "\ +Close a hive handle and free all associated resources. + +Note that any uncommitted writes are I committed by this call, +but instead are lost. See L."; + + "root", (RNode, [AHive]), + "return the root node of the hive", + "\ +Return root node of the hive. All valid registries must contain +a root node."; + + "node_name", (RString, [AHive; ANode "node"]), + "return the name of the node", + "\ +Return the name of the node. + +Note that the name of the root node is a dummy, such as +C<$$$PROTO.HIV> (other names are possible: it seems to depend on the +tool or program that created the hive in the first place). You can +only know the \"real\" name of the root node by knowing which registry +file this hive originally comes from, which is knowledge that is +outside the scope of this library."; + + "node_children", (RNodeList, [AHive; ANode "node"]), + "return children of node", + "\ +Return an array of nodes which are the subkeys +(children) of C."; + + "node_get_child", (RNode, [AHive; ANode "node"; AString "name"]), + "return named child of node", + "\ +Return the child of node with the name C, if it exists. + +The name is matched case insensitively. + +If the child node does not exist, this returns 0 without +setting errno."; + + "node_parent", (RNode, [AHive; ANode "node"]), + "return the parent of node", + "\ +Return the parent of C. + +The parent pointer of the root node in registry files that we +have examined seems to be invalid, and so this function will +return an error if called on the root node."; + + "node_values", (RValueList, [AHive; ANode "node"]), + "return (key, value) pairs attached to a node", + "\ +Return the array of (key, value) pairs attached to this node."; + + "node_get_value", (RValue, [AHive; ANode "node"; AString "key"]), + "return named key at node", + "\ +Return the value attached to this node which has the name C, +if it exists. + +The key name is matched case insensitively. + +Note that to get the default key, you should pass the empty +string C<\"\"> here. The default key is often written C<\"@\">, but +inside hives that has no meaning and won't give you the +default key."; + + "value_key", (RString, [AHive; AValue "val"]), + "return the key of a (key, value) pair", + "\ +Return the key (name) of a (key, value) pair. The name +is reencoded as UTF-8 and returned as a string. + +The string should be freed by the caller when it is no longer needed. + +Note that this function can return a zero-length string. In the +context of Windows Registries, this means that this value is the +default key for this node in the tree. This is usually written +as C<\"@\">."; + + "value_type", (RLenType, [AHive; AValue "val"]), + "return data length and data type of a value", + "\ +Return the data length and data type of the value in this (key, value) +pair. See also C which returns all this +information, and the value itself. Also, C functions +below which can be used to return the value in a more useful form when +you know the type in advance."; + + "value_value", (RLenTypeVal, [AHive; AValue "val"]), + "return data length, data type and data of a value", + "\ +Return the value of this (key, value) pair. The value should +be interpreted according to its type (see C)."; + + "value_string", (RString, [AHive; AValue "val"]), + "return value as a string", + "\ +If this value is a string, return the string reencoded as UTF-8 +(as a C string). This only works for values which have type +C, C or C."; + + "value_multiple_strings", (RStringList, [AHive; AValue "val"]), + "return value as multiple strings", + "\ +If this value is a multiple-string, return the strings reencoded +as UTF-8 (as a NULL-terminated array of C strings). This only +works for values which have type C."; + + "value_dword", (RInt32, [AHive; AValue "val"]), + "return value as a DWORD", + "\ +If this value is a DWORD (Windows int32), return it. This only works +for values which have type C or C."; + + "value_qword", (RInt64, [AHive; AValue "val"]), + "return value as a QWORD", + "\ +If this value is a QWORD (Windows int64), return it. This only +works for values which have type C."; + + "commit", (RErr, [AHive; AStringNullable "filename"; AUnusedFlags]), + "commit (write) changes to file", + "\ +Commit (write) any changes which have been made. + +C is the new file to write. If C is NULL then we +overwrite the original file (ie. the file name that was passed to +C). C is not used, always pass 0. + +Note this does not close the hive handle. You can perform further +operations on the hive after committing, including making more +modifications. If you no longer wish to use the hive, call +C after this."; + + "node_add_child", (RNode, [AHive; ANode "parent"; AString "name"]), + "add child node", + "\ +Add a new child node named C to the existing node C. +The new child initially has no subnodes and contains no keys or +values. The sk-record (security descriptor) is inherited from +the parent. + +The parent must not have an existing child called C, so if you +want to overwrite an existing child, call C +first."; + + "node_delete_child", (RErr, [AHive; ANode "node"]), + "delete child node", + "\ +Delete the node C. All values at the node and all subnodes are +deleted (recursively). The C handle and the handles of all +subnodes become invalid. You cannot delete the root node."; + + "node_set_values", (RErr, [AHive; ANode "node"; ASetValues; AUnusedFlags]), + "set (key, value) pairs at a node", + "\ +This call can be used to set all the (key, value) pairs stored in C. + +C is the node to modify. C is an array of (key, value) +pairs. There should be C elements in this array. C +is not used, always pass 0. + +Any existing values stored at the node are discarded, and their +C handles become invalid. Thus you can remove all +values stored at C by passing C. + +Note that this library does not offer a way to modify just a single +key at a node. We don't implement a way to do this efficiently."; +] + +(* Used to memoize the result of pod2text. *) +let pod2text_memo_filename = "generator/.pod2text.data" +let pod2text_memo : ((int * string * string), string list) Hashtbl.t = + try + let chan = open_in pod2text_memo_filename in + let v = input_value chan in + close_in chan; + v + with + _ -> Hashtbl.create 13 +let pod2text_memo_updated () = + let chan = open_out pod2text_memo_filename in + output_value chan pod2text_memo; + close_out chan + +(* Useful functions. + * Note we don't want to use any external OCaml libraries which + * makes this a bit harder than it should be. + *) +module StringMap = Map.Make (String) + +let failwithf fs = ksprintf failwith fs + +let unique = let i = ref 0 in fun () -> incr i; !i + +let replace_char s c1 c2 = + let s2 = String.copy s in + let r = ref false in + for i = 0 to String.length s2 - 1 do + if String.unsafe_get s2 i = c1 then ( + String.unsafe_set s2 i c2; + r := true + ) + done; + if not !r then s else s2 + +let isspace c = + c = ' ' + (* || c = '\f' *) || c = '\n' || c = '\r' || c = '\t' (* || c = '\v' *) + +let triml ?(test = isspace) str = + let i = ref 0 in + let n = ref (String.length str) in + while !n > 0 && test str.[!i]; do + decr n; + incr i + done; + if !i = 0 then str + else String.sub str !i !n + +let trimr ?(test = isspace) str = + let n = ref (String.length str) in + while !n > 0 && test str.[!n-1]; do + decr n + done; + if !n = String.length str then str + else String.sub str 0 !n + +let trim ?(test = isspace) str = + trimr ~test (triml ~test str) + +let rec find s sub = + let len = String.length s in + let sublen = String.length sub in + let rec loop i = + if i <= len-sublen then ( + let rec loop2 j = + if j < sublen then ( + if s.[i+j] = sub.[j] then loop2 (j+1) + else -1 + ) else + i (* found *) + in + let r = loop2 0 in + if r = -1 then loop (i+1) else r + ) else + -1 (* not found *) + in + loop 0 + +let rec replace_str s s1 s2 = + let len = String.length s in + let sublen = String.length s1 in + let i = find s s1 in + if i = -1 then s + else ( + let s' = String.sub s 0 i in + let s'' = String.sub s (i+sublen) (len-i-sublen) in + s' ^ s2 ^ replace_str s'' s1 s2 + ) + +let rec string_split sep str = + let len = String.length str in + let seplen = String.length sep in + let i = find str sep in + if i = -1 then [str] + else ( + let s' = String.sub str 0 i in + let s'' = String.sub str (i+seplen) (len-i-seplen) in + s' :: string_split sep s'' + ) + +let files_equal n1 n2 = + let cmd = sprintf "cmp -s %s %s" (Filename.quote n1) (Filename.quote n2) in + match Sys.command cmd with + | 0 -> true + | 1 -> false + | i -> failwithf "%s: failed with error code %d" cmd i + +let rec filter_map f = function + | [] -> [] + | x :: xs -> + match f x with + | Some y -> y :: filter_map f xs + | None -> filter_map f xs + +let rec find_map f = function + | [] -> raise Not_found + | x :: xs -> + match f x with + | Some y -> y + | None -> find_map f xs + +let iteri f xs = + let rec loop i = function + | [] -> () + | x :: xs -> f i x; loop (i+1) xs + in + loop 0 xs + +let mapi f xs = + let rec loop i = function + | [] -> [] + | x :: xs -> let r = f i x in r :: loop (i+1) xs + in + loop 0 xs + +let count_chars c str = + let count = ref 0 in + for i = 0 to String.length str - 1 do + if c = String.unsafe_get str i then incr count + done; + !count + +let name_of_argt = function + | AHive -> "h" + | ANode n | AValue n | AString n | AStringNullable n -> n + | AOpenFlags | AUnusedFlags -> "flags" + | ASetValues -> "values" + +(* Check function names etc. for consistency. *) +let check_functions () = + let contains_uppercase str = + let len = String.length str in + let rec loop i = + if i >= len then false + else ( + let c = str.[i] in + if c >= 'A' && c <= 'Z' then true + else loop (i+1) + ) + in + loop 0 + in + + (* Check function names. *) + List.iter ( + fun (name, _, _, _) -> + if String.length name >= 7 && String.sub name 0 7 = "hivex" then + failwithf "function name %s does not need 'hivex' prefix" name; + if name = "" then + failwithf "function name is empty"; + if name.[0] < 'a' || name.[0] > 'z' then + failwithf "function name %s must start with lowercase a-z" name; + if String.contains name '-' then + failwithf "function name %s should not contain '-', use '_' instead." + name + ) functions; + + (* Check function parameter/return names. *) + List.iter ( + fun (name, style, _, _) -> + let check_arg_ret_name n = + if contains_uppercase n then + failwithf "%s param/ret %s should not contain uppercase chars" + name n; + if String.contains n '-' || String.contains n '_' then + failwithf "%s param/ret %s should not contain '-' or '_'" + name n; + if n = "value" then + failwithf "%s has a param/ret called 'value', which causes conflicts in the OCaml bindings, use something like 'val' or a more descriptive name" name; + if n = "int" || n = "char" || n = "short" || n = "long" then + failwithf "%s has a param/ret which conflicts with a C type (eg. 'int', 'char' etc.)" name; + if n = "i" || n = "n" then + failwithf "%s has a param/ret called 'i' or 'n', which will cause some conflicts in the generated code" name; + if n = "argv" || n = "args" then + failwithf "%s has a param/ret called 'argv' or 'args', which will cause some conflicts in the generated code" name; + + (* List Haskell, OCaml and C keywords here. + * http://www.haskell.org/haskellwiki/Keywords + * http://caml.inria.fr/pub/docs/manual-ocaml/lex.html#operator-char + * http://en.wikipedia.org/wiki/C_syntax#Reserved_keywords + * Formatted via: cat c haskell ocaml|sort -u|grep -vE '_|^val$' \ + * |perl -pe 's/(.+)/"$1";/'|fmt -70 + * Omitting _-containing words, since they're handled above. + * Omitting the OCaml reserved word, "val", is ok, + * and saves us from renaming several parameters. + *) + let reserved = [ + "and"; "as"; "asr"; "assert"; "auto"; "begin"; "break"; "case"; + "char"; "class"; "const"; "constraint"; "continue"; "data"; + "default"; "deriving"; "do"; "done"; "double"; "downto"; "else"; + "end"; "enum"; "exception"; "extern"; "external"; "false"; "float"; + "for"; "forall"; "foreign"; "fun"; "function"; "functor"; "goto"; + "hiding"; "if"; "import"; "in"; "include"; "infix"; "infixl"; + "infixr"; "inherit"; "initializer"; "inline"; "instance"; "int"; + "interface"; + "land"; "lazy"; "let"; "long"; "lor"; "lsl"; "lsr"; "lxor"; + "match"; "mdo"; "method"; "mod"; "module"; "mutable"; "new"; + "newtype"; "object"; "of"; "open"; "or"; "private"; "qualified"; + "rec"; "register"; "restrict"; "return"; "short"; "sig"; "signed"; + "sizeof"; "static"; "struct"; "switch"; "then"; "to"; "true"; "try"; + "type"; "typedef"; "union"; "unsigned"; "virtual"; "void"; + "volatile"; "when"; "where"; "while"; + ] in + if List.mem n reserved then + failwithf "%s has param/ret using reserved word %s" name n; + in + + List.iter (fun arg -> check_arg_ret_name (name_of_argt arg)) (snd style) + ) functions; + + (* Check short descriptions. *) + List.iter ( + fun (name, _, shortdesc, _) -> + if shortdesc.[0] <> Char.lowercase shortdesc.[0] then + failwithf "short description of %s should begin with lowercase." name; + let c = shortdesc.[String.length shortdesc-1] in + if c = '\n' || c = '.' then + failwithf "short description of %s should not end with . or \\n." name + ) functions; + + (* Check long dscriptions. *) + List.iter ( + fun (name, _, _, longdesc) -> + if longdesc.[String.length longdesc-1] = '\n' then + failwithf "long description of %s should not end with \\n." name + ) functions + +(* 'pr' prints to the current output file. *) +let chan = ref Pervasives.stdout +let lines = ref 0 +let pr fs = + ksprintf + (fun str -> + let i = count_chars '\n' str in + lines := !lines + i; + output_string !chan str + ) fs + +let copyright_years = + let this_year = 1900 + (localtime (time ())).tm_year in + if this_year > 2009 then sprintf "2009-%04d" this_year else "2009" + +(* Generate a header block in a number of standard styles. *) +type comment_style = + CStyle | CPlusPlusStyle | HashStyle | OCamlStyle | HaskellStyle +type license = GPLv2plus | LGPLv2plus | GPLv2 | LGPLv2 + +let generate_header ?(extra_inputs = []) comment license = + let inputs = "generator/generator.ml" :: extra_inputs in + let c = match comment with + | CStyle -> pr "/* "; " *" + | CPlusPlusStyle -> pr "// "; "//" + | HashStyle -> pr "# "; "#" + | OCamlStyle -> pr "(* "; " *" + | HaskellStyle -> pr "{- "; " " in + pr "hivex generated file\n"; + pr "%s WARNING: THIS FILE IS GENERATED FROM:\n" c; + List.iter (pr "%s %s\n" c) inputs; + pr "%s ANY CHANGES YOU MAKE TO THIS FILE WILL BE LOST.\n" c; + pr "%s\n" c; + pr "%s Copyright (C) %s Red Hat Inc.\n" c copyright_years; + pr "%s Derived from code by Petter Nordahl-Hagen under a compatible license:\n" c; + pr "%s Copyright (c) 1997-2007 Petter Nordahl-Hagen.\n" c; + pr "%s Derived from code by Markus Stephany under a compatible license:\n" c; + pr "%s Copyright (c)2000-2004, Markus Stephany.\n" c; + pr "%s\n" c; + (match license with + | GPLv2plus -> + pr "%s This program is free software; you can redistribute it and/or modify\n" c; + pr "%s it under the terms of the GNU General Public License as published by\n" c; + pr "%s the Free Software Foundation; either version 2 of the License, or\n" c; + pr "%s (at your option) any later version.\n" c; + pr "%s\n" c; + pr "%s This program is distributed in the hope that it will be useful,\n" c; + pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c; + pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" c; + pr "%s GNU General Public License for more details.\n" c; + pr "%s\n" c; + pr "%s You should have received a copy of the GNU General Public License along\n" c; + pr "%s with this program; if not, write to the Free Software Foundation, Inc.,\n" c; + pr "%s 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" c; + + | LGPLv2plus -> + pr "%s This library is free software; you can redistribute it and/or\n" c; + pr "%s modify it under the terms of the GNU Lesser General Public\n" c; + pr "%s License as published by the Free Software Foundation; either\n" c; + pr "%s version 2 of the License, or (at your option) any later version.\n" c; + pr "%s\n" c; + pr "%s This library is distributed in the hope that it will be useful,\n" c; + pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c; + pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" c; + pr "%s Lesser General Public License for more details.\n" c; + pr "%s\n" c; + pr "%s You should have received a copy of the GNU Lesser General Public\n" c; + pr "%s License along with this library; if not, write to the Free Software\n" c; + pr "%s Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" c; + + | GPLv2 -> + pr "%s This program is free software; you can redistribute it and/or modify\n" c; + pr "%s it under the terms of the GNU General Public License as published by\n" c; + pr "%s the Free Software Foundation; version 2 of the License only.\n" c; + pr "%s\n" c; + pr "%s This program is distributed in the hope that it will be useful,\n" c; + pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c; + pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" c; + pr "%s GNU General Public License for more details.\n" c; + pr "%s\n" c; + pr "%s You should have received a copy of the GNU General Public License along\n" c; + pr "%s with this program; if not, write to the Free Software Foundation, Inc.,\n" c; + pr "%s 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" c; + + | LGPLv2 -> + pr "%s This library is free software; you can redistribute it and/or\n" c; + pr "%s modify it under the terms of the GNU Lesser General Public\n" c; + pr "%s License as published by the Free Software Foundation; either\n" c; + pr "%s version 2.1 of the License only.\n" c; + pr "%s\n" c; + pr "%s This library is distributed in the hope that it will be useful,\n" c; + pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c; + pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" c; + pr "%s Lesser General Public License for more details.\n" c; + pr "%s\n" c; + pr "%s You should have received a copy of the GNU Lesser General Public\n" c; + pr "%s License along with this library; if not, write to the Free Software\n" c; + pr "%s Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" c; + ); + (match comment with + | CStyle -> pr " */\n" + | CPlusPlusStyle + | HashStyle -> () + | OCamlStyle -> pr " *)\n" + | HaskellStyle -> pr "-}\n" + ); + pr "\n" + +(* Start of main code generation functions below this line. *) + +let rec generate_c_header () = + generate_header CStyle LGPLv2; + + pr "\ +#ifndef HIVEX_H_ +#define HIVEX_H_ + +#include + +#ifdef __cplusplus +extern \"C\" { +#endif + +/* NOTE: This API is documented in the man page hivex(3). */ + +/* Hive handle. */ +typedef struct hive_h hive_h; + +/* Nodes and values. */ +typedef size_t hive_node_h; +typedef size_t hive_value_h; + +/* Pre-defined types. */ +enum hive_type { +"; + List.iter ( + fun (t, old_style, new_style, description) -> + pr " /* %s */\n" description; + pr " hive_t_REG_%s,\n" new_style; + pr "#define hive_t_%s hive_t_REG_%s\n" old_style new_style; + pr "\n" + ) hive_types; + pr "\ +}; + +typedef enum hive_type hive_type; + +/* Bitmask of flags passed to hivex_open. */ +"; + List.iter ( + fun (v, flag, description) -> + pr " /* %s */\n" description; + pr "#define HIVEX_OPEN_%-10s %d\n" flag v; + ) open_flags; + pr "\n"; + + pr "\ +/* Array of (key, value) pairs passed to hivex_node_set_values. */ +struct hive_set_value { + char *key; + hive_type t; + size_t len; + char *value; +}; +typedef struct hive_set_value hive_set_value; +"; + + (* Function declarations. *) + List.iter ( + fun (shortname, style, _, _) -> + let name = "hivex_" ^ shortname in + generate_c_prototype ~extern:true name style + ) functions; + + (* The visitor pattern. *) + pr " +/* Visit all nodes. This is specific to the C API and is not made + * available to other languages. This is because of the complexity + * of binding callbacks in other languages, but also because other + * languages make it much simpler to iterate over a tree. + */ +struct hivex_visitor { + int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name); + int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name); + int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str); + int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, char **argv); + int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str); + int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int32_t); + int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int64_t); + int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); +}; + +#define HIVEX_VISIT_SKIP_BAD 1 + +extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); +extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); + +"; + + (* Finish the header file. *) + pr "\ +#ifdef __cplusplus +} +#endif + +#endif /* HIVEX_H_ */ +" + +and generate_c_prototype ?(extern = false) name style = + if extern then pr "extern "; + (match fst style with + | RErr -> pr "int " + | RHive -> pr "hive_h *" + | RNode -> pr "hive_node_h " + | RNodeList -> pr "hive_node_h *" + | RValue -> pr "hive_value_h " + | RValueList -> pr "hive_value_h *" + | RString -> pr "char *" + | RStringList -> pr "char **" + | RLenType -> pr "int " + | RLenTypeVal -> pr "char *" + | RInt32 -> pr "int32_t " + | RInt64 -> pr "int64_t " + ); + pr "%s (" name; + let comma = ref false in + List.iter ( + fun arg -> + if !comma then pr ", "; comma := true; + match arg with + | AHive -> pr "hive_h *h" + | ANode n -> pr "hive_node_h %s" n + | AValue n -> pr "hive_value_h %s" n + | AString n | AStringNullable n -> pr "const char *%s" n + | AOpenFlags | AUnusedFlags -> pr "int flags" + | ASetValues -> pr "size_t nr_values, const hive_set_value *values" + ) (snd style); + (match fst style with + | RLenType | RLenTypeVal -> pr ", hive_type *t, size_t *len" + | _ -> () + ); + pr ");\n" + +and generate_c_pod () = + pr "\ + =encoding utf8 + +=head1 NAME + +hivex - Windows Registry \"hive\" extraction library + +=head1 SYNOPSIS + + hive_h *hivex_open (const char *filename, int flags); + int hivex_close (hive_h *h); + +=head1 DESCRIPTION + +libhivex is a library for extracting the contents of Windows Registry +\"hive\" files. It is designed to be secure against buggy or malicious +registry files. + +Unlike many other tools in this area, it doesn't use the textual .REG +format for output, because parsing that is as much trouble as parsing +the original binary format. Instead it makes the file available +through a C API, or there is a separate program to export the hive as +XML (see L), or to get individual keys (see +L). + +=head2 TYPES + +=over 4 + +=item hive_h * + +This handle describes an open hive file. + +=item hive_node_h + +This is a node handle, an integer but opaque outside the library. +Valid node handles cannot be 0. The library returns 0 in some +situations to indicate an error. + +=item hive_type + +The enum below describes the possible types for the value(s) +stored at each node. Note that you should not trust the +type field in a Windows Registry, as it very often has no +relationship to reality. Some applications use their own +types. The encoding of strings is not specified. Some +programs store everything (including strings) in binary blobs. + + enum hive_type { +"; + List.iter ( + fun (t, _, new_style, description) -> + pr " /* %s */\n" description; + pr " hive_t_REG_%s = %d,\n" new_style t + ) hive_types; + pr "\ + }; + +=item hive_value_h + +This is a value handle, an integer but opaque outside the library. +Valid value handles cannot be 0. The library returns 0 in some +situations to indicate an error. + +=item hive_set_value + +The typedef C is used in conjunction with the +C call described below. + + struct hive_set_value { + char *key; /* key - a UTF-8 encoded ASCIIZ string */ + hive_type t; /* type of value field */ + size_t len; /* length of value field in bytes */ + char *value; /* value field */ + }; + typedef struct hive_set_value hive_set_value; + +To set the default value for a node, you have to pass C. + +Note that the C field is just treated as a list of bytes, and +is stored directly in the hive. The caller has to ensure correct +encoding and endianness, for example converting dwords to little +endian. + +The correct type and encoding for values depends on the node and key +in the registry, the version of Windows, and sometimes even changes +between versions of Windows for the same key. We don't document it +here. Often it's not documented at all. + +=back + +=head2 FUNCTIONS + +=over 4 + +"; + List.iter ( + fun (shortname, style, _, longdesc) -> + let name = "hivex_" ^ shortname in + pr "=item %s\n" name; + pr "\n"; + generate_c_prototype ~extern:false name style; + pr "\n"; + pr "%s\n" longdesc; + pr "\n"; + (match fst style with + | RErr -> + pr "\ +Returns 0 on success. +On error this returns -1 and sets errno.\n\n" + | RHive -> + pr "\ +Returns a new hive handle. +On error this returns NULL and sets errno.\n\n" + | RNode -> + pr "\ +Returns a node handle. +On error this returns 0 and sets errno.\n\n" + | RNodeList -> + pr "\ +Returns a 0-terminated array of nodes. +The array must be freed by the caller when it is no longer needed. +On error this returns NULL and sets errno.\n\n" + | RValue -> + pr "\ +Returns a value handle. +On error this returns 0 and sets errno.\n\n" + | RValueList -> + pr "\ +Returns a 0-terminated array of values. +The array must be freed by the caller when it is no longer needed. +On error this returns NULL and sets errno.\n\n" + | RString -> + pr "\ +Returns a string. +The string must be freed by the caller when it is no longer needed. +On error this returns NULL and sets errno.\n\n" + | RStringList -> + pr "\ +Returns a NULL-terminated array of C strings. +The strings and the array must all be freed by the caller when +they are no longer needed. +On error this returns NULL and sets errno.\n\n" + | RLenType -> + pr "\ +Returns 0 on success. +On error this returns NULL and sets errno.\n\n" + | RLenTypeVal -> + pr "\ +The value is returned as an array of bytes (of length C). +The value must be freed by the caller when it is no longer needed. +On error this returns NULL and sets errno.\n\n" + | RInt32 | RInt64 -> () + ); + ) functions; + + pr "\ +=back + +=head2 WRITING TO HIVE FILES + +The hivex library supports making limited modifications to hive files. +We have tried to implement this very conservatively in order to reduce +the chance of corrupting your registry. However you should be careful +and take back-ups, since Microsoft has never documented the hive +format, and so it is possible there are nuances in the +reverse-engineered format that we do not understand. + +To be able to modify a hive, you must pass the C +flag to C, otherwise any write operation will return with +errno C. + +The write operations shown below do not modify the on-disk file +immediately. You must call C in order to write the +changes to disk. If you call C without committing then +any writes are discarded. + +Hive files internally consist of a \"memory dump\" of binary blocks +(like the C heap), and some of these blocks can be unused. The hivex +library never reuses these unused blocks. Instead, to ensure +robustness in the face of the partially understood on-disk format, +hivex only allocates new blocks after the end of the file, and makes +minimal modifications to existing structures in the file to point to +these new blocks. This makes hivex slightly less disk-efficient than +it could be, but disk is cheap, and registry modifications tend to be +very small. + +When deleting nodes, it is possible that this library may leave +unreachable live blocks in the hive. This is because certain parts of +the hive disk format such as security (sk) records and big data (db) +records and classname fields are not well understood (and not +documented at all) and we play it safe by not attempting to modify +them. Apart from wasting a little bit of disk space, it is not +thought that unreachable blocks are a problem. + +=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED + +=over 4 + +=item * + +Changing the root node. + +=item * + +Creating a new hive file from scratch. This is impossible at present +because not all fields in the header are understood. + +=item * + +Modifying or deleting single values at a node. + +=item * + +Modifying security key (sk) records or classnames. +Previously we did not understand these records. However now they +are well-understood and we could add support if it was required +(but nothing much really uses them). + +=back + +=head2 VISITING ALL NODES + +The visitor pattern is useful if you want to visit all nodes +in the tree or all nodes below a certain point in the tree. + +First you set up your own C with your +callback functions. + +Each of these callback functions should return 0 on success or -1 +on error. If any callback returns -1, then the entire visit +terminates immediately. If you don't need a callback function at +all, set the function pointer to NULL. + + struct hivex_visitor { + int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name); + int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name); + int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *str); + int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, + hive_value_h, hive_type t, size_t len, const char *key, char **argv); + int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, + hive_value_h, hive_type t, size_t len, const char *key, + const char *str); + int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, int32_t); + int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, int64_t); + int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *value); + int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *value); + int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *value); + /* If value_any callback is not NULL, then the other value_* + * callbacks are not used, and value_any is called on all values. + */ + int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *value); + }; + +=over 4 + +=item hivex_visit + + int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); + +Visit all the nodes recursively in the hive C. + +C should be a C structure with callback +fields filled in as required (unwanted callbacks can be set to +NULL). C must be the length of the 'visitor' struct (you +should pass C for this). + +This returns 0 if the whole recursive visit was completed +successfully. On error this returns -1. If one of the callback +functions returned an error than we don't touch errno. If the +error was generated internally then we set errno. + +You can skip bad registry entries by setting C to +C. If this flag is not set, then a bad registry +causes the function to return an error immediately. + +This function is robust if the registry contains cycles or +pointers which are invalid or outside the registry. It detects +these cases and returns an error. + +=item hivex_visit_node + + int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque); + +Same as C but instead of starting out at the root, this +starts at C. + +=back + +=head1 THE STRUCTURE OF THE WINDOWS REGISTRY + +Note: To understand the relationship between hives and the common +Windows Registry keys (like C) please see the +Wikipedia page on the Windows Registry. + +The Windows Registry is split across various binary files, each +file being known as a \"hive\". This library only handles a single +hive file at a time. + +Hives are n-ary trees with a single root. Each node in the tree +has a name. + +Each node in the tree (including non-leaf nodes) may have an +arbitrary list of (key, value) pairs attached to it. It may +be the case that one of these pairs has an empty key. This +is referred to as the default key for the node. + +The (key, value) pairs are the place where the useful data is +stored in the registry. The key is always a string (possibly the +empty string for the default key). The value is a typed object +(eg. string, int32, binary, etc.). + +=head2 RELATIONSHIP TO .REG FILES + +Although this library does not care about or deal with Windows reg +files, it's useful to look at the relationship between the registry +itself and reg files because they are so common. + +A reg file is a text representation of the registry, or part of the +registry. The actual registry hives that Windows uses are binary +files. There are a number of Windows and Linux tools that let you +generate reg files, or merge reg files back into the registry hives. +Notable amongst them is Microsoft's REGEDIT program (formerly known as +REGEDT32). + +A typical reg file will contain many sections looking like this: + + [HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack] + \"@\"=\"Generic Stack\" + \"TileInfo\"=\"prop:System.FileCount\" + \"TilePath\"=str(2):\"%%systemroot%%\\\\system32\" + \"ThumbnailCutoff\"=dword:00000000 + \"FriendlyTypeName\"=hex(2):40,00,25,00,53,00,79,00,73,00,74,00,65,00,6d,00,52,00,6f,00,\\ + 6f,00,74,00,25,00,5c,00,53,00,79,00,73,00,74,00,65,00,6d,00,\\ + 33,00,32,00,5c,00,73,00,65,00,61,00,72,00,63,00,68,00,66,00,\\ + 6f,00,6c,00,64,00,65,00,72,00,2e,00,64,00,6c,00,6c,00,2c,00,\\ + 2d,00,39,00,30,00,32,00,38,00,00,00,d8 + +Taking this one piece at a time: + + [HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack] + +This is the path to this node in the registry tree. The first part, +C means that this comes from a hive +(file) called C. C<\\Classes\\Stack> is the real path part, +starting at the root node of the C hive. + +Below the node name is a list of zero or more key-value pairs. Any +interior or leaf node in the registry may have key-value pairs +attached. + + \"@\"=\"Generic Stack\" + +This is the \"default key\". In reality (ie. inside the binary hive) +the key string is the empty string. In reg files this is written as +C<@> but this has no meaning either in the hives themselves or in this +library. The value is a string (type 1 - see C +above). + + \"TileInfo\"=\"prop:System.FileCount\" + +This is a regular (key, value) pair, with the value being a type 1 +string. Note that inside the binary file the string is likely to be +UTF-16 encoded. This library converts to and from UTF-8 strings +transparently. + + \"TilePath\"=str(2):\"%%systemroot%%\\\\system32\" + +The value in this case has type 2 (expanded string) meaning that some +%%...%% variables get expanded by Windows. (This library doesn't know +or care about variable expansion). + + \"ThumbnailCutoff\"=dword:00000000 + +The value in this case is a dword (type 4). + + \"FriendlyTypeName\"=hex(2):40,00,.... + +This value is an expanded string (type 2) represented in the reg file +as a series of hex bytes. In this case the string appears to be a +UTF-16 string. + +=head1 NOTE ON THE USE OF ERRNO + +Many functions in this library set errno to indicate errors. These +are the values of errno you may encounter (this list is not +exhaustive): + +=over 4 + +=item ENOTSUP + +Corrupt or unsupported Registry file format. + +=item ENOKEY + +Missing root key. + +=item EINVAL + +Passed an invalid argument to the function. + +=item EFAULT + +Followed a Registry pointer which goes outside +the registry or outside a registry block. + +=item ELOOP + +Registry contains cycles. + +=item ERANGE + +Field in the registry out of range. + +=item EEXIST + +Registry key already exists. + +=item EROFS + +Tried to write to a registry which is not opened for writing. + +=back + +=head1 ENVIRONMENT VARIABLES + +=over 4 + +=item HIVEX_DEBUG + +Setting HIVEX_DEBUG=1 will enable very verbose messages. This is +useful for debugging problems with the library itself. + +=back + +=head1 SEE ALSO + +L, +L, +L, +L, +L, +L, +L, +L. + +=head1 AUTHORS + +Richard W.M. Jones (C) + +=head1 COPYRIGHT + +Copyright (C) 2009-2010 Red Hat Inc. + +Derived from code by Petter Nordahl-Hagen under a compatible license: +Copyright (C) 1997-2007 Petter Nordahl-Hagen. + +Derived from code by Markus Stephany under a compatible license: +Copyright (C) 2000-2004 Markus Stephany. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; +version 2.1 of the License. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +See file LICENSE for the full license. +" + +let output_to filename k = + let filename_new = filename ^ ".new" in + chan := open_out filename_new; + k (); + close_out !chan; + chan := Pervasives.stdout; + + (* Is the new file different from the current file? *) + if Sys.file_exists filename && files_equal filename filename_new then + unlink filename_new (* same, so skip it *) + else ( + (* different, overwrite old one *) + (try chmod filename 0o644 with Unix_error _ -> ()); + rename filename_new filename; + chmod filename 0o444; + printf "written %s\n%!" filename; + ) + +let perror msg = function + | Unix_error (err, _, _) -> + eprintf "%s: %s\n" msg (error_message err) + | exn -> + eprintf "%s: %s\n" msg (Printexc.to_string exn) + +(* Main program. *) +let () = + let lock_fd = + try openfile "configure.ac" [O_RDWR] 0 + with + | Unix_error (ENOENT, _, _) -> + eprintf "\ +You are probably running this from the wrong directory. +Run it from the top source directory using the command + generator/generator.ml +"; + exit 1 + | exn -> + perror "open: configure.ac" exn; + exit 1 in + + (* Acquire a lock so parallel builds won't try to run the generator + * twice at the same time. Subsequent builds will wait for the first + * one to finish. Note the lock is released implicitly when the + * program exits. + *) + (try lockf lock_fd F_LOCK 1 + with exn -> + perror "lock: configure.ac" exn; + exit 1); + + check_functions (); + + output_to "hivex/hivex.h" generate_c_header; + output_to "hivex/hivex.pod" generate_c_pod; + + (* Always generate this file last, and unconditionally. It's used + * by the Makefile to know when we must re-run the generator. + *) + let chan = open_out "generator/stamp-generator" in + fprintf chan "1\n"; + close_out chan; + + printf "generated %d lines of code\n" !lines diff --git a/hivex/hivex.c b/hivex/hivex.c index 7d26eeb..990be83 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -268,6 +268,8 @@ header_checksum (const hive_h *h) return sum; } +#define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) + hive_h * hivex_open (const char *filename, int flags) { diff --git a/hivex/hivex.h b/hivex/hivex.h deleted file mode 100644 index f4ce834..0000000 --- a/hivex/hivex.h +++ /dev/null @@ -1,131 +0,0 @@ -/* hivex - Windows Registry "hive" extraction library. - * Copyright (C) 2009 Red Hat Inc. - * Derived from code by Petter Nordahl-Hagen under a compatible license: - * Copyright (c) 1997-2007 Petter Nordahl-Hagen. - * Derived from code by Markus Stephany under a compatible license: - * Copyright (c)2000-2004, Markus Stephany. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * See file LICENSE for the full license. - */ - -#ifndef HIVEX_H_ -#define HIVEX_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* NOTE: This API is documented in the man page hivex(3). */ - -typedef struct hive_h hive_h; -typedef size_t hive_node_h; -typedef size_t hive_value_h; - -enum hive_type { - /* Just a key without a value. */ - hive_t_none = 0, - - /* A UTF-16 Windows string. */ - hive_t_string = 1, - - /* A UTF-16 Windows string that contains %env% (environment variable - * substitutions). - */ - hive_t_expand_string = 2, - - /* A blob of binary. */ - hive_t_binary = 3, - - /* Two ways to encode DWORDs (32 bit words). The first is little-endian. */ - hive_t_dword = 4, - hive_t_dword_be = 5, - - /* Symbolic link, we think to another part of the registry tree. */ - hive_t_link = 6, - - /* Multiple UTF-16 Windows strings, each separated by zero byte. See: - * http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx - */ - hive_t_multiple_strings = 7, - - /* These three are unknown. */ - hive_t_resource_list = 8, - hive_t_full_resource_description = 9, - hive_t_resource_requirements_list = 10, - - /* A QWORD (64 bit word). This is stored in the file little-endian. */ - hive_t_qword = 11 -}; - -typedef enum hive_type hive_type; - -/* Bitmask of flags passed to hivex_open. */ -#define HIVEX_OPEN_VERBOSE 1 -#define HIVEX_OPEN_DEBUG 2 -#define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) -#define HIVEX_OPEN_WRITE 4 - -extern hive_h *hivex_open (const char *filename, int flags); -extern int hivex_close (hive_h *h); -extern hive_node_h hivex_root (hive_h *h); -extern char *hivex_node_name (hive_h *h, hive_node_h node); -extern hive_node_h *hivex_node_children (hive_h *h, hive_node_h node); -extern hive_node_h hivex_node_get_child (hive_h *h, hive_node_h node, const char *name); -extern hive_node_h hivex_node_parent (hive_h *h, hive_node_h node); -extern hive_value_h *hivex_node_values (hive_h *h, hive_node_h node); -extern hive_value_h hivex_node_get_value (hive_h *h, hive_node_h node, const char *key); -extern char *hivex_value_key (hive_h *h, hive_value_h value); -extern int hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len); -extern char *hivex_value_value (hive_h *h, hive_value_h value, hive_type *t, size_t *len); -extern char *hivex_value_string (hive_h *h, hive_value_h value); -extern char **hivex_value_multiple_strings (hive_h *h, hive_value_h value); -extern int32_t hivex_value_dword (hive_h *h, hive_value_h value); -extern int64_t hivex_value_qword (hive_h *h, hive_value_h value); -struct hivex_visitor { - int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str); - int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, char **argv); - int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str); - int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int32_t); - int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int64_t); - int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); - int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); - int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); - int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); -}; - -#define HIVEX_VISIT_SKIP_BAD 1 - -extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); -extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); - -extern int hivex_commit (hive_h *h, const char *filename, int flags); -extern hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name); -extern int hivex_node_delete_child (hive_h *h, hive_node_h node); - -struct hive_set_value { - char *key; - hive_type t; - size_t len; - char *value; -}; -typedef struct hive_set_value hive_set_value; - -extern int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags); - -#ifdef __cplusplus -} -#endif - -#endif /* HIVEX_H_ */ diff --git a/hivex/hivex.pod b/hivex/hivex.pod deleted file mode 100644 index 275eb42..0000000 --- a/hivex/hivex.pod +++ /dev/null @@ -1,655 +0,0 @@ -=encoding utf8 - -=head1 NAME - -hivex - Windows Registry "hive" extraction library - -=head1 SYNOPSIS - - hive_h *hivex_open (const char *filename, int flags); - int hivex_close (hive_h *h); - -=head1 DESCRIPTION - -libhivex is a library for extracting the contents of Windows Registry -"hive" files. It is designed to be secure against buggy or malicious -registry files. - -Unlike many other tools in this area, it doesn't use the textual .REG -format for output, because parsing that is as much trouble as parsing -the original binary format. Instead it makes the file available -through a C API, or there is a separate program to export the hive as -XML (see L), or to get individual keys (see -L). - -=head2 OPENING AND CLOSING A HIVE - -=over 4 - -=item hive_h *hivex_open (const char *filename, int flags); - -Opens the hive named C for reading. - -Flags is an ORed list of the open flags (or C<0> if you don't -want to pass any flags). These flags are defined: - -=over 4 - -=item HIVEX_OPEN_VERBOSE - -Verbose messages. - -=item HIVEX_OPEN_DEBUG - -Very verbose messages, suitable for debugging problems in the library -itself. - -This is also selected if the C environment variable -is set to 1. - -=item HIVEX_OPEN_WRITE - -Open the hive for writing. If omitted, the hive is read-only. - -See L. - -=back - -C returns a hive handle. On error this returns NULL and -sets C to indicate the error. - -=item int hivex_close (hive_h *h); - -Close a hive handle and free all associated resources. - -Note that any uncommitted writes are I committed by this call, -but instead are lost. See L. - -Returns 0 on success. On error this returns -1 and sets errno. - -=back - -=head2 NAVIGATING THE TREE OF HIVE SUBKEYS - -=over 4 - -=item hive_node_h - -This is a node handle, an integer but opaque outside the library. -Valid node handles cannot be 0. The library returns 0 in some -situations to indicate an error. - -=item hive_node_h hivex_root (hive_h *h); - -Return root node of the hive. All valid registries must contain -a root node. - -On error this returns 0 and sets errno. - -=item char *hivex_node_name (hive_h *h, hive_node_h node); - -Return the name of the node. The name is reencoded as UTF-8 -and returned as a C string. - -The string should be freed by the caller when it is no longer needed. - -Note that the name of the root node is a dummy, such as -C<$$$PROTO.HIV> (other names are possible: it seems to depend on the -tool or program that created the hive in the first place). You can -only know the "real" name of the root node by knowing which registry -file this hive originally comes from, which is knowledge that is -outside the scope of this library. - -On error this returns NULL and sets errno. - -=item hive_node_h *hivex_node_children (hive_h *h, hive_node_h node); - -Return a 0-terminated array of nodes which are the subkeys -(children) of C. - -The array should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item hive_node_h hivex_node_get_child (hive_h *h, hive_node_h node, const char *name); - -Return the child of node with the name C, if it exists. - -The name is matched case insensitively. - -If the child node does not exist, this returns 0 without -setting errno. - -On error this returns 0 and sets errno. - -=item hive_node_h hivex_node_parent (hive_h *h, hive_node_h node); - -Return the parent of C. - -On error this returns 0 and sets errno. - -The parent pointer of the root node in registry files that we -have examined seems to be invalid, and so this function will -return an error if called on the root node. - -=back - -=head2 GETTING VALUES AT A NODE - -The enum below describes the possible types for the value(s) -stored at each node. - - enum hive_type { - hive_t_none = 0, - hive_t_string = 1, - hive_t_expand_string = 2, - hive_t_binary = 3, - hive_t_dword = 4, - hive_t_dword_be = 5, - hive_t_link = 6, - hive_t_multiple_strings = 7, - hive_t_resource_list = 8, - hive_t_full_resource_description = 9, - hive_t_resource_requirements_list = 10, - hive_t_qword = 11 - }; - -=over 4 - -=item hive_value_h - -This is a value handle, an integer but opaque outside the library. -Valid value handles cannot be 0. The library returns 0 in some -situations to indicate an error. - -=item hive_value_h *hivex_node_values (hive_h *h, hive_node_h node); - -Return the 0-terminated array of (key, value) pairs attached to -this node. - -The array should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item hive_value_h hivex_node_get_value (hive_h *h, hive_node_h node, const char *key); - -Return the value attached to this node which has the name C, -if it exists. - -The key name is matched case insensitively. - -Note that to get the default key, you should pass the empty -string C<""> here. The default key is often written C<"@">, but -inside hives that has no meaning and won't give you the -default key. - -If no such key exists, this returns 0 and does not set errno. - -On error this returns 0 and sets errno. - -=item char *hivex_value_key (hive_h *h, hive_value_h value); - -Return the key (name) of a (key, value) pair. The name -is reencoded as UTF-8 and returned as a C string. - -The string should be freed by the caller when it is no longer needed. - -Note that this function can return a zero-length string. In the -context of Windows Registries, this means that this value is the -default key for this node in the tree. This is usually written -as C<"@">. - -On error this returns NULL and sets errno. - -=item int hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len); - -Return the data type and length of the value in this (key, value) -pair. See also C which returns all this -information, and the value itself. Also, C functions -below which can be used to return the value in a more useful form when -you know the type in advance. - -Returns 0 on success. On error this returns -1 and sets errno. - -=item char *hivex_value_value (hive_h *h, hive_value_h value, hive_type *t, size_t *len); - -Return the value of this (key, value) pair. The value should -be interpreted according to its type (see C). - -The value is returned in an array of bytes of length C. - -The value should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item char *hivex_value_string (hive_h *h, hive_value_h value); - -If this value is a string, return the string reencoded as UTF-8 -(as a C string). This only works for values which have type -C, C or C. - -The string should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item char **hivex_value_multiple_strings (hive_h *h, hive_value_h value); - -If this value is a multiple-string, return the strings reencoded -as UTF-8 (as a NULL-terminated array of C strings). This only -works for values which have type C. - -The string array and each string in it should be freed by the -caller when they are no longer needed. - -On error this returns NULL and sets errno. - -=item int32_t hivex_value_dword (hive_h *h, hive_value_h value); - -If this value is a DWORD (Windows int32), return it. This only works -for values which have type C or C. - -=item int64_t hivex_value_qword (hive_h *h, hive_value_h value); - -If this value is a QWORD (Windows int64), return it. This only -works for values which have type C. - -=back - -=head2 VISITING ALL NODES - -The visitor pattern is useful if you want to visit all nodes -in the tree or all nodes below a certain point in the tree. - -First you set up your own C with your -callback functions. - -Each of these callback functions should return 0 on success or -1 -on error. If any callback returns -1, then the entire visit -terminates immediately. If you don't need a callback function at -all, set the function pointer to NULL. - - struct hivex_visitor { - int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *str); - int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, - hive_value_h, hive_type t, size_t len, const char *key, char **argv); - int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, - hive_value_h, hive_type t, size_t len, const char *key, - const char *str); - int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, int32_t); - int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, int64_t); - int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - /* If value_any callback is not NULL, then the other value_* - * callbacks are not used, and value_any is called on all values. - */ - int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - }; - -=over 4 - -=item int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); - -Visit all the nodes recursively in the hive C. - -C should be a C structure with callback -fields filled in as required (unwanted callbacks can be set to -NULL). C must be the length of the 'visitor' struct (you -should pass C for this). - -This returns 0 if the whole recursive visit was completed -successfully. On error this returns -1. If one of the callback -functions returned an error than we don't touch errno. If the -error was generated internally then we set errno. - -You can skip bad registry entries by setting C to -C. If this flag is not set, then a bad registry -causes the function to return an error immediately. - -This function is robust if the registry contains cycles or -pointers which are invalid or outside the registry. It detects -these cases and returns an error. - -=item int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque); - -Same as C but instead of starting out at the root, this -starts at C. - -=back - -=head2 WRITING TO HIVE FILES - -The hivex library supports making limited modifications to hive files. -We have tried to implement this very conservatively in order to reduce -the chance of corrupting your registry. However you should be careful -and take back-ups, since Microsoft has never documented the hive -format, and so it is possible there are nuances in the -reverse-engineered format that we do not understand. - -To be able to modify a hive, you must pass the C -flag to C, otherwise any write operation will return with -errno C. - -The write operations shown below do not modify the on-disk file -immediately. You must call C in order to write the -changes to disk. If you call C without committing then -any writes are discarded. - -Hive files internally consist of a "memory dump" of binary blocks -(like the C heap), and some of these blocks can be unused. The hivex -library never reuses these unused blocks. Instead, to ensure -robustness in the face of the partially understood on-disk format, -hivex only allocates new blocks after the end of the file, and makes -minimal modifications to existing structures in the file to point to -these new blocks. This makes hivex slightly less disk-efficient than -it could be, but disk is cheap, and registry modifications tend to be -very small. - -When deleting nodes, it is possible that this library may leave -unreachable live blocks in the hive. This is because certain parts of -the hive disk format such as security (sk) records and big data (db) -records and classname fields are not well understood (and not -documented at all) and we play it safe by not attempting to modify -them. Apart from wasting a little bit of disk space, it is not -thought that unreachable blocks are a problem. - -=over 4 - -=item int hivex_commit (hive_h *h, const char *filename, int flags); - -Commit (write) any changes which have been made. - -C is the new file to write. If C then we -overwrite the original file (ie. the file name that was passed to -C). C is not used, always pass 0. - -Returns 0 on success. On error this returns -1 and sets errno. - -Note this does not close the hive handle. You can perform further -operations on the hive after committing, including making more -modifications. If you no longer wish to use the hive, call -C after this. - -=item hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name); - -Add a new child node named C to the existing node C. -The new child initially has no subnodes and contains no keys or -values. The sk-record (security descriptor) is inherited from -the parent. - -The parent must not have an existing child called C, so if you -want to overwrite an existing child, call C -first. - -Returns the node handle. On error this returns 0 and sets errno. - -=item int hivex_node_delete_child (hive_h *h, hive_node_h node); - -Delete the node C. All values at the node and all subnodes are -deleted (recursively). The C handle and the handles of all -subnodes become invalid. You cannot delete the root node. - -Returns 0 on success. On error this returns -1 and sets errno. - -=item hive_set_value - -The typedef C is used in conjunction with the -C call described below. - - struct hive_set_value { - char *key; /* key - a UTF-8 encoded ASCIIZ string */ - hive_type t; /* type of value field */ - size_t len; /* length of value field in bytes */ - char *value; /* value field */ - }; - typedef struct hive_set_value hive_set_value; - -To set the default value for a node, you have to pass C. - -Note that the C field is just treated as a list of bytes, and -is stored directly in the hive. The caller has to ensure correct -encoding and endianness, for example converting dwords to little -endian. - -The correct type and encoding for values depends on the node and key -in the registry, the version of Windows, and sometimes even changes -between versions of Windows for the same key. We don't document it -here. Often it's not documented at all. - -=item int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags); - -This call can be used to set all the (key, value) pairs stored in C. - -C is the node to modify. C is an array of (key, value) -pairs. There should be C elements in this array. C -is not used, always pass 0. - -Any existing values stored at the node are discarded, and their -C handles become invalid. Thus you can remove all -values stored at C by passing C. - -Returns 0 on success. On error this returns -1 and sets errno. - -Note that this library does not offer a way to modify just a single -key at a node. We don't implement a way to do this efficiently. - -=back - -=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED - -=over 4 - -=item * - -Changing the root node. - -=item * - -Creating a new hive file from scratch. This is impossible at present -because not all fields in the header are understood. - -=item * - -Modifying or deleting single values at a node. - -=item * - -Modifying security key (sk) records or classnames. -Previously we did not understand these records. However now they -are well-understood and we could add support if it was required -(but nothing much really uses them). - -=back - -=head1 THE STRUCTURE OF THE WINDOWS REGISTRY - -Note: To understand the relationship between hives and the common -Windows Registry keys (like C) please see the -Wikipedia page on the Windows Registry. - -The Windows Registry is split across various binary files, each -file being known as a "hive". This library only handles a single -hive file at a time. - -Hives are n-ary trees with a single root. Each node in the tree -has a name. - -Each node in the tree (including non-leaf nodes) may have an -arbitrary list of (key, value) pairs attached to it. It may -be the case that one of these pairs has an empty key. This -is referred to as the default key for the node. - -The (key, value) pairs are the place where the useful data is -stored in the registry. The key is always a string (possibly the -empty string for the default key). The value is a typed object -(eg. string, int32, binary, etc.). - -=head2 RELATIONSHIP TO .REG FILES - -Although this library does not care about or deal with Windows reg -files, it's useful to look at the relationship between the registry -itself and reg files because they are so common. - -A reg file is a text representation of the registry, or part of the -registry. The actual registry hives that Windows uses are binary -files. There are a number of Windows and Linux tools that let you -generate reg files, or merge reg files back into the registry hives. -Notable amongst them is Microsoft's REGEDIT program (formerly known as -REGEDT32). - -A typical reg file will contain many sections looking like this: - - [HKEY_LOCAL_MACHINE\SOFTWARE\Classes\Stack] - "@"="Generic Stack" - "TileInfo"="prop:System.FileCount" - "TilePath"=str(2):"%systemroot%\\system32" - "ThumbnailCutoff"=dword:00000000 - "FriendlyTypeName"=hex(2):40,00,25,00,53,00,79,00,73,00,74,00,65,00,6d,00,52,00,6f,00,\ - 6f,00,74,00,25,00,5c,00,53,00,79,00,73,00,74,00,65,00,6d,00,\ - 33,00,32,00,5c,00,73,00,65,00,61,00,72,00,63,00,68,00,66,00,\ - 6f,00,6c,00,64,00,65,00,72,00,2e,00,64,00,6c,00,6c,00,2c,00,\ - 2d,00,39,00,30,00,32,00,38,00,00,00,d8 - -Taking this one piece at a time: - - [HKEY_LOCAL_MACHINE\SOFTWARE\Classes\Stack] - -This is the path to this node in the registry tree. The first part, -C means that this comes from a hive -(file) called C. C<\Classes\Stack> is the real path part, -starting at the root node of the C hive. - -Below the node name is a list of zero or more key-value pairs. Any -interior or leaf node in the registry may have key-value pairs -attached. - - "@"="Generic Stack" - -This is the "default key". In reality (ie. inside the binary hive) -the key string is the empty string. In reg files this is written as -C<@> but this has no meaning either in the hives themselves or in this -library. The value is a string (type 1 - see C -above). - - "TileInfo"="prop:System.FileCount" - -This is a regular (key, value) pair, with the value being a type 1 -string. Note that inside the binary file the string is likely to be -UTF-16 encoded. This library converts to and from UTF-8 strings -transparently. - - "TilePath"=str(2):"%systemroot%\\system32" - -The value in this case has type 2 (expanded string) meaning that some -%...% variables get expanded by Windows. (This library doesn't know -or care about variable expansion). - - "ThumbnailCutoff"=dword:00000000 - -The value in this case is a dword (type 4). - - "FriendlyTypeName"=hex(2):40,00,.... - -This value is an expanded string (type 2) represented in the reg file -as a series of hex bytes. In this case the string appears to be a -UTF-16 string. - -=head1 NOTE ON THE USE OF ERRNO - -Many functions in this library set errno to indicate errors. These -are the values of errno you may encounter (this list is not -exhaustive): - -=over 4 - -=item ENOTSUP - -Corrupt or unsupported Registry file format. - -=item ENOKEY - -Missing root key. - -=item EINVAL - -Passed an invalid argument to the function. - -=item EFAULT - -Followed a Registry pointer which goes outside -the registry or outside a registry block. - -=item ELOOP - -Registry contains cycles. - -=item ERANGE - -Field in the registry out of range. - -=item EEXIST - -Registry key already exists. - -=item EROFS - -Tried to write to a registry which is not opened for writing. - -=back - -=head1 ENVIRONMENT VARIABLES - -=over 4 - -=item HIVEX_DEBUG - -Setting HIVEX_DEBUG=1 will enable very verbose messages. This is -useful for debugging problems with the library itself. - -=back - -=head1 SEE ALSO - -L, -L, -L, -L, -L, -L, -L, -L. - -=head1 AUTHORS - -Richard W.M. Jones (C) - -=head1 COPYRIGHT - -Copyright (C) 2009-2010 Red Hat Inc. - -Derived from code by Petter Nordahl-Hagen under a compatible license: -Copyright (C) 1997-2007 Petter Nordahl-Hagen. - -Derived from code by Markus Stephany under a compatible license: -Copyright (C) 2000-2004 Markus Stephany. - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; -version 2.1 of the License. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -See file LICENSE for the full license.