3 * Copyright (C) 2009-2010 Red Hat Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 (* This script generates language bindings and some documentation for
23 * After editing this file, run it (./generator/generator.ml) to
24 * regenerate all the output files. 'make' will rerun this
25 * automatically when necessary. Note that if you are using a separate
26 * build directory you must run generator.ml from the _source_
29 * IMPORTANT: This script should NOT print any warnings. If it prints
30 * warnings, you should treat them as errors.
32 * OCaml tips: (1) In emacs, install tuareg-mode to display and format
33 * OCaml code correctly. 'vim' comes with a good OCaml editing mode by
34 * default. (2) Read the resources at http://ocaml-tutorial.org/
39 #directory "+xml-light";;
40 #load "xml-light.cma";;
45 type style = ret * args
47 | RErr (* 0 = ok, -1 = error *)
48 | RErrDispose (* Disposes handle, see hivex_close. *)
49 | RHive (* Returns a hive_h or NULL. *)
50 | RNode (* Returns hive_node_h or 0. *)
51 | RNodeNotFound (* See hivex_node_get_child. *)
52 | RNodeList (* Returns hive_node_h* or NULL. *)
53 | RValue (* Returns hive_value_h or 0. *)
54 | RValueList (* Returns hive_value_h* or NULL. *)
55 | RString (* Returns char* or NULL. *)
56 | RStringList (* Returns char** or NULL. *)
57 | RLenType (* See hivex_value_type. *)
58 | RLenTypeVal (* See hivex_value_value. *)
59 | RInt32 (* Returns int32. *)
60 | RInt64 (* Returns int64. *)
62 and args = argt list (* List of parameters. *)
64 and argt = (* Note, cannot be NULL/0 unless it
65 says so explicitly below. *)
67 | ANode of string (* hive_node_h *)
68 | AValue of string (* hive_value_h *)
69 | AString of string (* char* *)
70 | AStringNullable of string (* char* (can be NULL) *)
71 | AOpenFlags (* HIVEX_OPEN_* flags list. *)
72 | AUnusedFlags (* Flags arg that is always 0 *)
73 | ASetValues (* See hivex_node_set_values. *)
76 * https://secure.wikimedia.org/wikipedia/en/wiki/Windows_Registry#Keys_and_values
78 * It's unfortunate that in our original C binding we strayed away from
79 * the names that Windows uses (eg. REG_SZ for strings). We include
80 * both our names and the Windows names.
84 "Just a key without a value";
86 "A Windows string (encoding is unknown, but often UTF16-LE)";
87 2, "expand_string", "EXPAND_SZ",
88 "A Windows string that contains %env% (environment variable expansion)";
89 3, "binary", "BINARY",
92 "DWORD (32 bit integer), little endian";
93 5, "dword_be", "DWORD_BIG_ENDIAN",
94 "DWORD (32 bit integer), big endian";
96 "Symbolic link to another part of the registry tree";
97 7, "multiple_strings", "MULTI_SZ",
98 "Multiple Windows strings. See http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx";
99 8, "resource_list", "RESOURCE_LIST",
101 9, "full_resource_description", "FULL_RESOURCE_DESCRIPTOR",
102 "Resource descriptor";
103 10, "resource_requirements_list", "RESOURCE_REQUIREMENTS_LIST",
104 "Resouce requirements list";
105 11, "qword", "QWORD",
106 "QWORD (64 bit integer), unspecified endianness but usually little endian"
108 let max_hive_type = 11
110 (* Open flags (bitmask passed to AOpenFlags) *)
112 1, "VERBOSE", "Verbose messages";
113 2, "DEBUG", "Debug messages";
114 4, "WRITE", "Enable writes to the hive";
119 "open", (RHive, [AString "filename"; AOpenFlags]),
122 Opens the hive named C<filename> for reading.
124 Flags is an ORed list of the open flags (or C<0> if you don't
125 want to pass any flags). These flags are defined:
129 =item HIVEX_OPEN_VERBOSE
133 =item HIVEX_OPEN_DEBUG
135 Very verbose messages, suitable for debugging problems in the library
138 This is also selected if the C<HIVEX_DEBUG> environment variable
141 =item HIVEX_OPEN_WRITE
143 Open the hive for writing. If omitted, the hive is read-only.
145 See L<hivex(3)/WRITING TO HIVE FILES>.
149 "close", (RErrDispose, [AHive]),
150 "close a hive handle",
152 Close a hive handle and free all associated resources.
154 Note that any uncommitted writes are I<not> committed by this call,
155 but instead are lost. See L<hivex(3)/WRITING TO HIVE FILES>.";
157 "root", (RNode, [AHive]),
158 "return the root node of the hive",
160 Return root node of the hive. All valid registries must contain
163 "node_name", (RString, [AHive; ANode "node"]),
164 "return the name of the node",
166 Return the name of the node.
168 Note that the name of the root node is a dummy, such as
169 C<$$$PROTO.HIV> (other names are possible: it seems to depend on the
170 tool or program that created the hive in the first place). You can
171 only know the \"real\" name of the root node by knowing which registry
172 file this hive originally comes from, which is knowledge that is
173 outside the scope of this library.";
175 "node_children", (RNodeList, [AHive; ANode "node"]),
176 "return children of node",
178 Return an array of nodes which are the subkeys
179 (children) of C<node>.";
181 "node_get_child", (RNodeNotFound, [AHive; ANode "node"; AString "name"]),
182 "return named child of node",
184 Return the child of node with the name C<name>, if it exists.
186 The name is matched case insensitively.";
188 "node_parent", (RNode, [AHive; ANode "node"]),
189 "return the parent of node",
191 Return the parent of C<node>.
193 The parent pointer of the root node in registry files that we
194 have examined seems to be invalid, and so this function will
195 return an error if called on the root node.";
197 "node_values", (RValueList, [AHive; ANode "node"]),
198 "return (key, value) pairs attached to a node",
200 Return the array of (key, value) pairs attached to this node.";
202 "node_get_value", (RValue, [AHive; ANode "node"; AString "key"]),
203 "return named key at node",
205 Return the value attached to this node which has the name C<key>,
208 The key name is matched case insensitively.
210 Note that to get the default key, you should pass the empty
211 string C<\"\"> here. The default key is often written C<\"@\">, but
212 inside hives that has no meaning and won't give you the
215 "value_key", (RString, [AHive; AValue "val"]),
216 "return the key of a (key, value) pair",
218 Return the key (name) of a (key, value) pair. The name
219 is reencoded as UTF-8 and returned as a string.
221 The string should be freed by the caller when it is no longer needed.
223 Note that this function can return a zero-length string. In the
224 context of Windows Registries, this means that this value is the
225 default key for this node in the tree. This is usually written
228 "value_type", (RLenType, [AHive; AValue "val"]),
229 "return data length and data type of a value",
231 Return the data length and data type of the value in this (key, value)
232 pair. See also C<hivex_value_value> which returns all this
233 information, and the value itself. Also, C<hivex_value_*> functions
234 below which can be used to return the value in a more useful form when
235 you know the type in advance.";
237 "value_value", (RLenTypeVal, [AHive; AValue "val"]),
238 "return data length, data type and data of a value",
240 Return the value of this (key, value) pair. The value should
241 be interpreted according to its type (see C<hive_type>).";
243 "value_string", (RString, [AHive; AValue "val"]),
244 "return value as a string",
246 If this value is a string, return the string reencoded as UTF-8
247 (as a C string). This only works for values which have type
248 C<hive_t_string>, C<hive_t_expand_string> or C<hive_t_link>.";
250 "value_multiple_strings", (RStringList, [AHive; AValue "val"]),
251 "return value as multiple strings",
253 If this value is a multiple-string, return the strings reencoded
254 as UTF-8 (in C, as a NULL-terminated array of C strings, in other
255 language bindings, as a list of strings). This only
256 works for values which have type C<hive_t_multiple_strings>.";
258 "value_dword", (RInt32, [AHive; AValue "val"]),
259 "return value as a DWORD",
261 If this value is a DWORD (Windows int32), return it. This only works
262 for values which have type C<hive_t_dword> or C<hive_t_dword_be>.";
264 "value_qword", (RInt64, [AHive; AValue "val"]),
265 "return value as a QWORD",
267 If this value is a QWORD (Windows int64), return it. This only
268 works for values which have type C<hive_t_qword>.";
270 "commit", (RErr, [AHive; AStringNullable "filename"; AUnusedFlags]),
271 "commit (write) changes to file",
273 Commit (write) any changes which have been made.
275 C<filename> is the new file to write. If C<filename> is null/undefined
276 then we overwrite the original file (ie. the file name that was passed to
279 Note this does not close the hive handle. You can perform further
280 operations on the hive after committing, including making more
281 modifications. If you no longer wish to use the hive, then you
282 should close the handle after committing.";
284 "node_add_child", (RNode, [AHive; ANode "parent"; AString "name"]),
287 Add a new child node named C<name> to the existing node C<parent>.
288 The new child initially has no subnodes and contains no keys or
289 values. The sk-record (security descriptor) is inherited from
292 The parent must not have an existing child called C<name>, so if you
293 want to overwrite an existing child, call C<hivex_node_delete_child>
296 "node_delete_child", (RErr, [AHive; ANode "node"]),
299 Delete the node C<node>. All values at the node and all subnodes are
300 deleted (recursively). The C<node> handle and the handles of all
301 subnodes become invalid. You cannot delete the root node.";
303 "node_set_values", (RErr, [AHive; ANode "node"; ASetValues; AUnusedFlags]),
304 "set (key, value) pairs at a node",
306 This call can be used to set all the (key, value) pairs
307 stored in C<node>. Note that this library does not offer
308 a way to modify just a single key at a node.
310 C<node> is the node to modify.";
313 (* Used to memoize the result of pod2text. *)
314 let pod2text_memo_filename = "generator/.pod2text.data"
315 let pod2text_memo : ((int * string * string), string list) Hashtbl.t =
317 let chan = open_in pod2text_memo_filename in
318 let v = input_value chan in
322 _ -> Hashtbl.create 13
323 let pod2text_memo_updated () =
324 let chan = open_out pod2text_memo_filename in
325 output_value chan pod2text_memo;
329 * Note we don't want to use any external OCaml libraries which
330 * makes this a bit harder than it should be.
332 module StringMap = Map.Make (String)
334 let failwithf fs = ksprintf failwith fs
336 let unique = let i = ref 0 in fun () -> incr i; !i
338 let replace_char s c1 c2 =
339 let s2 = String.copy s in
341 for i = 0 to String.length s2 - 1 do
342 if String.unsafe_get s2 i = c1 then (
343 String.unsafe_set s2 i c2;
347 if not !r then s else s2
351 (* || c = '\f' *) || c = '\n' || c = '\r' || c = '\t' (* || c = '\v' *)
353 let triml ?(test = isspace) str =
355 let n = ref (String.length str) in
356 while !n > 0 && test str.[!i]; do
361 else String.sub str !i !n
363 let trimr ?(test = isspace) str =
364 let n = ref (String.length str) in
365 while !n > 0 && test str.[!n-1]; do
368 if !n = String.length str then str
369 else String.sub str 0 !n
371 let trim ?(test = isspace) str =
372 trimr ~test (triml ~test str)
375 let len = String.length s in
376 let sublen = String.length sub in
378 if i <= len-sublen then (
381 if s.[i+j] = sub.[j] then loop2 (j+1)
387 if r = -1 then loop (i+1) else r
393 let rec replace_str s s1 s2 =
394 let len = String.length s in
395 let sublen = String.length s1 in
399 let s' = String.sub s 0 i in
400 let s'' = String.sub s (i+sublen) (len-i-sublen) in
401 s' ^ s2 ^ replace_str s'' s1 s2
404 let rec string_split sep str =
405 let len = String.length str in
406 let seplen = String.length sep in
407 let i = find str sep in
410 let s' = String.sub str 0 i in
411 let s'' = String.sub str (i+seplen) (len-i-seplen) in
412 s' :: string_split sep s''
415 let files_equal n1 n2 =
416 let cmd = sprintf "cmp -s %s %s" (Filename.quote n1) (Filename.quote n2) in
417 match Sys.command cmd with
420 | i -> failwithf "%s: failed with error code %d" cmd i
422 let rec filter_map f = function
426 | Some y -> y :: filter_map f xs
427 | None -> filter_map f xs
429 let rec find_map f = function
430 | [] -> raise Not_found
434 | None -> find_map f xs
437 let rec loop i = function
439 | x :: xs -> f i x; loop (i+1) xs
444 let rec loop i = function
446 | x :: xs -> let r = f i x in r :: loop (i+1) xs
450 let count_chars c str =
452 for i = 0 to String.length str - 1 do
453 if c = String.unsafe_get str i then incr count
457 let name_of_argt = function
459 | ANode n | AValue n | AString n | AStringNullable n -> n
460 | AOpenFlags | AUnusedFlags -> "flags"
461 | ASetValues -> "values"
463 (* Check function names etc. for consistency. *)
464 let check_functions () =
465 let contains_uppercase str =
466 let len = String.length str in
468 if i >= len then false
471 if c >= 'A' && c <= 'Z' then true
478 (* Check function names. *)
480 fun (name, _, _, _) ->
481 if String.length name >= 7 && String.sub name 0 7 = "hivex" then
482 failwithf "function name %s does not need 'hivex' prefix" name;
484 failwithf "function name is empty";
485 if name.[0] < 'a' || name.[0] > 'z' then
486 failwithf "function name %s must start with lowercase a-z" name;
487 if String.contains name '-' then
488 failwithf "function name %s should not contain '-', use '_' instead."
492 (* Check function parameter/return names. *)
494 fun (name, style, _, _) ->
495 let check_arg_ret_name n =
496 if contains_uppercase n then
497 failwithf "%s param/ret %s should not contain uppercase chars"
499 if String.contains n '-' || String.contains n '_' then
500 failwithf "%s param/ret %s should not contain '-' or '_'"
503 failwithf "%s has a param/ret called 'value', which causes conflicts in the OCaml bindings, use something like 'val' or a more descriptive name" name;
504 if n = "int" || n = "char" || n = "short" || n = "long" then
505 failwithf "%s has a param/ret which conflicts with a C type (eg. 'int', 'char' etc.)" name;
506 if n = "i" || n = "n" then
507 failwithf "%s has a param/ret called 'i' or 'n', which will cause some conflicts in the generated code" name;
508 if n = "argv" || n = "args" then
509 failwithf "%s has a param/ret called 'argv' or 'args', which will cause some conflicts in the generated code" name;
511 (* List Haskell, OCaml and C keywords here.
512 * http://www.haskell.org/haskellwiki/Keywords
513 * http://caml.inria.fr/pub/docs/manual-ocaml/lex.html#operator-char
514 * http://en.wikipedia.org/wiki/C_syntax#Reserved_keywords
515 * Formatted via: cat c haskell ocaml|sort -u|grep -vE '_|^val$' \
516 * |perl -pe 's/(.+)/"$1";/'|fmt -70
517 * Omitting _-containing words, since they're handled above.
518 * Omitting the OCaml reserved word, "val", is ok,
519 * and saves us from renaming several parameters.
522 "and"; "as"; "asr"; "assert"; "auto"; "begin"; "break"; "case";
523 "char"; "class"; "const"; "constraint"; "continue"; "data";
524 "default"; "deriving"; "do"; "done"; "double"; "downto"; "else";
525 "end"; "enum"; "exception"; "extern"; "external"; "false"; "float";
526 "for"; "forall"; "foreign"; "fun"; "function"; "functor"; "goto";
527 "hiding"; "if"; "import"; "in"; "include"; "infix"; "infixl";
528 "infixr"; "inherit"; "initializer"; "inline"; "instance"; "int";
530 "land"; "lazy"; "let"; "long"; "lor"; "lsl"; "lsr"; "lxor";
531 "match"; "mdo"; "method"; "mod"; "module"; "mutable"; "new";
532 "newtype"; "object"; "of"; "open"; "or"; "private"; "qualified";
533 "rec"; "register"; "restrict"; "return"; "short"; "sig"; "signed";
534 "sizeof"; "static"; "struct"; "switch"; "then"; "to"; "true"; "try";
535 "type"; "typedef"; "union"; "unsigned"; "virtual"; "void";
536 "volatile"; "when"; "where"; "while";
538 if List.mem n reserved then
539 failwithf "%s has param/ret using reserved word %s" name n;
542 List.iter (fun arg -> check_arg_ret_name (name_of_argt arg)) (snd style)
545 (* Check short descriptions. *)
547 fun (name, _, shortdesc, _) ->
548 if shortdesc.[0] <> Char.lowercase shortdesc.[0] then
549 failwithf "short description of %s should begin with lowercase." name;
550 let c = shortdesc.[String.length shortdesc-1] in
551 if c = '\n' || c = '.' then
552 failwithf "short description of %s should not end with . or \\n." name
555 (* Check long dscriptions. *)
557 fun (name, _, _, longdesc) ->
558 if longdesc.[String.length longdesc-1] = '\n' then
559 failwithf "long description of %s should not end with \\n." name
562 (* 'pr' prints to the current output file. *)
563 let chan = ref Pervasives.stdout
568 let i = count_chars '\n' str in
570 output_string !chan str
573 let copyright_years =
574 let this_year = 1900 + (localtime (time ())).tm_year in
575 if this_year > 2009 then sprintf "2009-%04d" this_year else "2009"
577 (* Generate a header block in a number of standard styles. *)
579 | CStyle | CPlusPlusStyle | HashStyle | OCamlStyle | HaskellStyle
581 type license = GPLv2plus | LGPLv2plus | GPLv2 | LGPLv2
583 let generate_header ?(extra_inputs = []) comment license =
584 let inputs = "generator/generator.ml" :: extra_inputs in
585 let c = match comment with
586 | CStyle -> pr "/* "; " *"
587 | CPlusPlusStyle -> pr "// "; "//"
588 | HashStyle -> pr "# "; "#"
589 | OCamlStyle -> pr "(* "; " *"
590 | HaskellStyle -> pr "{- "; " "
591 | PODCommentStyle -> pr "=begin comment\n\n "; "" in
592 pr "hivex generated file\n";
593 pr "%s WARNING: THIS FILE IS GENERATED FROM:\n" c;
594 List.iter (pr "%s %s\n" c) inputs;
595 pr "%s ANY CHANGES YOU MAKE TO THIS FILE WILL BE LOST.\n" c;
597 pr "%s Copyright (C) %s Red Hat Inc.\n" c copyright_years;
598 pr "%s Derived from code by Petter Nordahl-Hagen under a compatible license:\n" c;
599 pr "%s Copyright (c) 1997-2007 Petter Nordahl-Hagen.\n" c;
600 pr "%s Derived from code by Markus Stephany under a compatible license:\n" c;
601 pr "%s Copyright (c)2000-2004, Markus Stephany.\n" c;
605 pr "%s This program is free software; you can redistribute it and/or modify\n" c;
606 pr "%s it under the terms of the GNU General Public License as published by\n" c;
607 pr "%s the Free Software Foundation; either version 2 of the License, or\n" c;
608 pr "%s (at your option) any later version.\n" c;
610 pr "%s This program is distributed in the hope that it will be useful,\n" c;
611 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
612 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" c;
613 pr "%s GNU General Public License for more details.\n" c;
615 pr "%s You should have received a copy of the GNU General Public License along\n" c;
616 pr "%s with this program; if not, write to the Free Software Foundation, Inc.,\n" c;
617 pr "%s 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" c;
620 pr "%s This library is free software; you can redistribute it and/or\n" c;
621 pr "%s modify it under the terms of the GNU Lesser General Public\n" c;
622 pr "%s License as published by the Free Software Foundation; either\n" c;
623 pr "%s version 2 of the License, or (at your option) any later version.\n" c;
625 pr "%s This library is distributed in the hope that it will be useful,\n" c;
626 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
627 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" c;
628 pr "%s Lesser General Public License for more details.\n" c;
630 pr "%s You should have received a copy of the GNU Lesser General Public\n" c;
631 pr "%s License along with this library; if not, write to the Free Software\n" c;
632 pr "%s Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" c;
635 pr "%s This program is free software; you can redistribute it and/or modify\n" c;
636 pr "%s it under the terms of the GNU General Public License as published by\n" c;
637 pr "%s the Free Software Foundation; version 2 of the License only.\n" c;
639 pr "%s This program is distributed in the hope that it will be useful,\n" c;
640 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
641 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" c;
642 pr "%s GNU General Public License for more details.\n" c;
644 pr "%s You should have received a copy of the GNU General Public License along\n" c;
645 pr "%s with this program; if not, write to the Free Software Foundation, Inc.,\n" c;
646 pr "%s 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" c;
649 pr "%s This library is free software; you can redistribute it and/or\n" c;
650 pr "%s modify it under the terms of the GNU Lesser General Public\n" c;
651 pr "%s License as published by the Free Software Foundation;\n" c;
652 pr "%s version 2.1 of the License only.\n" c;
654 pr "%s This library is distributed in the hope that it will be useful,\n" c;
655 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
656 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" c;
657 pr "%s Lesser General Public License for more details.\n" c;
659 pr "%s You should have received a copy of the GNU Lesser General Public\n" c;
660 pr "%s License along with this library; if not, write to the Free Software\n" c;
661 pr "%s Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" c;
664 | CStyle -> pr " */\n"
667 | OCamlStyle -> pr " *)\n"
668 | HaskellStyle -> pr "-}\n"
669 | PODCommentStyle -> pr "\n=end comment\n"
673 (* Start of main code generation functions below this line. *)
675 let rec generate_c_header () =
676 generate_header CStyle LGPLv2;
688 /* NOTE: This API is documented in the man page hivex(3). */
691 typedef struct hive_h hive_h;
693 /* Nodes and values. */
694 typedef size_t hive_node_h;
695 typedef size_t hive_value_h;
697 /* Pre-defined types. */
701 fun (t, old_style, new_style, description) ->
702 pr " /* %s */\n" description;
703 pr " hive_t_REG_%s,\n" new_style;
704 pr "#define hive_t_%s hive_t_REG_%s\n" old_style new_style;
710 typedef enum hive_type hive_type;
712 /* Bitmask of flags passed to hivex_open. */
715 fun (v, flag, description) ->
716 pr " /* %s */\n" description;
717 pr "#define HIVEX_OPEN_%-10s %d\n" flag v;
722 /* Array of (key, value) pairs passed to hivex_node_set_values. */
723 struct hive_set_value {
729 typedef struct hive_set_value hive_set_value;
733 pr "/* Functions. */\n";
735 (* Function declarations. *)
737 fun (shortname, style, _, _) ->
738 let name = "hivex_" ^ shortname in
739 generate_c_prototype ~extern:true name style
742 (* The visitor pattern. *)
744 /* Visit all nodes. This is specific to the C API and is not made
745 * available to other languages. This is because of the complexity
746 * of binding callbacks in other languages, but also because other
747 * languages make it much simpler to iterate over a tree.
749 struct hivex_visitor {
750 int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name);
751 int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name);
752 int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str);
753 int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, char **argv);
754 int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str);
755 int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int32_t);
756 int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int64_t);
757 int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
758 int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
759 int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
760 int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
763 #define HIVEX_VISIT_SKIP_BAD 1
765 extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
766 extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
770 (* Finish the header file. *)
776 #endif /* HIVEX_H_ */
779 and generate_c_prototype ?(extern = false) name style =
780 if extern then pr "extern ";
781 (match fst style with
783 | RErrDispose -> pr "int "
784 | RHive -> pr "hive_h *"
785 | RNode -> pr "hive_node_h "
786 | RNodeNotFound -> pr "hive_node_h "
787 | RNodeList -> pr "hive_node_h *"
788 | RValue -> pr "hive_value_h "
789 | RValueList -> pr "hive_value_h *"
790 | RString -> pr "char *"
791 | RStringList -> pr "char **"
792 | RLenType -> pr "int "
793 | RLenTypeVal -> pr "char *"
794 | RInt32 -> pr "int32_t "
795 | RInt64 -> pr "int64_t "
798 let comma = ref false in
801 if !comma then pr ", "; comma := true;
803 | AHive -> pr "hive_h *h"
804 | ANode n -> pr "hive_node_h %s" n
805 | AValue n -> pr "hive_value_h %s" n
806 | AString n | AStringNullable n -> pr "const char *%s" n
807 | AOpenFlags | AUnusedFlags -> pr "int flags"
808 | ASetValues -> pr "size_t nr_values, const hive_set_value *values"
810 (match fst style with
811 | RLenType | RLenTypeVal -> pr ", hive_type *t, size_t *len"
816 and generate_c_pod () =
817 generate_header PODCommentStyle GPLv2;
824 hivex - Windows Registry \"hive\" extraction library
832 fun (shortname, style, _, _) ->
833 let name = "hivex_" ^ shortname in
835 generate_c_prototype ~extern:false name style;
840 Link with I<-lhivex>.
844 libhivex is a library for extracting the contents of Windows Registry
845 \"hive\" files. It is designed to be secure against buggy or malicious
848 Unlike many other tools in this area, it doesn't use the textual .REG
849 format for output, because parsing that is as much trouble as parsing
850 the original binary format. Instead it makes the file available
851 through a C API, or there is a separate program to export the hive as
852 XML (see L<hivexml(1)>), or to navigate the file (see L<hivexsh(1)>).
858 This handle describes an open hive file.
862 This is a node handle, an integer but opaque outside the library.
863 Valid node handles cannot be 0. The library returns 0 in some
864 situations to indicate an error.
868 The enum below describes the possible types for the value(s)
869 stored at each node. Note that you should not trust the
870 type field in a Windows Registry, as it very often has no
871 relationship to reality. Some applications use their own
872 types. The encoding of strings is not specified. Some
873 programs store everything (including strings) in binary blobs.
878 fun (t, _, new_style, description) ->
879 pr " /* %s */\n" description;
880 pr " hive_t_REG_%s = %d,\n" new_style t
887 This is a value handle, an integer but opaque outside the library.
888 Valid value handles cannot be 0. The library returns 0 in some
889 situations to indicate an error.
891 =head2 hive_set_value
893 The typedef C<hive_set_value> is used in conjunction with the
894 C<hivex_node_set_values> call described below.
896 struct hive_set_value {
897 char *key; /* key - a UTF-8 encoded ASCIIZ string */
898 hive_type t; /* type of value field */
899 size_t len; /* length of value field in bytes */
900 char *value; /* value field */
902 typedef struct hive_set_value hive_set_value;
904 To set the default value for a node, you have to pass C<key = \"\">.
906 Note that the C<value> field is just treated as a list of bytes, and
907 is stored directly in the hive. The caller has to ensure correct
908 encoding and endianness, for example converting dwords to little
911 The correct type and encoding for values depends on the node and key
912 in the registry, the version of Windows, and sometimes even changes
913 between versions of Windows for the same key. We don't document it
914 here. Often it's not documented at all.
920 fun (shortname, style, _, longdesc) ->
921 let name = "hivex_" ^ shortname in
922 pr "=head2 %s\n" name;
924 generate_c_prototype ~extern:false name style;
929 if List.mem AUnusedFlags (snd style) then
930 pr "The flags parameter is unused. Always pass 0.\n\n";
932 if List.mem ASetValues (snd style) then
933 pr "C<values> is an array of (key, value) pairs. There
934 should be C<nr_values> elements in this array.
936 Any existing values stored at the node are discarded, and their
937 C<hive_value_h> handles become invalid. Thus you can remove all
938 values stored at C<node> by passing C<nr_values = 0>.\n\n";
940 (match fst style with
943 Returns 0 on success.
944 On error this returns -1 and sets errno.\n\n"
947 Returns 0 on success.
948 On error this returns -1 and sets errno.
950 This function frees the hive handle (even if it returns an error).
951 The hive handle must not be used again after calling this function.\n\n"
954 Returns a new hive handle.
955 On error this returns NULL and sets errno.\n\n"
958 Returns a node handle.
959 On error this returns 0 and sets errno.\n\n"
962 Returns a node handle.
963 If the node was not found, this returns 0 without setting errno.
964 On error this returns 0 and sets errno.\n\n"
967 Returns a 0-terminated array of nodes.
968 The array must be freed by the caller when it is no longer needed.
969 On error this returns NULL and sets errno.\n\n"
972 Returns a value handle.
973 On error this returns 0 and sets errno.\n\n"
976 Returns a 0-terminated array of values.
977 The array must be freed by the caller when it is no longer needed.
978 On error this returns NULL and sets errno.\n\n"
982 The string must be freed by the caller when it is no longer needed.
983 On error this returns NULL and sets errno.\n\n"
986 Returns a NULL-terminated array of C strings.
987 The strings and the array must all be freed by the caller when
988 they are no longer needed.
989 On error this returns NULL and sets errno.\n\n"
992 Returns 0 on success.
993 On error this returns NULL and sets errno.\n\n"
996 The value is returned as an array of bytes (of length C<len>).
997 The value must be freed by the caller when it is no longer needed.
998 On error this returns NULL and sets errno.\n\n"
999 | RInt32 | RInt64 -> ()
1004 =head1 WRITING TO HIVE FILES
1006 The hivex library supports making limited modifications to hive files.
1007 We have tried to implement this very conservatively in order to reduce
1008 the chance of corrupting your registry. However you should be careful
1009 and take back-ups, since Microsoft has never documented the hive
1010 format, and so it is possible there are nuances in the
1011 reverse-engineered format that we do not understand.
1013 To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE>
1014 flag to C<hivex_open>, otherwise any write operation will return with
1017 The write operations shown below do not modify the on-disk file
1018 immediately. You must call C<hivex_commit> in order to write the
1019 changes to disk. If you call C<hivex_close> without committing then
1020 any writes are discarded.
1022 Hive files internally consist of a \"memory dump\" of binary blocks
1023 (like the C heap), and some of these blocks can be unused. The hivex
1024 library never reuses these unused blocks. Instead, to ensure
1025 robustness in the face of the partially understood on-disk format,
1026 hivex only allocates new blocks after the end of the file, and makes
1027 minimal modifications to existing structures in the file to point to
1028 these new blocks. This makes hivex slightly less disk-efficient than
1029 it could be, but disk is cheap, and registry modifications tend to be
1032 When deleting nodes, it is possible that this library may leave
1033 unreachable live blocks in the hive. This is because certain parts of
1034 the hive disk format such as security (sk) records and big data (db)
1035 records and classname fields are not well understood (and not
1036 documented at all) and we play it safe by not attempting to modify
1037 them. Apart from wasting a little bit of disk space, it is not
1038 thought that unreachable blocks are a problem.
1040 =head2 WRITE OPERATIONS WHICH ARE NOT SUPPORTED
1046 Changing the root node.
1050 Creating a new hive file from scratch. This is impossible at present
1051 because not all fields in the header are understood.
1055 Modifying or deleting single values at a node.
1059 Modifying security key (sk) records or classnames.
1060 Previously we did not understand these records. However now they
1061 are well-understood and we could add support if it was required
1062 (but nothing much really uses them).
1066 =head1 VISITING ALL NODES
1068 The visitor pattern is useful if you want to visit all nodes
1069 in the tree or all nodes below a certain point in the tree.
1071 First you set up your own C<struct hivex_visitor> with your
1074 Each of these callback functions should return 0 on success or -1
1075 on error. If any callback returns -1, then the entire visit
1076 terminates immediately. If you don't need a callback function at
1077 all, set the function pointer to NULL.
1079 struct hivex_visitor {
1080 int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name);
1081 int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name);
1082 int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1083 hive_type t, size_t len, const char *key, const char *str);
1084 int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h,
1085 hive_value_h, hive_type t, size_t len, const char *key, char **argv);
1086 int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h,
1087 hive_value_h, hive_type t, size_t len, const char *key,
1089 int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1090 hive_type t, size_t len, const char *key, int32_t);
1091 int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1092 hive_type t, size_t len, const char *key, int64_t);
1093 int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1094 hive_type t, size_t len, const char *key, const char *value);
1095 int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1096 hive_type t, size_t len, const char *key, const char *value);
1097 int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1098 hive_type t, size_t len, const char *key, const char *value);
1099 /* If value_any callback is not NULL, then the other value_*
1100 * callbacks are not used, and value_any is called on all values.
1102 int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1103 hive_type t, size_t len, const char *key, const char *value);
1110 int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
1112 Visit all the nodes recursively in the hive C<h>.
1114 C<visitor> should be a C<hivex_visitor> structure with callback
1115 fields filled in as required (unwanted callbacks can be set to
1116 NULL). C<len> must be the length of the 'visitor' struct (you
1117 should pass C<sizeof (struct hivex_visitor)> for this).
1119 This returns 0 if the whole recursive visit was completed
1120 successfully. On error this returns -1. If one of the callback
1121 functions returned an error than we don't touch errno. If the
1122 error was generated internally then we set errno.
1124 You can skip bad registry entries by setting C<flag> to
1125 C<HIVEX_VISIT_SKIP_BAD>. If this flag is not set, then a bad registry
1126 causes the function to return an error immediately.
1128 This function is robust if the registry contains cycles or
1129 pointers which are invalid or outside the registry. It detects
1130 these cases and returns an error.
1132 =item hivex_visit_node
1134 int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque);
1136 Same as C<hivex_visit> but instead of starting out at the root, this
1141 =head1 THE STRUCTURE OF THE WINDOWS REGISTRY
1143 Note: To understand the relationship between hives and the common
1144 Windows Registry keys (like C<HKEY_LOCAL_MACHINE>) please see the
1145 Wikipedia page on the Windows Registry.
1147 The Windows Registry is split across various binary files, each
1148 file being known as a \"hive\". This library only handles a single
1149 hive file at a time.
1151 Hives are n-ary trees with a single root. Each node in the tree
1154 Each node in the tree (including non-leaf nodes) may have an
1155 arbitrary list of (key, value) pairs attached to it. It may
1156 be the case that one of these pairs has an empty key. This
1157 is referred to as the default key for the node.
1159 The (key, value) pairs are the place where the useful data is
1160 stored in the registry. The key is always a string (possibly the
1161 empty string for the default key). The value is a typed object
1162 (eg. string, int32, binary, etc.).
1164 =head2 RELATIONSHIP TO .REG FILES
1166 Although this library does not care about or deal with Windows reg
1167 files, it's useful to look at the relationship between the registry
1168 itself and reg files because they are so common.
1170 A reg file is a text representation of the registry, or part of the
1171 registry. The actual registry hives that Windows uses are binary
1172 files. There are a number of Windows and Linux tools that let you
1173 generate reg files, or merge reg files back into the registry hives.
1174 Notable amongst them is Microsoft's REGEDIT program (formerly known as
1177 A typical reg file will contain many sections looking like this:
1179 [HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack]
1180 \"@\"=\"Generic Stack\"
1181 \"TileInfo\"=\"prop:System.FileCount\"
1182 \"TilePath\"=str(2):\"%%systemroot%%\\\\system32\"
1183 \"ThumbnailCutoff\"=dword:00000000
1184 \"FriendlyTypeName\"=hex(2):40,00,25,00,53,00,79,00,73,00,74,00,65,00,6d,00,52,00,6f,00,\\
1185 6f,00,74,00,25,00,5c,00,53,00,79,00,73,00,74,00,65,00,6d,00,\\
1186 33,00,32,00,5c,00,73,00,65,00,61,00,72,00,63,00,68,00,66,00,\\
1187 6f,00,6c,00,64,00,65,00,72,00,2e,00,64,00,6c,00,6c,00,2c,00,\\
1188 2d,00,39,00,30,00,32,00,38,00,00,00,d8
1190 Taking this one piece at a time:
1192 [HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack]
1194 This is the path to this node in the registry tree. The first part,
1195 C<HKEY_LOCAL_MACHINE\\SOFTWARE> means that this comes from a hive
1196 (file) called C<SOFTWARE>. C<\\Classes\\Stack> is the real path part,
1197 starting at the root node of the C<SOFTWARE> hive.
1199 Below the node name is a list of zero or more key-value pairs. Any
1200 interior or leaf node in the registry may have key-value pairs
1203 \"@\"=\"Generic Stack\"
1205 This is the \"default key\". In reality (ie. inside the binary hive)
1206 the key string is the empty string. In reg files this is written as
1207 C<@> but this has no meaning either in the hives themselves or in this
1208 library. The value is a string (type 1 - see C<enum hive_type>
1211 \"TileInfo\"=\"prop:System.FileCount\"
1213 This is a regular (key, value) pair, with the value being a type 1
1214 string. Note that inside the binary file the string is likely to be
1215 UTF-16 encoded. This library converts to and from UTF-8 strings
1218 \"TilePath\"=str(2):\"%%systemroot%%\\\\system32\"
1220 The value in this case has type 2 (expanded string) meaning that some
1221 %%...%% variables get expanded by Windows. (This library doesn't know
1222 or care about variable expansion).
1224 \"ThumbnailCutoff\"=dword:00000000
1226 The value in this case is a dword (type 4).
1228 \"FriendlyTypeName\"=hex(2):40,00,....
1230 This value is an expanded string (type 2) represented in the reg file
1231 as a series of hex bytes. In this case the string appears to be a
1234 =head1 NOTE ON THE USE OF ERRNO
1236 Many functions in this library set errno to indicate errors. These
1237 are the values of errno you may encounter (this list is not
1244 Corrupt or unsupported Registry file format.
1252 Passed an invalid argument to the function.
1256 Followed a Registry pointer which goes outside
1257 the registry or outside a registry block.
1261 Registry contains cycles.
1265 Field in the registry out of range.
1269 Registry key already exists.
1273 Tried to write to a registry which is not opened for writing.
1277 =head1 ENVIRONMENT VARIABLES
1283 Setting HIVEX_DEBUG=1 will enable very verbose messages. This is
1284 useful for debugging problems with the library itself.
1294 L<http://libguestfs.org/>,
1297 L<http://en.wikipedia.org/wiki/Windows_Registry>.
1301 Richard W.M. Jones (C<rjones at redhat dot com>)
1305 Copyright (C) 2009-2010 Red Hat Inc.
1307 Derived from code by Petter Nordahl-Hagen under a compatible license:
1308 Copyright (C) 1997-2007 Petter Nordahl-Hagen.
1310 Derived from code by Markus Stephany under a compatible license:
1311 Copyright (C) 2000-2004 Markus Stephany.
1313 This library is free software; you can redistribute it and/or
1314 modify it under the terms of the GNU Lesser General Public
1315 License as published by the Free Software Foundation;
1316 version 2.1 of the License only.
1318 This library is distributed in the hope that it will be useful,
1319 but WITHOUT ANY WARRANTY; without even the implied warranty of
1320 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1321 Lesser General Public License for more details.
1324 and generate_ocaml_interface () =
1325 generate_header OCamlStyle LGPLv2plus;
1329 (** A [hive_h] hive file handle. *)
1333 (** Nodes and values. *)
1335 exception Error of string * Unix.error * string
1336 (** Error raised by a function.
1338 The first parameter is the name of the function which raised the error.
1339 The second parameter is the errno (see the [Unix] module). The third
1340 parameter is a human-readable string corresponding to the errno.
1342 See hivex(3) for a partial list of interesting errno values that
1343 can be generated by the library. *)
1344 exception Handle_closed of string
1345 (** This exception is raised if you call a function on a closed handle. *)
1351 fun (t, _, new_style, description) ->
1353 pr " | REG_%s (** %s *)\n" new_style description
1357 | REG_UNKNOWN of int32 (** unknown type *)
1358 (** Hive type field. *)
1364 fun (v, flag, description) ->
1365 assert (1 lsl i = v);
1366 pr " | OPEN_%s (** %s *)\n" flag description
1370 (** Open flags for {!open_file} call. *)
1377 (** (key, value) pair passed (as an array) to {!node_set_values}. *)
1381 fun (name, style, shortdesc, _) ->
1383 generate_ocaml_prototype name style;
1384 pr "(** %s *)\n" shortdesc
1387 and generate_ocaml_implementation () =
1388 generate_header OCamlStyle LGPLv2plus;
1395 exception Error of string * Unix.error * string
1396 exception Handle_closed of string
1398 (* Give the exceptions names, so they can be raised from the C code. *)
1400 Callback.register_exception \"ocaml_hivex_error\"
1401 (Error (\"\", Unix.EUNKNOWNERR 0, \"\"));
1402 Callback.register_exception \"ocaml_hivex_closed\" (Handle_closed \"\")
1408 fun (t, _, new_style, _) ->
1410 pr " | REG_%s\n" new_style
1414 | REG_UNKNOWN of int32
1420 fun (v, flag, description) ->
1421 assert (1 lsl i = v);
1422 pr " | OPEN_%s (** %s *)\n" flag description
1436 fun (name, style, _, _) ->
1437 generate_ocaml_prototype ~is_external:true name style
1440 and generate_ocaml_prototype ?(is_external = false) name style =
1441 let ocaml_name = if name = "open" then "open_file" else name in
1443 if is_external then pr "external " else pr "val ";
1444 pr "%s : " ocaml_name;
1447 | AHive -> pr "t -> "
1448 | ANode _ -> pr "node -> "
1449 | AValue _ -> pr "value -> "
1450 | AString _ -> pr "string -> "
1451 | AStringNullable _ -> pr "string option -> "
1452 | AOpenFlags -> pr "open_flag list -> "
1453 | AUnusedFlags -> ()
1454 | ASetValues -> pr "set_value array -> "
1456 (match fst style with
1457 | RErr -> pr "unit" (* all errors are turned into exceptions *)
1458 | RErrDispose -> pr "unit"
1460 | RNode -> pr "node"
1461 | RNodeNotFound -> pr "node"
1462 | RNodeList -> pr "node array"
1463 | RValue -> pr "value"
1464 | RValueList -> pr "value array"
1465 | RString -> pr "string"
1466 | RStringList -> pr "string array"
1467 | RLenType -> pr "hive_type * int"
1468 | RLenTypeVal -> pr "hive_type * string"
1469 | RInt32 -> pr "int32"
1470 | RInt64 -> pr "int64"
1473 pr " = \"ocaml_hivex_%s\"" name;
1476 and generate_ocaml_c () =
1477 generate_header CStyle LGPLv2plus;
1488 #include <caml/config.h>
1489 #include <caml/alloc.h>
1490 #include <caml/callback.h>
1491 #include <caml/custom.h>
1492 #include <caml/fail.h>
1493 #include <caml/memory.h>
1494 #include <caml/mlvalues.h>
1495 #include <caml/signals.h>
1497 #ifdef HAVE_CAML_UNIXSUPPORT_H
1498 #include <caml/unixsupport.h>
1500 extern value unix_error_of_code (int errcode);
1503 #ifndef HAVE_CAML_RAISE_WITH_ARGS
1505 caml_raise_with_args (value tag, int nargs, value args[])
1508 CAMLxparamN (args, nargs);
1512 bucket = caml_alloc_small (1 + nargs, 0);
1513 Field(bucket, 0) = tag;
1514 for (i = 0; i < nargs; i++) Field(bucket, 1 + i) = args[i];
1522 #define Hiveh_val(v) (*((hive_h **)Data_custom_val(v)))
1523 static value Val_hiveh (hive_h *);
1524 static int HiveOpenFlags_val (value);
1525 static hive_set_value *HiveSetValues_val (value);
1526 static hive_type HiveType_val (value);
1527 static value Val_hive_type (hive_type);
1528 static value copy_int_array (size_t *);
1529 static value copy_type_len (size_t, hive_type);
1530 static value copy_type_value (const char *, size_t, hive_type);
1531 static void raise_error (const char *) Noreturn;
1532 static void raise_closed (const char *) Noreturn;
1538 fun (name, style, _, _) ->
1539 pr "/* Automatically generated wrapper for function\n";
1540 pr " * "; generate_ocaml_prototype name style;
1546 | ASetValues -> ["nrvalues"; "values"]
1547 | AUnusedFlags -> ["0"]
1548 | arg -> [name_of_argt arg]) (snd style) in
1550 match fst style with
1551 | RLenType | RLenTypeVal -> c_params @ [["&t"; "&len"]]
1553 let c_params = List.concat c_params in
1556 filter_map (function
1557 | AUnusedFlags -> None
1558 | arg -> Some (name_of_argt arg ^ "v")) (snd style) in
1560 pr "/* Emit prototype to appease gcc's -Wmissing-prototypes. */\n";
1561 pr "CAMLprim value ocaml_hivex_%s (value %s" name (List.hd params);
1562 List.iter (pr ", value %s") (List.tl params); pr ");\n";
1565 pr "CAMLprim value\n";
1566 pr "ocaml_hivex_%s (value %s" name (List.hd params);
1567 List.iter (pr ", value %s") (List.tl params);
1571 pr " CAMLparam%d (%s);\n"
1572 (List.length params) (String.concat ", " params);
1573 pr " CAMLlocal1 (rv);\n";
1579 pr " hive_h *h = Hiveh_val (hv);\n";
1580 pr " if (h == NULL)\n";
1581 pr " raise_closed (\"%s\");\n" name
1583 pr " hive_node_h %s = Int_val (%sv);\n" n n
1585 pr " hive_value_h %s = Int_val (%sv);\n" n n
1587 pr " const char *%s = String_val (%sv);\n" n n
1588 | AStringNullable n ->
1589 pr " const char *%s =\n" n;
1590 pr " %sv != Val_int (0) ? String_val (Field (%sv, 0)) : NULL;\n"
1593 pr " int flags = HiveOpenFlags_val (flagsv);\n"
1594 | AUnusedFlags -> ()
1596 pr " int nrvalues = Wosize_val (valuesv);\n";
1597 pr " hive_set_value *values = HiveSetValues_val (valuesv);\n"
1602 match fst style with
1603 | RErr -> pr " int r;\n"; "-1"
1604 | RErrDispose -> pr " int r;\n"; "-1"
1605 | RHive -> pr " hive_h *r;\n"; "NULL"
1606 | RNode -> pr " hive_node_h r;\n"; "0"
1609 pr " hive_node_h r;\n";
1611 | RNodeList -> pr " hive_node_h *r;\n"; "NULL"
1612 | RValue -> pr " hive_value_h r;\n"; "0"
1613 | RValueList -> pr " hive_value_h *r;\n"; "NULL"
1614 | RString -> pr " char *r;\n"; "NULL"
1615 | RStringList -> pr " char **r;\n"; "NULL"
1618 pr " size_t len;\n";
1619 pr " hive_type t;\n";
1623 pr " size_t len;\n";
1624 pr " hive_type t;\n";
1633 "-1 && errno != 0" in
1635 (* The libguestfs OCaml bindings call enter_blocking_section
1636 * here. However I don't think that is safe, because we are
1637 * holding pointers to caml strings during the call, and these
1638 * could be moved or freed by other threads. In any case, there
1639 * is very little reason to enter_blocking_section for any hivex
1640 * call, so don't do it. XXX
1642 (*pr " caml_enter_blocking_section ();\n";*)
1643 pr " r = hivex_%s (%s" name (List.hd c_params);
1644 List.iter (pr ", %s") (List.tl c_params);
1646 (*pr " caml_leave_blocking_section ();\n";*)
1649 (* Dispose of the hive handle (even if hivex_close returns error). *)
1650 (match fst style with
1652 pr " /* So we don't double-free in the finalizer. */\n";
1653 pr " Hiveh_val (hv) = NULL;\n";
1660 | AHive | ANode _ | AValue _ | AString _ | AStringNullable _
1661 | AOpenFlags | AUnusedFlags -> ()
1663 pr " free (values);\n";
1667 (* Check for errors. *)
1668 pr " if (r == %s)\n" error_code;
1669 pr " raise_error (\"%s\");\n" name;
1672 (match fst style with
1673 | RErr -> pr " rv = Val_unit;\n"
1674 | RErrDispose -> pr " rv = Val_unit;\n"
1675 | RHive -> pr " rv = Val_hiveh (r);\n"
1676 | RNode -> pr " rv = Val_int (r);\n"
1678 pr " if (r == 0)\n";
1679 pr " caml_raise_not_found ();\n";
1681 pr " rv = Val_int (r);\n"
1683 pr " rv = copy_int_array (r);\n";
1685 | RValue -> pr " rv = Val_int (r);\n"
1687 pr " rv = copy_int_array (r);\n";
1690 pr " rv = caml_copy_string (r);\n";
1693 pr " rv = caml_copy_string_array ((const char **) r);\n";
1694 pr " for (int i = 0; r[i] != NULL; ++i) free (r[i]);\n";
1696 | RLenType -> pr " rv = copy_type_len (len, t);\n"
1698 pr " rv = copy_type_value (r, len, t);\n";
1700 | RInt32 -> pr " rv = caml_copy_int32 (r);\n"
1701 | RInt64 -> pr " rv = caml_copy_int32 (r);\n"
1704 pr " CAMLreturn (rv);\n";
1712 HiveOpenFlags_val (value v)
1717 while (v != Val_int (0)) {
1719 flags |= 1 << Int_val (v2);
1726 static hive_set_value *
1727 HiveSetValues_val (value v)
1729 size_t nr_values = Wosize_val (v);
1730 hive_set_value *values = malloc (nr_values * sizeof (hive_set_value));
1734 for (i = 0; i < nr_values; ++i) {
1736 values[i].key = String_val (Field (v2, 0));
1737 values[i].t = HiveType_val (Field (v2, 1));
1738 values[i].len = caml_string_length (Field (v2, 2));
1739 values[i].value = String_val (Field (v2, 2));
1746 HiveType_val (value v)
1749 return Int_val (v); /* REG_NONE etc. */
1751 return Int32_val (Field (v, 0)); /* REG_UNKNOWN of int32 */
1755 Val_hive_type (hive_type t)
1761 CAMLreturn (Val_int (t));
1763 rv = caml_alloc (1, 0); /* REG_UNKNOWN of int32 */
1764 v = caml_copy_int32 (t);
1765 caml_modify (&Field (rv, 0), v);
1771 copy_int_array (size_t *xs)
1777 for (nr = 0; xs[nr] != 0; ++nr)
1780 CAMLreturn (Atom (0));
1782 rv = caml_alloc (nr, 0);
1783 for (i = 0; i < nr; ++i) {
1784 v = Val_int (xs[i]);
1785 Store_field (rv, i, v); /* Safe because v is not a block. */
1792 copy_type_len (size_t len, hive_type t)
1797 rv = caml_alloc (2, 0);
1798 v = Val_hive_type (t);
1799 Store_field (rv, 0, v);
1801 Store_field (rv, 1, len);
1806 copy_type_value (const char *r, size_t len, hive_type t)
1811 rv = caml_alloc (2, 0);
1812 v = Val_hive_type (t);
1813 Store_field (rv, 0, v);
1814 v = caml_alloc_string (len);
1815 memcpy (String_val (v), r, len);
1816 caml_modify (&Field (rv, 1), len);
1820 /* Raise exceptions. */
1822 raise_error (const char *function)
1824 /* Save errno early in case it gets trashed. */
1828 CAMLlocal3 (v1, v2, v3);
1830 v1 = caml_copy_string (function);
1831 v2 = unix_error_of_code (err);
1832 v3 = caml_copy_string (strerror (err));
1833 value vvv[] = { v1, v2, v3 };
1834 caml_raise_with_args (*caml_named_value (\"ocaml_hivex_error\"), 3, vvv);
1840 raise_closed (const char *function)
1845 v = caml_copy_string (function);
1846 caml_raise_with_arg (*caml_named_value (\"ocaml_hivex_closed\"), v);
1851 /* Allocate handles and deal with finalization. */
1853 hivex_finalize (value hv)
1855 hive_h *h = Hiveh_val (hv);
1856 if (h) hivex_close (h);
1859 static struct custom_operations hivex_custom_operations = {
1860 (char *) \"hivex_custom_operations\",
1862 custom_compare_default,
1863 custom_hash_default,
1864 custom_serialize_default,
1865 custom_deserialize_default
1869 Val_hiveh (hive_h *h)
1874 rv = caml_alloc_custom (&hivex_custom_operations,
1875 sizeof (hive_h *), 0, 1);
1882 and generate_perl_pm () =
1883 generate_header HashStyle LGPLv2plus;
1890 Win::Hivex - Perl bindings for reading and writing Windows Registry hive files
1896 $h = Win::Hivex->open ('SOFTWARE');
1897 $root_node = $h->root ();
1898 print $h->node_name ($root_node);
1902 The C<Win::Hivex> module provides a Perl XS binding to the
1903 L<hivex(3)> API for reading and writing Windows Registry binary
1908 All errors turn into calls to C<croak> (see L<Carp(3)>).
1922 XSLoader::load ('Win::Hivex');
1926 $h = Win::Hivex::open ($filename,";
1930 pr "\n [%s => 1,]" (String.lowercase flag)
1935 Open a Windows Registry binary hive file.
1937 The C<verbose> and C<debug> flags enable different levels of
1940 The C<write> flag is required if you will be modifying the
1941 hive file (see L<hivex(3)/WRITING TO HIVE FILES>).
1943 This function returns a hive handle. The hive handle is
1944 closed automatically when its reference count drops to 0.
1950 my $class = ref ($proto) || $proto;
1951 my $filename = shift;
1958 fun (n, flag, description) ->
1959 pr " # %s\n" description;
1960 pr " $flags += %d if $flags{%s};\n" n (String.lowercase flag)
1965 my $self = Win::Hivex::_open ($filename, $flags);
1966 bless $self, $class;
1973 fun (name, style, _, longdesc) ->
1974 (* The close call isn't explicit in Perl: handles are closed
1975 * when their reference count drops to 0.
1977 * The open call is coded specially in Perl.
1979 * Therefore we don't generate prototypes for these two calls:
1981 if fst style <> RErrDispose && List.hd (snd style) = AHive then (
1982 let longdesc = replace_str longdesc "C<hivex_" "C<" in
1983 pr "=item %s\n\n " name;
1984 generate_perl_prototype name style;
1986 pr "%s\n\n" longdesc;
1988 (match fst style with
2000 This returns a node handle.\n\n"
2003 This returns a node handle, or C<undef> if the node was not found.\n\n"
2006 This returns a list of node handles.\n\n"
2009 This returns a value handle.\n\n"
2012 This returns a list of value handles.\n\n"
2015 if List.mem ASetValues (snd style) then
2016 pr "C<@values> is an array of (keys, value) pairs.
2017 Each element should be a hashref containing C<key>, C<t> (type)
2020 Any existing values stored at the node are discarded, and their
2021 C<value> handles become invalid. Thus you can remove all
2022 values stored at C<node> by passing C<@values = []>.\n\n"
2035 Copyright (C) %s Red Hat Inc.
2039 Please see the file COPYING.LIB for the full license.
2045 L<http://libguestfs.org>,
2051 and generate_perl_prototype name style =
2053 (match fst style with
2056 | RHive -> pr "$h = "
2058 | RNodeNotFound -> pr "$node = "
2059 | RNodeList -> pr "@nodes = "
2060 | RValue -> pr "$value = "
2061 | RValueList -> pr "@values = "
2062 | RString -> pr "$string = "
2063 | RStringList -> pr "@strings = "
2064 | RLenType -> pr "($type, $len) = "
2065 | RLenTypeVal -> pr "($type, $data) = "
2066 | RInt32 -> pr "$int32 = "
2067 | RInt64 -> pr "$int64 = "
2070 let args = List.tl (snd style) in
2072 (* AUnusedFlags is dropped in the bindings. *)
2073 let args = List.filter ((<>) AUnusedFlags) args in
2077 let comma = ref false in
2080 if !comma then pr ", "; comma := true;
2085 | AString n -> pr "$%s" n
2086 | AStringNullable n -> pr "[$%s|undef]" n
2087 | AOpenFlags -> pr "[flags]"
2088 | AUnusedFlags -> assert false
2089 | ASetValues -> pr "\\@values"
2094 and generate_perl_xs () =
2095 generate_header CStyle LGPLv2plus;
2098 #include \"EXTERN.h\"
2106 #define PRId64 \"lld\"
2110 my_newSVll(long long val) {
2111 #ifdef USE_64_BIT_ALL
2112 return newSViv(val);
2116 len = snprintf(buf, 100, \"%%\" PRId64, val);
2117 return newSVpv(buf, len);
2122 #define PRIu64 \"llu\"
2127 my_newSVull(unsigned long long val) {
2128 #ifdef USE_64_BIT_ALL
2129 return newSVuv(val);
2133 len = snprintf(buf, 100, \"%%\" PRIu64, val);
2134 return newSVpv(buf, len);
2140 /* http://www.perlmonks.org/?node_id=680842 */
2142 XS_unpack_charPtrPtr (SV *arg) {
2147 if (!arg || !SvOK (arg) || !SvROK (arg) || SvTYPE (SvRV (arg)) != SVt_PVAV)
2148 croak (\"array reference expected\");
2150 av = (AV *)SvRV (arg);
2151 ret = malloc ((av_len (av) + 1 + 1) * sizeof (char *));
2153 croak (\"malloc failed\");
2155 for (i = 0; i <= av_len (av); i++) {
2156 SV **elem = av_fetch (av, i, 0);
2158 if (!elem || !*elem)
2159 croak (\"missing element in list\");
2161 ret[i] = SvPV_nolen (*elem);
2170 /* Handle set_values parameter. */
2171 typedef struct pl_set_values {
2173 hive_set_value *values;
2176 static pl_set_values
2177 unpack_pl_set_values (SV *sv)
2183 if (!sv || !SvOK (sv) || !SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
2184 croak (\"array reference expected\");
2186 av = (AV *)SvRV(sv);
2187 ret.nr_values = av_len (av) + 1;
2188 ret.values = malloc (ret.nr_values * sizeof (hive_set_value));
2190 croak (\"malloc failed\");
2192 for (i = 0; i <= av_len (av); i++) {
2193 SV **hvp = av_fetch (av, i, 0);
2195 if (!hvp || !*hvp || !SvROK (*hvp) || SvTYPE (SvRV (*hvp)) != SVt_PVHV)
2196 croak (\"missing element in list or not a hash ref\");
2198 HV *hv = (HV *)SvRV(*hvp);
2201 svp = hv_fetch (hv, \"key\", 3, 0);
2203 croak (\"missing 'key' in hash\");
2204 ret.values[i].key = SvPV_nolen (*svp);
2206 svp = hv_fetch (hv, \"t\", 1, 0);
2208 croak (\"missing 't' in hash\");
2209 ret.values[i].t = SvIV (*svp);
2211 svp = hv_fetch (hv, \"value\", 5, 0);
2213 croak (\"missing 'value' in hash\");
2214 ret.values[i].value = SvPV (*svp, ret.values[i].len);
2220 MODULE = Win::Hivex PACKAGE = Win::Hivex
2225 _open (filename, flags)
2229 RETVAL = hivex_open (filename, flags);
2231 croak (\"hivex_open: %%s: %%s\", filename, strerror (errno));
2239 if (hivex_close (h) == -1)
2240 croak (\"hivex_close: %%s\", strerror (errno));
2245 fun (name, style, _, longdesc) ->
2246 (* The close and open calls are handled specially above. *)
2247 if fst style <> RErrDispose && List.hd (snd style) = AHive then (
2248 (match fst style with
2249 | RErr -> pr "void\n"
2250 | RErrDispose -> failwith "perl bindings cannot handle a call which disposes of the handle"
2251 | RHive -> failwith "perl bindings cannot handle a call which returns a handle"
2255 | RString -> pr "SV *\n"
2260 | RLenTypeVal -> pr "void\n"
2261 | RInt32 -> pr "SV *\n"
2262 | RInt64 -> pr "SV *\n"
2265 (* Call and arguments. *)
2267 filter_map (function
2268 | AUnusedFlags -> None
2269 | arg -> Some (name_of_argt arg)) (snd style) in
2273 | AUnusedFlags -> "0"
2274 | ASetValues -> "values.nr_values, values.values"
2275 | arg -> name_of_argt arg) (snd style) in
2277 pr "%s (%s)\n" name (String.concat ", " perl_params);
2288 | AStringNullable n ->
2289 (* http://www.perlmonks.org/?node_id=554277 *)
2290 pr " char *%s = SvOK(ST(%d)) ? SvPV_nolen(ST(%d)) : NULL;\n" n i i
2293 | AUnusedFlags -> ()
2295 pr " pl_set_values values = unpack_pl_set_values (ST(%d));\n" i
2302 pr " free (values.values);\n"
2303 | AHive | ANode _ | AValue _ | AString _ | AStringNullable _
2304 | AOpenFlags | AUnusedFlags -> ()
2309 (match fst style with
2314 pr " r = hivex_%s (%s);\n"
2315 name (String.concat ", " c_params);
2317 pr " if (r == -1)\n";
2318 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2321 | RErrDispose -> assert false
2322 | RHive -> assert false
2328 pr " /* hive_node_h = hive_value_h = size_t so we cheat\n";
2329 pr " here to simplify the generator */\n";
2332 pr " r = hivex_%s (%s);\n"
2333 name (String.concat ", " c_params);
2335 pr " if (r == 0)\n";
2336 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2338 pr " RETVAL = newSViv (r);\n";
2344 pr " hive_node_h r;\n";
2347 pr " r = hivex_%s (%s);\n"
2348 name (String.concat ", " c_params);
2350 pr " if (r == 0 && errno != 0)\n";
2351 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2353 pr " if (r == 0)\n";
2354 pr " RETVAL = &PL_sv_undef;\n";
2356 pr " RETVAL = newSViv (r);\n";
2364 pr " r = hivex_%s (%s);\n"
2365 name (String.concat ", " c_params);
2367 pr " if (r == NULL)\n";
2368 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2370 pr " RETVAL = newSVpv (r, 0);\n";
2381 pr " r = hivex_%s (%s);\n"
2382 name (String.concat ", " c_params);
2384 pr " if (r == NULL)\n";
2385 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2387 pr " for (n = 0; r[n] != 0; ++n) /**/;\n";
2388 pr " EXTEND (SP, n);\n";
2389 pr " for (i = 0; i < n; ++i)\n";
2390 pr " PUSHs (sv_2mortal (newSViv (r[i])));\n";
2398 pr " r = hivex_%s (%s);\n"
2399 name (String.concat ", " c_params);
2401 pr " if (r == NULL)\n";
2402 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2404 pr " for (n = 0; r[n] != NULL; ++n) /**/;\n";
2405 pr " EXTEND (SP, n);\n";
2406 pr " for (i = 0; i < n; ++i) {\n";
2407 pr " PUSHs (sv_2mortal (newSVpv (r[i], 0)));\n";
2408 pr " free (r[i]);\n";
2415 pr " size_t len;\n";
2416 pr " hive_type type;\n";
2418 pr " r = hivex_%s (%s, &len, &type);\n"
2419 name (String.concat ", " c_params);
2421 pr " if (r == -1)\n";
2422 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2424 pr " EXTEND (SP, 2);\n";
2425 pr " PUSHs (sv_2mortal (newSViv (type)));\n";
2426 pr " PUSHs (sv_2mortal (newSViv (len)));\n";
2431 pr " size_t len;\n";
2432 pr " hive_type type;\n";
2434 pr " r = hivex_%s (%s, &len, &type);\n"
2435 name (String.concat ", " c_params);
2437 pr " if (r == NULL)\n";
2438 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2440 pr " EXTEND (SP, 2);\n";
2441 pr " PUSHs (sv_2mortal (newSViv (type)));\n";
2442 pr " PUSHs (sv_2mortal (newSVpv (r, len)));\n";
2450 pr " r = hivex_%s (%s);\n"
2451 name (String.concat ", " c_params);
2453 pr " if (r == -1 && errno != 0)\n";
2454 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2456 pr " RETVAL = my_newSVll (r);\n";
2464 and generate_python_py () =
2465 generate_header HashStyle LGPLv2plus
2467 and generate_python_c () =
2468 generate_header CStyle LGPLv2plus
2470 let output_to filename k =
2471 let filename_new = filename ^ ".new" in
2472 chan := open_out filename_new;
2475 chan := Pervasives.stdout;
2477 (* Is the new file different from the current file? *)
2478 if Sys.file_exists filename && files_equal filename filename_new then
2479 unlink filename_new (* same, so skip it *)
2481 (* different, overwrite old one *)
2482 (try chmod filename 0o644 with Unix_error _ -> ());
2483 rename filename_new filename;
2484 chmod filename 0o444;
2485 printf "written %s\n%!" filename;
2488 let perror msg = function
2489 | Unix_error (err, _, _) ->
2490 eprintf "%s: %s\n" msg (error_message err)
2492 eprintf "%s: %s\n" msg (Printexc.to_string exn)
2497 try openfile "configure.ac" [O_RDWR] 0
2499 | Unix_error (ENOENT, _, _) ->
2501 You are probably running this from the wrong directory.
2502 Run it from the top source directory using the command
2503 generator/generator.ml
2507 perror "open: configure.ac" exn;
2510 (* Acquire a lock so parallel builds won't try to run the generator
2511 * twice at the same time. Subsequent builds will wait for the first
2512 * one to finish. Note the lock is released implicitly when the
2515 (try lockf lock_fd F_LOCK 1
2517 perror "lock: configure.ac" exn;
2522 output_to "lib/hivex.h" generate_c_header;
2523 output_to "lib/hivex.pod" generate_c_pod;
2525 output_to "ocaml/hivex.mli" generate_ocaml_interface;
2526 output_to "ocaml/hivex.ml" generate_ocaml_implementation;
2527 output_to "ocaml/hivex_c.c" generate_ocaml_c;
2529 output_to "perl/lib/Win/Hivex.pm" generate_perl_pm;
2530 output_to "perl/Hivex.xs" generate_perl_xs;
2533 We ran out of time before we could write the Python bindings.
2534 output_to "python/hivex.py" generate_python_py;
2535 output_to "python/hivex-py.c" generate_python_c;
2538 (* Always generate this file last, and unconditionally. It's used
2539 * by the Makefile to know when we must re-run the generator.
2541 let chan = open_out "generator/stamp-generator" in
2545 printf "generated %d lines of code\n" !lines