3 * Copyright (C) 2009-2010 Red Hat Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 (* This script generates language bindings and some documentation for
23 * After editing this file, run it (./generator/generator.ml) to
24 * regenerate all the output files. 'make' will rerun this
25 * automatically when necessary. Note that if you are using a separate
26 * build directory you must run generator.ml from the _source_
29 * IMPORTANT: This script should NOT print any warnings. If it prints
30 * warnings, you should treat them as errors.
32 * OCaml tips: (1) In emacs, install tuareg-mode to display and format
33 * OCaml code correctly. 'vim' comes with a good OCaml editing mode by
34 * default. (2) Read the resources at http://ocaml-tutorial.org/
39 #directory "+xml-light";;
40 #load "xml-light.cma";;
45 type style = ret * args
47 | RErr (* 0 = ok, -1 = error *)
48 | RErrDispose (* Disposes handle, see hivex_close. *)
49 | RHive (* Returns a hive_h or NULL. *)
50 | RNode (* Returns hive_node_h or 0. *)
51 | RNodeNotFound (* See hivex_node_get_child. *)
52 | RNodeList (* Returns hive_node_h* or NULL. *)
53 | RValue (* Returns hive_value_h or 0. *)
54 | RValueList (* Returns hive_value_h* or NULL. *)
55 | RString (* Returns char* or NULL. *)
56 | RStringList (* Returns char** or NULL. *)
57 | RLenType (* See hivex_value_type. *)
58 | RLenTypeVal (* See hivex_value_value. *)
59 | RInt32 (* Returns int32. *)
60 | RInt64 (* Returns int64. *)
62 and args = argt list (* List of parameters. *)
64 and argt = (* Note, cannot be NULL/0 unless it
65 says so explicitly below. *)
67 | ANode of string (* hive_node_h *)
68 | AValue of string (* hive_value_h *)
69 | AString of string (* char* *)
70 | AStringNullable of string (* char* (can be NULL) *)
71 | AOpenFlags (* HIVEX_OPEN_* flags list. *)
72 | AUnusedFlags (* Flags arg that is always 0 *)
73 | ASetValues (* See hivex_node_set_values. *)
74 | ASetValue (* See hivex_node_set_value. *)
77 * https://secure.wikimedia.org/wikipedia/en/wiki/Windows_Registry#Keys_and_values
79 * It's unfortunate that in our original C binding we strayed away from
80 * the names that Windows uses (eg. REG_SZ for strings). We include
81 * both our names and the Windows names.
85 "Just a key without a value";
87 "A Windows string (encoding is unknown, but often UTF16-LE)";
88 2, "expand_string", "EXPAND_SZ",
89 "A Windows string that contains %env% (environment variable expansion)";
90 3, "binary", "BINARY",
93 "DWORD (32 bit integer), little endian";
94 5, "dword_be", "DWORD_BIG_ENDIAN",
95 "DWORD (32 bit integer), big endian";
97 "Symbolic link to another part of the registry tree";
98 7, "multiple_strings", "MULTI_SZ",
99 "Multiple Windows strings. See http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx";
100 8, "resource_list", "RESOURCE_LIST",
102 9, "full_resource_description", "FULL_RESOURCE_DESCRIPTOR",
103 "Resource descriptor";
104 10, "resource_requirements_list", "RESOURCE_REQUIREMENTS_LIST",
105 "Resouce requirements list";
106 11, "qword", "QWORD",
107 "QWORD (64 bit integer), unspecified endianness but usually little endian"
109 let max_hive_type = 11
111 (* Open flags (bitmask passed to AOpenFlags) *)
113 1, "VERBOSE", "Verbose messages";
114 2, "DEBUG", "Debug messages";
115 4, "WRITE", "Enable writes to the hive";
120 "open", (RHive, [AString "filename"; AOpenFlags]),
123 Opens the hive named C<filename> for reading.
125 Flags is an ORed list of the open flags (or C<0> if you don't
126 want to pass any flags). These flags are defined:
130 =item HIVEX_OPEN_VERBOSE
134 =item HIVEX_OPEN_DEBUG
136 Very verbose messages, suitable for debugging problems in the library
139 This is also selected if the C<HIVEX_DEBUG> environment variable
142 =item HIVEX_OPEN_WRITE
144 Open the hive for writing. If omitted, the hive is read-only.
146 See L<hivex(3)/WRITING TO HIVE FILES>.
150 "close", (RErrDispose, [AHive]),
151 "close a hive handle",
153 Close a hive handle and free all associated resources.
155 Note that any uncommitted writes are I<not> committed by this call,
156 but instead are lost. See L<hivex(3)/WRITING TO HIVE FILES>.";
158 "root", (RNode, [AHive]),
159 "return the root node of the hive",
161 Return root node of the hive. All valid registries must contain
164 "node_name", (RString, [AHive; ANode "node"]),
165 "return the name of the node",
167 Return the name of the node.
169 Note that the name of the root node is a dummy, such as
170 C<$$$PROTO.HIV> (other names are possible: it seems to depend on the
171 tool or program that created the hive in the first place). You can
172 only know the \"real\" name of the root node by knowing which registry
173 file this hive originally comes from, which is knowledge that is
174 outside the scope of this library.";
176 "node_children", (RNodeList, [AHive; ANode "node"]),
177 "return children of node",
179 Return an array of nodes which are the subkeys
180 (children) of C<node>.";
182 "node_get_child", (RNodeNotFound, [AHive; ANode "node"; AString "name"]),
183 "return named child of node",
185 Return the child of node with the name C<name>, if it exists.
187 The name is matched case insensitively.";
189 "node_parent", (RNode, [AHive; ANode "node"]),
190 "return the parent of node",
192 Return the parent of C<node>.
194 The parent pointer of the root node in registry files that we
195 have examined seems to be invalid, and so this function will
196 return an error if called on the root node.";
198 "node_values", (RValueList, [AHive; ANode "node"]),
199 "return (key, value) pairs attached to a node",
201 Return the array of (key, value) pairs attached to this node.";
203 "node_get_value", (RValue, [AHive; ANode "node"; AString "key"]),
204 "return named key at node",
206 Return the value attached to this node which has the name C<key>,
209 The key name is matched case insensitively.
211 Note that to get the default key, you should pass the empty
212 string C<\"\"> here. The default key is often written C<\"@\">, but
213 inside hives that has no meaning and won't give you the
216 "value_key", (RString, [AHive; AValue "val"]),
217 "return the key of a (key, value) pair",
219 Return the key (name) of a (key, value) pair. The name
220 is reencoded as UTF-8 and returned as a string.
222 The string should be freed by the caller when it is no longer needed.
224 Note that this function can return a zero-length string. In the
225 context of Windows Registries, this means that this value is the
226 default key for this node in the tree. This is usually written
229 "value_type", (RLenType, [AHive; AValue "val"]),
230 "return data length and data type of a value",
232 Return the data length and data type of the value in this (key, value)
233 pair. See also C<hivex_value_value> which returns all this
234 information, and the value itself. Also, C<hivex_value_*> functions
235 below which can be used to return the value in a more useful form when
236 you know the type in advance.";
238 "value_value", (RLenTypeVal, [AHive; AValue "val"]),
239 "return data length, data type and data of a value",
241 Return the value of this (key, value) pair. The value should
242 be interpreted according to its type (see C<hive_type>).";
244 "value_string", (RString, [AHive; AValue "val"]),
245 "return value as a string",
247 If this value is a string, return the string reencoded as UTF-8
248 (as a C string). This only works for values which have type
249 C<hive_t_string>, C<hive_t_expand_string> or C<hive_t_link>.";
251 "value_multiple_strings", (RStringList, [AHive; AValue "val"]),
252 "return value as multiple strings",
254 If this value is a multiple-string, return the strings reencoded
255 as UTF-8 (in C, as a NULL-terminated array of C strings, in other
256 language bindings, as a list of strings). This only
257 works for values which have type C<hive_t_multiple_strings>.";
259 "value_dword", (RInt32, [AHive; AValue "val"]),
260 "return value as a DWORD",
262 If this value is a DWORD (Windows int32), return it. This only works
263 for values which have type C<hive_t_dword> or C<hive_t_dword_be>.";
265 "value_qword", (RInt64, [AHive; AValue "val"]),
266 "return value as a QWORD",
268 If this value is a QWORD (Windows int64), return it. This only
269 works for values which have type C<hive_t_qword>.";
271 "commit", (RErr, [AHive; AStringNullable "filename"; AUnusedFlags]),
272 "commit (write) changes to file",
274 Commit (write) any changes which have been made.
276 C<filename> is the new file to write. If C<filename> is null/undefined
277 then we overwrite the original file (ie. the file name that was passed to
280 Note this does not close the hive handle. You can perform further
281 operations on the hive after committing, including making more
282 modifications. If you no longer wish to use the hive, then you
283 should close the handle after committing.";
285 "node_add_child", (RNode, [AHive; ANode "parent"; AString "name"]),
288 Add a new child node named C<name> to the existing node C<parent>.
289 The new child initially has no subnodes and contains no keys or
290 values. The sk-record (security descriptor) is inherited from
293 The parent must not have an existing child called C<name>, so if you
294 want to overwrite an existing child, call C<hivex_node_delete_child>
297 "node_delete_child", (RErr, [AHive; ANode "node"]),
300 Delete the node C<node>. All values at the node and all subnodes are
301 deleted (recursively). The C<node> handle and the handles of all
302 subnodes become invalid. You cannot delete the root node.";
304 "node_set_values", (RErr, [AHive; ANode "node"; ASetValues; AUnusedFlags]),
305 "set (key, value) pairs at a node",
307 This call can be used to set all the (key, value) pairs
310 C<node> is the node to modify.";
312 "node_set_value", (RErr, [AHive; ANode "node"; ASetValue; AUnusedFlags]),
313 "set a single (key, value) pair at a given node",
315 This call can be used to replace a single (key, value) pair
316 stored in C<node>. If the key does not already exist, then a
317 new key is added. Key matching is case insensitive.
319 C<node> is the node to modify.";
322 (* Used to memoize the result of pod2text. *)
323 let pod2text_memo_filename = "generator/.pod2text.data"
324 let pod2text_memo : ((int * string * string), string list) Hashtbl.t =
326 let chan = open_in pod2text_memo_filename in
327 let v = input_value chan in
331 _ -> Hashtbl.create 13
332 let pod2text_memo_updated () =
333 let chan = open_out pod2text_memo_filename in
334 output_value chan pod2text_memo;
338 * Note we don't want to use any external OCaml libraries which
339 * makes this a bit harder than it should be.
341 module StringMap = Map.Make (String)
343 let failwithf fs = ksprintf failwith fs
345 let unique = let i = ref 0 in fun () -> incr i; !i
347 let replace_char s c1 c2 =
348 let s2 = String.copy s in
350 for i = 0 to String.length s2 - 1 do
351 if String.unsafe_get s2 i = c1 then (
352 String.unsafe_set s2 i c2;
356 if not !r then s else s2
360 (* || c = '\f' *) || c = '\n' || c = '\r' || c = '\t' (* || c = '\v' *)
362 let triml ?(test = isspace) str =
364 let n = ref (String.length str) in
365 while !n > 0 && test str.[!i]; do
370 else String.sub str !i !n
372 let trimr ?(test = isspace) str =
373 let n = ref (String.length str) in
374 while !n > 0 && test str.[!n-1]; do
377 if !n = String.length str then str
378 else String.sub str 0 !n
380 let trim ?(test = isspace) str =
381 trimr ~test (triml ~test str)
384 let len = String.length s in
385 let sublen = String.length sub in
387 if i <= len-sublen then (
390 if s.[i+j] = sub.[j] then loop2 (j+1)
396 if r = -1 then loop (i+1) else r
402 let rec replace_str s s1 s2 =
403 let len = String.length s in
404 let sublen = String.length s1 in
408 let s' = String.sub s 0 i in
409 let s'' = String.sub s (i+sublen) (len-i-sublen) in
410 s' ^ s2 ^ replace_str s'' s1 s2
413 let rec string_split sep str =
414 let len = String.length str in
415 let seplen = String.length sep in
416 let i = find str sep in
419 let s' = String.sub str 0 i in
420 let s'' = String.sub str (i+seplen) (len-i-seplen) in
421 s' :: string_split sep s''
424 let files_equal n1 n2 =
425 let cmd = sprintf "cmp -s %s %s" (Filename.quote n1) (Filename.quote n2) in
426 match Sys.command cmd with
429 | i -> failwithf "%s: failed with error code %d" cmd i
431 let rec filter_map f = function
435 | Some y -> y :: filter_map f xs
436 | None -> filter_map f xs
438 let rec find_map f = function
439 | [] -> raise Not_found
443 | None -> find_map f xs
446 let rec loop i = function
448 | x :: xs -> f i x; loop (i+1) xs
453 let rec loop i = function
455 | x :: xs -> let r = f i x in r :: loop (i+1) xs
459 let count_chars c str =
461 for i = 0 to String.length str - 1 do
462 if c = String.unsafe_get str i then incr count
466 let name_of_argt = function
468 | ANode n | AValue n | AString n | AStringNullable n -> n
469 | AOpenFlags | AUnusedFlags -> "flags"
470 | ASetValues -> "values"
473 (* Check function names etc. for consistency. *)
474 let check_functions () =
475 let contains_uppercase str =
476 let len = String.length str in
478 if i >= len then false
481 if c >= 'A' && c <= 'Z' then true
488 (* Check function names. *)
490 fun (name, _, _, _) ->
491 if String.length name >= 7 && String.sub name 0 7 = "hivex" then
492 failwithf "function name %s does not need 'hivex' prefix" name;
494 failwithf "function name is empty";
495 if name.[0] < 'a' || name.[0] > 'z' then
496 failwithf "function name %s must start with lowercase a-z" name;
497 if String.contains name '-' then
498 failwithf "function name %s should not contain '-', use '_' instead."
502 (* Check function parameter/return names. *)
504 fun (name, style, _, _) ->
505 let check_arg_ret_name n =
506 if contains_uppercase n then
507 failwithf "%s param/ret %s should not contain uppercase chars"
509 if String.contains n '-' || String.contains n '_' then
510 failwithf "%s param/ret %s should not contain '-' or '_'"
513 failwithf "%s has a param/ret called 'value', which causes conflicts in the OCaml bindings, use something like 'val' or a more descriptive name" name;
514 if n = "int" || n = "char" || n = "short" || n = "long" then
515 failwithf "%s has a param/ret which conflicts with a C type (eg. 'int', 'char' etc.)" name;
516 if n = "i" || n = "n" then
517 failwithf "%s has a param/ret called 'i' or 'n', which will cause some conflicts in the generated code" name;
518 if n = "argv" || n = "args" then
519 failwithf "%s has a param/ret called 'argv' or 'args', which will cause some conflicts in the generated code" name;
521 (* List Haskell, OCaml and C keywords here.
522 * http://www.haskell.org/haskellwiki/Keywords
523 * http://caml.inria.fr/pub/docs/manual-ocaml/lex.html#operator-char
524 * http://en.wikipedia.org/wiki/C_syntax#Reserved_keywords
525 * Formatted via: cat c haskell ocaml|sort -u|grep -vE '_|^val$' \
526 * |perl -pe 's/(.+)/"$1";/'|fmt -70
527 * Omitting _-containing words, since they're handled above.
528 * Omitting the OCaml reserved word, "val", is ok,
529 * and saves us from renaming several parameters.
532 "and"; "as"; "asr"; "assert"; "auto"; "begin"; "break"; "case";
533 "char"; "class"; "const"; "constraint"; "continue"; "data";
534 "default"; "deriving"; "do"; "done"; "double"; "downto"; "else";
535 "end"; "enum"; "exception"; "extern"; "external"; "false"; "float";
536 "for"; "forall"; "foreign"; "fun"; "function"; "functor"; "goto";
537 "hiding"; "if"; "import"; "in"; "include"; "infix"; "infixl";
538 "infixr"; "inherit"; "initializer"; "inline"; "instance"; "int";
540 "land"; "lazy"; "let"; "long"; "lor"; "lsl"; "lsr"; "lxor";
541 "match"; "mdo"; "method"; "mod"; "module"; "mutable"; "new";
542 "newtype"; "object"; "of"; "open"; "or"; "private"; "qualified";
543 "rec"; "register"; "restrict"; "return"; "short"; "sig"; "signed";
544 "sizeof"; "static"; "struct"; "switch"; "then"; "to"; "true"; "try";
545 "type"; "typedef"; "union"; "unsigned"; "virtual"; "void";
546 "volatile"; "when"; "where"; "while";
548 if List.mem n reserved then
549 failwithf "%s has param/ret using reserved word %s" name n;
552 List.iter (fun arg -> check_arg_ret_name (name_of_argt arg)) (snd style)
555 (* Check short descriptions. *)
557 fun (name, _, shortdesc, _) ->
558 if shortdesc.[0] <> Char.lowercase shortdesc.[0] then
559 failwithf "short description of %s should begin with lowercase." name;
560 let c = shortdesc.[String.length shortdesc-1] in
561 if c = '\n' || c = '.' then
562 failwithf "short description of %s should not end with . or \\n." name
565 (* Check long dscriptions. *)
567 fun (name, _, _, longdesc) ->
568 if longdesc.[String.length longdesc-1] = '\n' then
569 failwithf "long description of %s should not end with \\n." name
572 (* 'pr' prints to the current output file. *)
573 let chan = ref Pervasives.stdout
578 let i = count_chars '\n' str in
580 output_string !chan str
583 let copyright_years =
584 let this_year = 1900 + (localtime (time ())).tm_year in
585 if this_year > 2009 then sprintf "2009-%04d" this_year else "2009"
587 (* Generate a header block in a number of standard styles. *)
589 | CStyle | CPlusPlusStyle | HashStyle | OCamlStyle | HaskellStyle
591 type license = GPLv2plus | LGPLv2plus | GPLv2 | LGPLv2
593 let generate_header ?(extra_inputs = []) comment license =
594 let inputs = "generator/generator.ml" :: extra_inputs in
595 let c = match comment with
596 | CStyle -> pr "/* "; " *"
597 | CPlusPlusStyle -> pr "// "; "//"
598 | HashStyle -> pr "# "; "#"
599 | OCamlStyle -> pr "(* "; " *"
600 | HaskellStyle -> pr "{- "; " "
601 | PODCommentStyle -> pr "=begin comment\n\n "; "" in
602 pr "hivex generated file\n";
603 pr "%s WARNING: THIS FILE IS GENERATED FROM:\n" c;
604 List.iter (pr "%s %s\n" c) inputs;
605 pr "%s ANY CHANGES YOU MAKE TO THIS FILE WILL BE LOST.\n" c;
607 pr "%s Copyright (C) %s Red Hat Inc.\n" c copyright_years;
608 pr "%s Derived from code by Petter Nordahl-Hagen under a compatible license:\n" c;
609 pr "%s Copyright (c) 1997-2007 Petter Nordahl-Hagen.\n" c;
610 pr "%s Derived from code by Markus Stephany under a compatible license:\n" c;
611 pr "%s Copyright (c)2000-2004, Markus Stephany.\n" c;
615 pr "%s This program is free software; you can redistribute it and/or modify\n" c;
616 pr "%s it under the terms of the GNU General Public License as published by\n" c;
617 pr "%s the Free Software Foundation; either version 2 of the License, or\n" c;
618 pr "%s (at your option) any later version.\n" c;
620 pr "%s This program is distributed in the hope that it will be useful,\n" c;
621 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
622 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" c;
623 pr "%s GNU General Public License for more details.\n" c;
625 pr "%s You should have received a copy of the GNU General Public License along\n" c;
626 pr "%s with this program; if not, write to the Free Software Foundation, Inc.,\n" c;
627 pr "%s 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" c;
630 pr "%s This library is free software; you can redistribute it and/or\n" c;
631 pr "%s modify it under the terms of the GNU Lesser General Public\n" c;
632 pr "%s License as published by the Free Software Foundation; either\n" c;
633 pr "%s version 2 of the License, or (at your option) any later version.\n" c;
635 pr "%s This library is distributed in the hope that it will be useful,\n" c;
636 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
637 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" c;
638 pr "%s Lesser General Public License for more details.\n" c;
640 pr "%s You should have received a copy of the GNU Lesser General Public\n" c;
641 pr "%s License along with this library; if not, write to the Free Software\n" c;
642 pr "%s Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" c;
645 pr "%s This program is free software; you can redistribute it and/or modify\n" c;
646 pr "%s it under the terms of the GNU General Public License as published by\n" c;
647 pr "%s the Free Software Foundation; version 2 of the License only.\n" c;
649 pr "%s This program is distributed in the hope that it will be useful,\n" c;
650 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
651 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" c;
652 pr "%s GNU General Public License for more details.\n" c;
654 pr "%s You should have received a copy of the GNU General Public License along\n" c;
655 pr "%s with this program; if not, write to the Free Software Foundation, Inc.,\n" c;
656 pr "%s 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" c;
659 pr "%s This library is free software; you can redistribute it and/or\n" c;
660 pr "%s modify it under the terms of the GNU Lesser General Public\n" c;
661 pr "%s License as published by the Free Software Foundation;\n" c;
662 pr "%s version 2.1 of the License only.\n" c;
664 pr "%s This library is distributed in the hope that it will be useful,\n" c;
665 pr "%s but WITHOUT ANY WARRANTY; without even the implied warranty of\n" c;
666 pr "%s MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" c;
667 pr "%s Lesser General Public License for more details.\n" c;
669 pr "%s You should have received a copy of the GNU Lesser General Public\n" c;
670 pr "%s License along with this library; if not, write to the Free Software\n" c;
671 pr "%s Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" c;
674 | CStyle -> pr " */\n"
677 | OCamlStyle -> pr " *)\n"
678 | HaskellStyle -> pr "-}\n"
679 | PODCommentStyle -> pr "\n=end comment\n"
683 (* Start of main code generation functions below this line. *)
685 let rec generate_c_header () =
686 generate_header CStyle LGPLv2;
698 /* NOTE: This API is documented in the man page hivex(3). */
701 typedef struct hive_h hive_h;
703 /* Nodes and values. */
704 typedef size_t hive_node_h;
705 typedef size_t hive_value_h;
707 /* Pre-defined types. */
711 fun (t, old_style, new_style, description) ->
712 pr " /* %s */\n" description;
713 pr " hive_t_REG_%s,\n" new_style;
714 pr "#define hive_t_%s hive_t_REG_%s\n" old_style new_style;
720 typedef enum hive_type hive_type;
722 /* Bitmask of flags passed to hivex_open. */
725 fun (v, flag, description) ->
726 pr " /* %s */\n" description;
727 pr "#define HIVEX_OPEN_%-10s %d\n" flag v;
732 /* Array of (key, value) pairs passed to hivex_node_set_values. */
733 struct hive_set_value {
739 typedef struct hive_set_value hive_set_value;
743 pr "/* Functions. */\n";
745 (* Function declarations. *)
747 fun (shortname, style, _, _) ->
748 let name = "hivex_" ^ shortname in
749 generate_c_prototype ~extern:true name style
752 (* The visitor pattern. *)
754 /* Visit all nodes. This is specific to the C API and is not made
755 * available to other languages. This is because of the complexity
756 * of binding callbacks in other languages, but also because other
757 * languages make it much simpler to iterate over a tree.
759 struct hivex_visitor {
760 int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name);
761 int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name);
762 int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str);
763 int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, char **argv);
764 int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str);
765 int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int32_t);
766 int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, int64_t);
767 int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
768 int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
769 int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
770 int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value);
773 #define HIVEX_VISIT_SKIP_BAD 1
775 extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
776 extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
780 (* Finish the header file. *)
786 #endif /* HIVEX_H_ */
789 and generate_c_prototype ?(extern = false) name style =
790 if extern then pr "extern ";
791 (match fst style with
793 | RErrDispose -> pr "int "
794 | RHive -> pr "hive_h *"
795 | RNode -> pr "hive_node_h "
796 | RNodeNotFound -> pr "hive_node_h "
797 | RNodeList -> pr "hive_node_h *"
798 | RValue -> pr "hive_value_h "
799 | RValueList -> pr "hive_value_h *"
800 | RString -> pr "char *"
801 | RStringList -> pr "char **"
802 | RLenType -> pr "int "
803 | RLenTypeVal -> pr "char *"
804 | RInt32 -> pr "int32_t "
805 | RInt64 -> pr "int64_t "
808 let comma = ref false in
811 if !comma then pr ", "; comma := true;
813 | AHive -> pr "hive_h *h"
814 | ANode n -> pr "hive_node_h %s" n
815 | AValue n -> pr "hive_value_h %s" n
816 | AString n | AStringNullable n -> pr "const char *%s" n
817 | AOpenFlags | AUnusedFlags -> pr "int flags"
818 | ASetValues -> pr "size_t nr_values, const hive_set_value *values"
819 | ASetValue -> pr "const hive_set_value *val"
821 (match fst style with
822 | RLenType | RLenTypeVal -> pr ", hive_type *t, size_t *len"
827 and generate_c_pod () =
828 generate_header PODCommentStyle GPLv2;
835 hivex - Windows Registry \"hive\" extraction library
843 fun (shortname, style, _, _) ->
844 let name = "hivex_" ^ shortname in
846 generate_c_prototype ~extern:false name style;
851 Link with I<-lhivex>.
855 libhivex is a library for extracting the contents of Windows Registry
856 \"hive\" files. It is designed to be secure against buggy or malicious
859 Unlike many other tools in this area, it doesn't use the textual .REG
860 format for output, because parsing that is as much trouble as parsing
861 the original binary format. Instead it makes the file available
862 through a C API, or there is a separate program to export the hive as
863 XML (see L<hivexml(1)>), or to navigate the file (see L<hivexsh(1)>).
869 This handle describes an open hive file.
873 This is a node handle, an integer but opaque outside the library.
874 Valid node handles cannot be 0. The library returns 0 in some
875 situations to indicate an error.
879 The enum below describes the possible types for the value(s)
880 stored at each node. Note that you should not trust the
881 type field in a Windows Registry, as it very often has no
882 relationship to reality. Some applications use their own
883 types. The encoding of strings is not specified. Some
884 programs store everything (including strings) in binary blobs.
889 fun (t, _, new_style, description) ->
890 pr " /* %s */\n" description;
891 pr " hive_t_REG_%s = %d,\n" new_style t
898 This is a value handle, an integer but opaque outside the library.
899 Valid value handles cannot be 0. The library returns 0 in some
900 situations to indicate an error.
902 =head2 hive_set_value
904 The typedef C<hive_set_value> is used in conjunction with the
905 C<hivex_node_set_values> call described below.
907 struct hive_set_value {
908 char *key; /* key - a UTF-8 encoded ASCIIZ string */
909 hive_type t; /* type of value field */
910 size_t len; /* length of value field in bytes */
911 char *value; /* value field */
913 typedef struct hive_set_value hive_set_value;
915 To set the default value for a node, you have to pass C<key = \"\">.
917 Note that the C<value> field is just treated as a list of bytes, and
918 is stored directly in the hive. The caller has to ensure correct
919 encoding and endianness, for example converting dwords to little
922 The correct type and encoding for values depends on the node and key
923 in the registry, the version of Windows, and sometimes even changes
924 between versions of Windows for the same key. We don't document it
925 here. Often it's not documented at all.
931 fun (shortname, style, _, longdesc) ->
932 let name = "hivex_" ^ shortname in
933 pr "=head2 %s\n" name;
935 generate_c_prototype ~extern:false name style;
940 if List.mem AUnusedFlags (snd style) then
941 pr "The flags parameter is unused. Always pass 0.\n\n";
943 if List.mem ASetValues (snd style) then
944 pr "C<values> is an array of (key, value) pairs. There
945 should be C<nr_values> elements in this array.
947 Any existing values stored at the node are discarded, and their
948 C<hive_value_h> handles become invalid. Thus you can remove all
949 values stored at C<node> by passing C<nr_values = 0>.\n\n";
951 if List.mem ASetValue (snd style) then
952 pr "C<value> is a single (key, value) pair.
954 Existing C<hive_value_h> handles become invalid.\n\n";
956 (match fst style with
959 Returns 0 on success.
960 On error this returns -1 and sets errno.\n\n"
963 Returns 0 on success.
964 On error this returns -1 and sets errno.
966 This function frees the hive handle (even if it returns an error).
967 The hive handle must not be used again after calling this function.\n\n"
970 Returns a new hive handle.
971 On error this returns NULL and sets errno.\n\n"
974 Returns a node handle.
975 On error this returns 0 and sets errno.\n\n"
978 Returns a node handle.
979 If the node was not found, this returns 0 without setting errno.
980 On error this returns 0 and sets errno.\n\n"
983 Returns a 0-terminated array of nodes.
984 The array must be freed by the caller when it is no longer needed.
985 On error this returns NULL and sets errno.\n\n"
988 Returns a value handle.
989 On error this returns 0 and sets errno.\n\n"
992 Returns a 0-terminated array of values.
993 The array must be freed by the caller when it is no longer needed.
994 On error this returns NULL and sets errno.\n\n"
998 The string must be freed by the caller when it is no longer needed.
999 On error this returns NULL and sets errno.\n\n"
1002 Returns a NULL-terminated array of C strings.
1003 The strings and the array must all be freed by the caller when
1004 they are no longer needed.
1005 On error this returns NULL and sets errno.\n\n"
1008 Returns 0 on success.
1009 On error this returns -1 and sets errno.\n\n"
1012 The value is returned as an array of bytes (of length C<len>).
1013 The value must be freed by the caller when it is no longer needed.
1014 On error this returns NULL and sets errno.\n\n"
1015 | RInt32 | RInt64 -> ()
1020 =head1 WRITING TO HIVE FILES
1022 The hivex library supports making limited modifications to hive files.
1023 We have tried to implement this very conservatively in order to reduce
1024 the chance of corrupting your registry. However you should be careful
1025 and take back-ups, since Microsoft has never documented the hive
1026 format, and so it is possible there are nuances in the
1027 reverse-engineered format that we do not understand.
1029 To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE>
1030 flag to C<hivex_open>, otherwise any write operation will return with
1033 The write operations shown below do not modify the on-disk file
1034 immediately. You must call C<hivex_commit> in order to write the
1035 changes to disk. If you call C<hivex_close> without committing then
1036 any writes are discarded.
1038 Hive files internally consist of a \"memory dump\" of binary blocks
1039 (like the C heap), and some of these blocks can be unused. The hivex
1040 library never reuses these unused blocks. Instead, to ensure
1041 robustness in the face of the partially understood on-disk format,
1042 hivex only allocates new blocks after the end of the file, and makes
1043 minimal modifications to existing structures in the file to point to
1044 these new blocks. This makes hivex slightly less disk-efficient than
1045 it could be, but disk is cheap, and registry modifications tend to be
1048 When deleting nodes, it is possible that this library may leave
1049 unreachable live blocks in the hive. This is because certain parts of
1050 the hive disk format such as security (sk) records and big data (db)
1051 records and classname fields are not well understood (and not
1052 documented at all) and we play it safe by not attempting to modify
1053 them. Apart from wasting a little bit of disk space, it is not
1054 thought that unreachable blocks are a problem.
1056 =head2 WRITE OPERATIONS WHICH ARE NOT SUPPORTED
1062 Changing the root node.
1066 Creating a new hive file from scratch. This is impossible at present
1067 because not all fields in the header are understood.
1071 Modifying or deleting single values at a node.
1075 Modifying security key (sk) records or classnames.
1076 Previously we did not understand these records. However now they
1077 are well-understood and we could add support if it was required
1078 (but nothing much really uses them).
1082 =head1 VISITING ALL NODES
1084 The visitor pattern is useful if you want to visit all nodes
1085 in the tree or all nodes below a certain point in the tree.
1087 First you set up your own C<struct hivex_visitor> with your
1090 Each of these callback functions should return 0 on success or -1
1091 on error. If any callback returns -1, then the entire visit
1092 terminates immediately. If you don't need a callback function at
1093 all, set the function pointer to NULL.
1095 struct hivex_visitor {
1096 int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name);
1097 int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name);
1098 int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1099 hive_type t, size_t len, const char *key, const char *str);
1100 int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h,
1101 hive_value_h, hive_type t, size_t len, const char *key, char **argv);
1102 int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h,
1103 hive_value_h, hive_type t, size_t len, const char *key,
1105 int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1106 hive_type t, size_t len, const char *key, int32_t);
1107 int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1108 hive_type t, size_t len, const char *key, int64_t);
1109 int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1110 hive_type t, size_t len, const char *key, const char *value);
1111 int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1112 hive_type t, size_t len, const char *key, const char *value);
1113 int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1114 hive_type t, size_t len, const char *key, const char *value);
1115 /* If value_any callback is not NULL, then the other value_*
1116 * callbacks are not used, and value_any is called on all values.
1118 int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h,
1119 hive_type t, size_t len, const char *key, const char *value);
1126 int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
1128 Visit all the nodes recursively in the hive C<h>.
1130 C<visitor> should be a C<hivex_visitor> structure with callback
1131 fields filled in as required (unwanted callbacks can be set to
1132 NULL). C<len> must be the length of the 'visitor' struct (you
1133 should pass C<sizeof (struct hivex_visitor)> for this).
1135 This returns 0 if the whole recursive visit was completed
1136 successfully. On error this returns -1. If one of the callback
1137 functions returned an error than we don't touch errno. If the
1138 error was generated internally then we set errno.
1140 You can skip bad registry entries by setting C<flag> to
1141 C<HIVEX_VISIT_SKIP_BAD>. If this flag is not set, then a bad registry
1142 causes the function to return an error immediately.
1144 This function is robust if the registry contains cycles or
1145 pointers which are invalid or outside the registry. It detects
1146 these cases and returns an error.
1148 =item hivex_visit_node
1150 int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque);
1152 Same as C<hivex_visit> but instead of starting out at the root, this
1157 =head1 THE STRUCTURE OF THE WINDOWS REGISTRY
1159 Note: To understand the relationship between hives and the common
1160 Windows Registry keys (like C<HKEY_LOCAL_MACHINE>) please see the
1161 Wikipedia page on the Windows Registry.
1163 The Windows Registry is split across various binary files, each
1164 file being known as a \"hive\". This library only handles a single
1165 hive file at a time.
1167 Hives are n-ary trees with a single root. Each node in the tree
1170 Each node in the tree (including non-leaf nodes) may have an
1171 arbitrary list of (key, value) pairs attached to it. It may
1172 be the case that one of these pairs has an empty key. This
1173 is referred to as the default key for the node.
1175 The (key, value) pairs are the place where the useful data is
1176 stored in the registry. The key is always a string (possibly the
1177 empty string for the default key). The value is a typed object
1178 (eg. string, int32, binary, etc.).
1180 =head2 RELATIONSHIP TO .REG FILES
1182 Although this library does not care about or deal with Windows reg
1183 files, it's useful to look at the relationship between the registry
1184 itself and reg files because they are so common.
1186 A reg file is a text representation of the registry, or part of the
1187 registry. The actual registry hives that Windows uses are binary
1188 files. There are a number of Windows and Linux tools that let you
1189 generate reg files, or merge reg files back into the registry hives.
1190 Notable amongst them is Microsoft's REGEDIT program (formerly known as
1193 A typical reg file will contain many sections looking like this:
1195 [HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack]
1196 \"@\"=\"Generic Stack\"
1197 \"TileInfo\"=\"prop:System.FileCount\"
1198 \"TilePath\"=str(2):\"%%systemroot%%\\\\system32\"
1199 \"ThumbnailCutoff\"=dword:00000000
1200 \"FriendlyTypeName\"=hex(2):40,00,25,00,53,00,79,00,73,00,74,00,65,00,6d,00,52,00,6f,00,\\
1201 6f,00,74,00,25,00,5c,00,53,00,79,00,73,00,74,00,65,00,6d,00,\\
1202 33,00,32,00,5c,00,73,00,65,00,61,00,72,00,63,00,68,00,66,00,\\
1203 6f,00,6c,00,64,00,65,00,72,00,2e,00,64,00,6c,00,6c,00,2c,00,\\
1204 2d,00,39,00,30,00,32,00,38,00,00,00,d8
1206 Taking this one piece at a time:
1208 [HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack]
1210 This is the path to this node in the registry tree. The first part,
1211 C<HKEY_LOCAL_MACHINE\\SOFTWARE> means that this comes from a hive
1212 (file) called C<SOFTWARE>. C<\\Classes\\Stack> is the real path part,
1213 starting at the root node of the C<SOFTWARE> hive.
1215 Below the node name is a list of zero or more key-value pairs. Any
1216 interior or leaf node in the registry may have key-value pairs
1219 \"@\"=\"Generic Stack\"
1221 This is the \"default key\". In reality (ie. inside the binary hive)
1222 the key string is the empty string. In reg files this is written as
1223 C<@> but this has no meaning either in the hives themselves or in this
1224 library. The value is a string (type 1 - see C<enum hive_type>
1227 \"TileInfo\"=\"prop:System.FileCount\"
1229 This is a regular (key, value) pair, with the value being a type 1
1230 string. Note that inside the binary file the string is likely to be
1231 UTF-16 encoded. This library converts to and from UTF-8 strings
1234 \"TilePath\"=str(2):\"%%systemroot%%\\\\system32\"
1236 The value in this case has type 2 (expanded string) meaning that some
1237 %%...%% variables get expanded by Windows. (This library doesn't know
1238 or care about variable expansion).
1240 \"ThumbnailCutoff\"=dword:00000000
1242 The value in this case is a dword (type 4).
1244 \"FriendlyTypeName\"=hex(2):40,00,....
1246 This value is an expanded string (type 2) represented in the reg file
1247 as a series of hex bytes. In this case the string appears to be a
1250 =head1 NOTE ON THE USE OF ERRNO
1252 Many functions in this library set errno to indicate errors. These
1253 are the values of errno you may encounter (this list is not
1260 Corrupt or unsupported Registry file format.
1268 Passed an invalid argument to the function.
1272 Followed a Registry pointer which goes outside
1273 the registry or outside a registry block.
1277 Registry contains cycles.
1281 Field in the registry out of range.
1285 Registry key already exists.
1289 Tried to write to a registry which is not opened for writing.
1293 =head1 ENVIRONMENT VARIABLES
1299 Setting HIVEX_DEBUG=1 will enable very verbose messages. This is
1300 useful for debugging problems with the library itself.
1310 L<http://libguestfs.org/>,
1313 L<http://en.wikipedia.org/wiki/Windows_Registry>.
1317 Richard W.M. Jones (C<rjones at redhat dot com>)
1321 Copyright (C) 2009-2010 Red Hat Inc.
1323 Derived from code by Petter Nordahl-Hagen under a compatible license:
1324 Copyright (C) 1997-2007 Petter Nordahl-Hagen.
1326 Derived from code by Markus Stephany under a compatible license:
1327 Copyright (C) 2000-2004 Markus Stephany.
1329 This library is free software; you can redistribute it and/or
1330 modify it under the terms of the GNU Lesser General Public
1331 License as published by the Free Software Foundation;
1332 version 2.1 of the License only.
1334 This library is distributed in the hope that it will be useful,
1335 but WITHOUT ANY WARRANTY; without even the implied warranty of
1336 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1337 Lesser General Public License for more details.
1340 (* Generate the linker script which controls the visibility of
1341 * symbols in the public ABI and ensures no other symbols get
1342 * exported accidentally.
1344 and generate_linker_script () =
1345 generate_header HashStyle GPLv2plus;
1353 List.map (fun (name, _, _, _) -> "hivex_" ^ name)
1355 let globals = List.sort compare (globals @ functions) in
1359 List.iter (pr " %s;\n") globals;
1366 and generate_ocaml_interface () =
1367 generate_header OCamlStyle LGPLv2plus;
1371 (** A [hive_h] hive file handle. *)
1375 (** Nodes and values. *)
1377 exception Error of string * Unix.error * string
1378 (** Error raised by a function.
1380 The first parameter is the name of the function which raised the error.
1381 The second parameter is the errno (see the [Unix] module). The third
1382 parameter is a human-readable string corresponding to the errno.
1384 See hivex(3) for a partial list of interesting errno values that
1385 can be generated by the library. *)
1386 exception Handle_closed of string
1387 (** This exception is raised if you call a function on a closed handle. *)
1393 fun (t, _, new_style, description) ->
1395 pr " | REG_%s (** %s *)\n" new_style description
1399 | REG_UNKNOWN of int32 (** unknown type *)
1400 (** Hive type field. *)
1406 fun (v, flag, description) ->
1407 assert (1 lsl i = v);
1408 pr " | OPEN_%s (** %s *)\n" flag description
1412 (** Open flags for {!open_file} call. *)
1419 (** (key, value) pair passed (as an array) to {!node_set_values}. *)
1423 fun (name, style, shortdesc, _) ->
1425 generate_ocaml_prototype name style;
1426 pr "(** %s *)\n" shortdesc
1429 and generate_ocaml_implementation () =
1430 generate_header OCamlStyle LGPLv2plus;
1437 exception Error of string * Unix.error * string
1438 exception Handle_closed of string
1440 (* Give the exceptions names, so they can be raised from the C code. *)
1442 Callback.register_exception \"ocaml_hivex_error\"
1443 (Error (\"\", Unix.EUNKNOWNERR 0, \"\"));
1444 Callback.register_exception \"ocaml_hivex_closed\" (Handle_closed \"\")
1450 fun (t, _, new_style, _) ->
1452 pr " | REG_%s\n" new_style
1456 | REG_UNKNOWN of int32
1462 fun (v, flag, description) ->
1463 assert (1 lsl i = v);
1464 pr " | OPEN_%s (** %s *)\n" flag description
1478 fun (name, style, _, _) ->
1479 generate_ocaml_prototype ~is_external:true name style
1482 and generate_ocaml_prototype ?(is_external = false) name style =
1483 let ocaml_name = if name = "open" then "open_file" else name in
1485 if is_external then pr "external " else pr "val ";
1486 pr "%s : " ocaml_name;
1489 | AHive -> pr "t -> "
1490 | ANode _ -> pr "node -> "
1491 | AValue _ -> pr "value -> "
1492 | AString _ -> pr "string -> "
1493 | AStringNullable _ -> pr "string option -> "
1494 | AOpenFlags -> pr "open_flag list -> "
1495 | AUnusedFlags -> ()
1496 | ASetValues -> pr "set_value array -> "
1497 | ASetValue -> pr "set_value -> "
1499 (match fst style with
1500 | RErr -> pr "unit" (* all errors are turned into exceptions *)
1501 | RErrDispose -> pr "unit"
1503 | RNode -> pr "node"
1504 | RNodeNotFound -> pr "node"
1505 | RNodeList -> pr "node array"
1506 | RValue -> pr "value"
1507 | RValueList -> pr "value array"
1508 | RString -> pr "string"
1509 | RStringList -> pr "string array"
1510 | RLenType -> pr "hive_type * int"
1511 | RLenTypeVal -> pr "hive_type * string"
1512 | RInt32 -> pr "int32"
1513 | RInt64 -> pr "int64"
1516 pr " = \"ocaml_hivex_%s\"" name;
1519 and generate_ocaml_c () =
1520 generate_header CStyle LGPLv2plus;
1531 #include <caml/config.h>
1532 #include <caml/alloc.h>
1533 #include <caml/callback.h>
1534 #include <caml/custom.h>
1535 #include <caml/fail.h>
1536 #include <caml/memory.h>
1537 #include <caml/mlvalues.h>
1538 #include <caml/signals.h>
1540 #ifdef HAVE_CAML_UNIXSUPPORT_H
1541 #include <caml/unixsupport.h>
1543 extern value unix_error_of_code (int errcode);
1546 #ifndef HAVE_CAML_RAISE_WITH_ARGS
1548 caml_raise_with_args (value tag, int nargs, value args[])
1551 CAMLxparamN (args, nargs);
1555 bucket = caml_alloc_small (1 + nargs, 0);
1556 Field(bucket, 0) = tag;
1557 for (i = 0; i < nargs; i++) Field(bucket, 1 + i) = args[i];
1565 #define Hiveh_val(v) (*((hive_h **)Data_custom_val(v)))
1566 static value Val_hiveh (hive_h *);
1567 static int HiveOpenFlags_val (value);
1568 static hive_set_value *HiveSetValue_val (value);
1569 static hive_set_value *HiveSetValues_val (value);
1570 static hive_type HiveType_val (value);
1571 static value Val_hive_type (hive_type);
1572 static value copy_int_array (size_t *);
1573 static value copy_type_len (size_t, hive_type);
1574 static value copy_type_value (const char *, size_t, hive_type);
1575 static void raise_error (const char *) Noreturn;
1576 static void raise_closed (const char *) Noreturn;
1582 fun (name, style, _, _) ->
1583 pr "/* Automatically generated wrapper for function\n";
1584 pr " * "; generate_ocaml_prototype name style;
1590 | ASetValues -> ["nrvalues"; "values"]
1591 | AUnusedFlags -> ["0"]
1592 | arg -> [name_of_argt arg]) (snd style) in
1594 match fst style with
1595 | RLenType | RLenTypeVal -> c_params @ [["&t"; "&len"]]
1597 let c_params = List.concat c_params in
1600 filter_map (function
1601 | AUnusedFlags -> None
1602 | arg -> Some (name_of_argt arg ^ "v")) (snd style) in
1604 pr "/* Emit prototype to appease gcc's -Wmissing-prototypes. */\n";
1605 pr "CAMLprim value ocaml_hivex_%s (value %s" name (List.hd params);
1606 List.iter (pr ", value %s") (List.tl params); pr ");\n";
1609 pr "CAMLprim value\n";
1610 pr "ocaml_hivex_%s (value %s" name (List.hd params);
1611 List.iter (pr ", value %s") (List.tl params);
1615 pr " CAMLparam%d (%s);\n"
1616 (List.length params) (String.concat ", " params);
1617 pr " CAMLlocal1 (rv);\n";
1623 pr " hive_h *h = Hiveh_val (hv);\n";
1624 pr " if (h == NULL)\n";
1625 pr " raise_closed (\"%s\");\n" name
1627 pr " hive_node_h %s = Int_val (%sv);\n" n n
1629 pr " hive_value_h %s = Int_val (%sv);\n" n n
1631 pr " const char *%s = String_val (%sv);\n" n n
1632 | AStringNullable n ->
1633 pr " const char *%s =\n" n;
1634 pr " %sv != Val_int (0) ? String_val (Field (%sv, 0)) : NULL;\n"
1637 pr " int flags = HiveOpenFlags_val (flagsv);\n"
1638 | AUnusedFlags -> ()
1640 pr " int nrvalues = Wosize_val (valuesv);\n";
1641 pr " hive_set_value *values = HiveSetValues_val (valuesv);\n"
1643 pr " hive_set_value *val = HiveSetValue_val (valv);\n"
1648 match fst style with
1649 | RErr -> pr " int r;\n"; "-1"
1650 | RErrDispose -> pr " int r;\n"; "-1"
1651 | RHive -> pr " hive_h *r;\n"; "NULL"
1652 | RNode -> pr " hive_node_h r;\n"; "0"
1655 pr " hive_node_h r;\n";
1657 | RNodeList -> pr " hive_node_h *r;\n"; "NULL"
1658 | RValue -> pr " hive_value_h r;\n"; "0"
1659 | RValueList -> pr " hive_value_h *r;\n"; "NULL"
1660 | RString -> pr " char *r;\n"; "NULL"
1661 | RStringList -> pr " char **r;\n"; "NULL"
1664 pr " size_t len;\n";
1665 pr " hive_type t;\n";
1669 pr " size_t len;\n";
1670 pr " hive_type t;\n";
1679 "-1 && errno != 0" in
1681 (* The libguestfs OCaml bindings call enter_blocking_section
1682 * here. However I don't think that is safe, because we are
1683 * holding pointers to caml strings during the call, and these
1684 * could be moved or freed by other threads. In any case, there
1685 * is very little reason to enter_blocking_section for any hivex
1686 * call, so don't do it. XXX
1688 (*pr " caml_enter_blocking_section ();\n";*)
1689 pr " r = hivex_%s (%s" name (List.hd c_params);
1690 List.iter (pr ", %s") (List.tl c_params);
1692 (*pr " caml_leave_blocking_section ();\n";*)
1695 (* Dispose of the hive handle (even if hivex_close returns error). *)
1696 (match fst style with
1698 pr " /* So we don't double-free in the finalizer. */\n";
1699 pr " Hiveh_val (hv) = NULL;\n";
1706 | AHive | ANode _ | AValue _ | AString _ | AStringNullable _
1707 | AOpenFlags | AUnusedFlags -> ()
1709 pr " free (values);\n";
1712 pr " free (val);\n";
1716 (* Check for errors. *)
1717 pr " if (r == %s)\n" error_code;
1718 pr " raise_error (\"%s\");\n" name;
1721 (match fst style with
1722 | RErr -> pr " rv = Val_unit;\n"
1723 | RErrDispose -> pr " rv = Val_unit;\n"
1724 | RHive -> pr " rv = Val_hiveh (r);\n"
1725 | RNode -> pr " rv = Val_int (r);\n"
1727 pr " if (r == 0)\n";
1728 pr " caml_raise_not_found ();\n";
1730 pr " rv = Val_int (r);\n"
1732 pr " rv = copy_int_array (r);\n";
1734 | RValue -> pr " rv = Val_int (r);\n"
1736 pr " rv = copy_int_array (r);\n";
1739 pr " rv = caml_copy_string (r);\n";
1742 pr " rv = caml_copy_string_array ((const char **) r);\n";
1743 pr " for (int i = 0; r[i] != NULL; ++i) free (r[i]);\n";
1745 | RLenType -> pr " rv = copy_type_len (len, t);\n"
1747 pr " rv = copy_type_value (r, len, t);\n";
1749 | RInt32 -> pr " rv = caml_copy_int32 (r);\n"
1750 | RInt64 -> pr " rv = caml_copy_int32 (r);\n"
1753 pr " CAMLreturn (rv);\n";
1761 HiveOpenFlags_val (value v)
1766 while (v != Val_int (0)) {
1768 flags |= 1 << Int_val (v2);
1775 static hive_set_value *
1776 HiveSetValue_val (value v)
1778 hive_set_value *val = malloc (sizeof (hive_set_value));
1780 val->key = String_val (Field (v, 0));
1781 val->t = HiveType_val (Field (v, 1));
1782 val->len = caml_string_length (Field (v, 2));
1783 val->value = String_val (Field (v, 2));
1788 static hive_set_value *
1789 HiveSetValues_val (value v)
1791 size_t nr_values = Wosize_val (v);
1792 hive_set_value *values = malloc (nr_values * sizeof (hive_set_value));
1796 for (i = 0; i < nr_values; ++i) {
1798 values[i].key = String_val (Field (v2, 0));
1799 values[i].t = HiveType_val (Field (v2, 1));
1800 values[i].len = caml_string_length (Field (v2, 2));
1801 values[i].value = String_val (Field (v2, 2));
1808 HiveType_val (value v)
1811 return Int_val (v); /* REG_NONE etc. */
1813 return Int32_val (Field (v, 0)); /* REG_UNKNOWN of int32 */
1817 Val_hive_type (hive_type t)
1823 CAMLreturn (Val_int (t));
1825 rv = caml_alloc (1, 0); /* REG_UNKNOWN of int32 */
1826 v = caml_copy_int32 (t);
1827 caml_modify (&Field (rv, 0), v);
1833 copy_int_array (size_t *xs)
1839 for (nr = 0; xs[nr] != 0; ++nr)
1842 CAMLreturn (Atom (0));
1844 rv = caml_alloc (nr, 0);
1845 for (i = 0; i < nr; ++i) {
1846 v = Val_int (xs[i]);
1847 Store_field (rv, i, v); /* Safe because v is not a block. */
1854 copy_type_len (size_t len, hive_type t)
1859 rv = caml_alloc (2, 0);
1860 v = Val_hive_type (t);
1861 Store_field (rv, 0, v);
1863 Store_field (rv, 1, len);
1868 copy_type_value (const char *r, size_t len, hive_type t)
1873 rv = caml_alloc (2, 0);
1874 v = Val_hive_type (t);
1875 Store_field (rv, 0, v);
1876 v = caml_alloc_string (len);
1877 memcpy (String_val (v), r, len);
1878 caml_modify (&Field (rv, 1), len);
1882 /* Raise exceptions. */
1884 raise_error (const char *function)
1886 /* Save errno early in case it gets trashed. */
1890 CAMLlocal3 (v1, v2, v3);
1892 v1 = caml_copy_string (function);
1893 v2 = unix_error_of_code (err);
1894 v3 = caml_copy_string (strerror (err));
1895 value vvv[] = { v1, v2, v3 };
1896 caml_raise_with_args (*caml_named_value (\"ocaml_hivex_error\"), 3, vvv);
1902 raise_closed (const char *function)
1907 v = caml_copy_string (function);
1908 caml_raise_with_arg (*caml_named_value (\"ocaml_hivex_closed\"), v);
1913 /* Allocate handles and deal with finalization. */
1915 hivex_finalize (value hv)
1917 hive_h *h = Hiveh_val (hv);
1918 if (h) hivex_close (h);
1921 static struct custom_operations hivex_custom_operations = {
1922 (char *) \"hivex_custom_operations\",
1924 custom_compare_default,
1925 custom_hash_default,
1926 custom_serialize_default,
1927 custom_deserialize_default
1931 Val_hiveh (hive_h *h)
1936 rv = caml_alloc_custom (&hivex_custom_operations,
1937 sizeof (hive_h *), 0, 1);
1944 and generate_perl_pm () =
1945 generate_header HashStyle LGPLv2plus;
1952 Win::Hivex - Perl bindings for reading and writing Windows Registry hive files
1958 $h = Win::Hivex->open ('SOFTWARE');
1959 $root_node = $h->root ();
1960 print $h->node_name ($root_node);
1964 The C<Win::Hivex> module provides a Perl XS binding to the
1965 L<hivex(3)> API for reading and writing Windows Registry binary
1970 All errors turn into calls to C<croak> (see L<Carp(3)>).
1984 XSLoader::load ('Win::Hivex');
1988 $h = Win::Hivex->open ($filename,";
1992 pr "\n [%s => 1,]" (String.lowercase flag)
1997 Open a Windows Registry binary hive file.
1999 The C<verbose> and C<debug> flags enable different levels of
2002 The C<write> flag is required if you will be modifying the
2003 hive file (see L<hivex(3)/WRITING TO HIVE FILES>).
2005 This function returns a hive handle. The hive handle is
2006 closed automatically when its reference count drops to 0.
2012 my $class = ref ($proto) || $proto;
2013 my $filename = shift;
2020 fun (n, flag, description) ->
2021 pr " # %s\n" description;
2022 pr " $flags += %d if $flags{%s};\n" n (String.lowercase flag)
2027 my $self = Win::Hivex::_open ($filename, $flags);
2028 bless $self, $class;
2035 fun (name, style, _, longdesc) ->
2036 (* The close call isn't explicit in Perl: handles are closed
2037 * when their reference count drops to 0.
2039 * The open call is coded specially in Perl.
2041 * Therefore we don't generate prototypes for these two calls:
2043 if fst style <> RErrDispose && List.hd (snd style) = AHive then (
2044 let longdesc = replace_str longdesc "C<hivex_" "C<" in
2045 pr "=item %s\n\n " name;
2046 generate_perl_prototype name style;
2048 pr "%s\n\n" longdesc;
2050 (match fst style with
2062 This returns a node handle.\n\n"
2065 This returns a node handle, or C<undef> if the node was not found.\n\n"
2068 This returns a list of node handles.\n\n"
2071 This returns a value handle.\n\n"
2074 This returns a list of value handles.\n\n"
2077 if List.mem ASetValues (snd style) then
2078 pr "C<@values> is an array of (keys, value) pairs.
2079 Each element should be a hashref containing C<key>, C<t> (type)
2082 Any existing values stored at the node are discarded, and their
2083 C<value> handles become invalid. Thus you can remove all
2084 values stored at C<node> by passing C<@values = []>.\n\n"
2097 Copyright (C) %s Red Hat Inc.
2101 Please see the file COPYING.LIB for the full license.
2107 L<http://libguestfs.org>,
2113 and generate_perl_prototype name style =
2115 (match fst style with
2118 | RHive -> pr "$h = "
2120 | RNodeNotFound -> pr "$node = "
2121 | RNodeList -> pr "@nodes = "
2122 | RValue -> pr "$value = "
2123 | RValueList -> pr "@values = "
2124 | RString -> pr "$string = "
2125 | RStringList -> pr "@strings = "
2126 | RLenType -> pr "($type, $len) = "
2127 | RLenTypeVal -> pr "($type, $data) = "
2128 | RInt32 -> pr "$int32 = "
2129 | RInt64 -> pr "$int64 = "
2132 let args = List.tl (snd style) in
2134 (* AUnusedFlags is dropped in the bindings. *)
2135 let args = List.filter ((<>) AUnusedFlags) args in
2139 let comma = ref false in
2142 if !comma then pr ", "; comma := true;
2147 | AString n -> pr "$%s" n
2148 | AStringNullable n -> pr "[$%s|undef]" n
2149 | AOpenFlags -> pr "[flags]"
2150 | AUnusedFlags -> assert false
2151 | ASetValues -> pr "\\@values"
2152 | ASetValue -> pr "$val"
2157 and generate_perl_xs () =
2158 generate_header CStyle LGPLv2plus;
2161 #include \"EXTERN.h\"
2169 #define PRId64 \"lld\"
2173 my_newSVll(long long val) {
2174 #ifdef USE_64_BIT_ALL
2175 return newSViv(val);
2179 len = snprintf(buf, 100, \"%%\" PRId64, val);
2180 return newSVpv(buf, len);
2185 #define PRIu64 \"llu\"
2190 my_newSVull(unsigned long long val) {
2191 #ifdef USE_64_BIT_ALL
2192 return newSVuv(val);
2196 len = snprintf(buf, 100, \"%%\" PRIu64, val);
2197 return newSVpv(buf, len);
2203 /* http://www.perlmonks.org/?node_id=680842 */
2205 XS_unpack_charPtrPtr (SV *arg) {
2210 if (!arg || !SvOK (arg) || !SvROK (arg) || SvTYPE (SvRV (arg)) != SVt_PVAV)
2211 croak (\"array reference expected\");
2213 av = (AV *)SvRV (arg);
2214 ret = malloc ((av_len (av) + 1 + 1) * sizeof (char *));
2216 croak (\"malloc failed\");
2218 for (i = 0; i <= av_len (av); i++) {
2219 SV **elem = av_fetch (av, i, 0);
2221 if (!elem || !*elem)
2222 croak (\"missing element in list\");
2224 ret[i] = SvPV_nolen (*elem);
2233 /* Handle set_values parameter. */
2234 typedef struct pl_set_values {
2236 hive_set_value *values;
2239 static pl_set_values
2240 unpack_pl_set_values (SV *sv)
2246 if (!sv || !SvOK (sv) || !SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
2247 croak (\"array reference expected\");
2249 av = (AV *)SvRV(sv);
2250 ret.nr_values = av_len (av) + 1;
2251 ret.values = malloc (ret.nr_values * sizeof (hive_set_value));
2253 croak (\"malloc failed\");
2255 for (i = 0; i <= av_len (av); i++) {
2256 SV **hvp = av_fetch (av, i, 0);
2258 if (!hvp || !*hvp || !SvROK (*hvp) || SvTYPE (SvRV (*hvp)) != SVt_PVHV)
2259 croak (\"missing element in list or not a hash ref\");
2261 HV *hv = (HV *)SvRV(*hvp);
2264 svp = hv_fetch (hv, \"key\", 3, 0);
2266 croak (\"missing 'key' in hash\");
2267 ret.values[i].key = SvPV_nolen (*svp);
2269 svp = hv_fetch (hv, \"t\", 1, 0);
2271 croak (\"missing 't' in hash\");
2272 ret.values[i].t = SvIV (*svp);
2274 svp = hv_fetch (hv, \"value\", 5, 0);
2276 croak (\"missing 'value' in hash\");
2277 ret.values[i].value = SvPV (*svp, ret.values[i].len);
2283 static hive_set_value *
2284 unpack_set_value (SV *sv)
2286 hive_set_value *ret;
2288 if (!sv || !SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVHV)
2289 croak (\"not a hash ref\");
2291 ret = malloc (sizeof (hive_set_value));
2293 croak (\"malloc failed\");
2295 HV *hv = (HV *)SvRV(sv);
2298 svp = hv_fetch (hv, \"key\", 3, 0);
2300 croak (\"missing 'key' in hash\");
2301 ret->key = SvPV_nolen (*svp);
2303 svp = hv_fetch (hv, \"t\", 1, 0);
2305 croak (\"missing 't' in hash\");
2306 ret->t = SvIV (*svp);
2308 svp = hv_fetch (hv, \"value\", 5, 0);
2310 croak (\"missing 'value' in hash\");
2311 ret->value = SvPV (*svp, ret->len);
2316 MODULE = Win::Hivex PACKAGE = Win::Hivex
2321 _open (filename, flags)
2325 RETVAL = hivex_open (filename, flags);
2327 croak (\"hivex_open: %%s: %%s\", filename, strerror (errno));
2335 if (hivex_close (h) == -1)
2336 croak (\"hivex_close: %%s\", strerror (errno));
2341 fun (name, style, _, longdesc) ->
2342 (* The close and open calls are handled specially above. *)
2343 if fst style <> RErrDispose && List.hd (snd style) = AHive then (
2344 (match fst style with
2345 | RErr -> pr "void\n"
2346 | RErrDispose -> failwith "perl bindings cannot handle a call which disposes of the handle"
2347 | RHive -> failwith "perl bindings cannot handle a call which returns a handle"
2351 | RString -> pr "SV *\n"
2356 | RLenTypeVal -> pr "void\n"
2357 | RInt32 -> pr "SV *\n"
2358 | RInt64 -> pr "SV *\n"
2361 (* Call and arguments. *)
2363 filter_map (function
2364 | AUnusedFlags -> None
2365 | arg -> Some (name_of_argt arg)) (snd style) in
2369 | AUnusedFlags -> "0"
2370 | ASetValues -> "values.nr_values, values.values"
2371 | arg -> name_of_argt arg) (snd style) in
2373 pr "%s (%s)\n" name (String.concat ", " perl_params);
2384 | AStringNullable n ->
2385 (* http://www.perlmonks.org/?node_id=554277 *)
2386 pr " char *%s = SvOK(ST(%d)) ? SvPV_nolen(ST(%d)) : NULL;\n" n i i
2389 | AUnusedFlags -> ()
2391 pr " pl_set_values values = unpack_pl_set_values (ST(%d));\n" i
2393 pr " hive_set_value *val = unpack_set_value (ST(%d));\n" i
2400 pr " free (values.values);\n"
2403 | AHive | ANode _ | AValue _ | AString _ | AStringNullable _
2404 | AOpenFlags | AUnusedFlags -> ()
2409 (match fst style with
2414 pr " r = hivex_%s (%s);\n"
2415 name (String.concat ", " c_params);
2417 pr " if (r == -1)\n";
2418 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2421 | RErrDispose -> assert false
2422 | RHive -> assert false
2428 pr " /* hive_node_h = hive_value_h = size_t so we cheat\n";
2429 pr " here to simplify the generator */\n";
2432 pr " r = hivex_%s (%s);\n"
2433 name (String.concat ", " c_params);
2435 pr " if (r == 0)\n";
2436 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2438 pr " RETVAL = newSViv (r);\n";
2444 pr " hive_node_h r;\n";
2447 pr " r = hivex_%s (%s);\n"
2448 name (String.concat ", " c_params);
2450 pr " if (r == 0 && errno != 0)\n";
2451 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2453 pr " if (r == 0)\n";
2454 pr " RETVAL = &PL_sv_undef;\n";
2456 pr " RETVAL = newSViv (r);\n";
2464 pr " r = hivex_%s (%s);\n"
2465 name (String.concat ", " c_params);
2467 pr " if (r == NULL)\n";
2468 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2470 pr " RETVAL = newSVpv (r, 0);\n";
2481 pr " r = hivex_%s (%s);\n"
2482 name (String.concat ", " c_params);
2484 pr " if (r == NULL)\n";
2485 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2487 pr " for (n = 0; r[n] != 0; ++n) /**/;\n";
2488 pr " EXTEND (SP, n);\n";
2489 pr " for (i = 0; i < n; ++i)\n";
2490 pr " PUSHs (sv_2mortal (newSViv (r[i])));\n";
2498 pr " r = hivex_%s (%s);\n"
2499 name (String.concat ", " c_params);
2501 pr " if (r == NULL)\n";
2502 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2504 pr " for (n = 0; r[n] != NULL; ++n) /**/;\n";
2505 pr " EXTEND (SP, n);\n";
2506 pr " for (i = 0; i < n; ++i) {\n";
2507 pr " PUSHs (sv_2mortal (newSVpv (r[i], 0)));\n";
2508 pr " free (r[i]);\n";
2515 pr " size_t len;\n";
2516 pr " hive_type type;\n";
2518 pr " r = hivex_%s (%s, &type, &len);\n"
2519 name (String.concat ", " c_params);
2521 pr " if (r == -1)\n";
2522 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2524 pr " EXTEND (SP, 2);\n";
2525 pr " PUSHs (sv_2mortal (newSViv (type)));\n";
2526 pr " PUSHs (sv_2mortal (newSViv (len)));\n";
2531 pr " size_t len;\n";
2532 pr " hive_type type;\n";
2534 pr " r = hivex_%s (%s, &type, &len);\n"
2535 name (String.concat ", " c_params);
2537 pr " if (r == NULL)\n";
2538 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2540 pr " EXTEND (SP, 2);\n";
2541 pr " PUSHs (sv_2mortal (newSViv (type)));\n";
2542 pr " PUSHs (sv_2mortal (newSVpvn (r, len)));\n";
2550 pr " r = hivex_%s (%s);\n"
2551 name (String.concat ", " c_params);
2553 pr " if (r == -1 && errno != 0)\n";
2554 pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
2556 pr " RETVAL = my_newSVll (r);\n";
2564 and generate_python_py () =
2565 generate_header HashStyle LGPLv2plus
2567 and generate_python_c () =
2568 generate_header CStyle LGPLv2plus
2570 let output_to filename k =
2571 let filename_new = filename ^ ".new" in
2572 chan := open_out filename_new;
2575 chan := Pervasives.stdout;
2577 (* Is the new file different from the current file? *)
2578 if Sys.file_exists filename && files_equal filename filename_new then
2579 unlink filename_new (* same, so skip it *)
2581 (* different, overwrite old one *)
2582 (try chmod filename 0o644 with Unix_error _ -> ());
2583 rename filename_new filename;
2584 chmod filename 0o444;
2585 printf "written %s\n%!" filename;
2588 let perror msg = function
2589 | Unix_error (err, _, _) ->
2590 eprintf "%s: %s\n" msg (error_message err)
2592 eprintf "%s: %s\n" msg (Printexc.to_string exn)
2597 try openfile "configure.ac" [O_RDWR] 0
2599 | Unix_error (ENOENT, _, _) ->
2601 You are probably running this from the wrong directory.
2602 Run it from the top source directory using the command
2603 generator/generator.ml
2607 perror "open: configure.ac" exn;
2610 (* Acquire a lock so parallel builds won't try to run the generator
2611 * twice at the same time. Subsequent builds will wait for the first
2612 * one to finish. Note the lock is released implicitly when the
2615 (try lockf lock_fd F_LOCK 1
2617 perror "lock: configure.ac" exn;
2622 output_to "lib/hivex.h" generate_c_header;
2623 output_to "lib/hivex.pod" generate_c_pod;
2625 output_to "lib/hivex.syms" generate_linker_script;
2627 output_to "ocaml/hivex.mli" generate_ocaml_interface;
2628 output_to "ocaml/hivex.ml" generate_ocaml_implementation;
2629 output_to "ocaml/hivex_c.c" generate_ocaml_c;
2631 output_to "perl/lib/Win/Hivex.pm" generate_perl_pm;
2632 output_to "perl/Hivex.xs" generate_perl_xs;
2635 We ran out of time before we could write the Python bindings.
2636 output_to "python/hivex.py" generate_python_py;
2637 output_to "python/hivex-py.c" generate_python_c;
2640 (* Always generate this file last, and unconditionally. It's used
2641 * by the Makefile to know when we must re-run the generator.
2643 let chan = open_out "generator/stamp-generator" in
2647 printf "generated %d lines of code\n" !lines