#!/usr/bin/env ocaml
(* hivex
- * Copyright (C) 2009-2010 Red Hat Inc.
+ * Copyright (C) 2009-2011 Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
(* This script generates language bindings and some documentation for
* hivex.
- *
+ *
* After editing this file, run it (./generator/generator.ml) to
* regenerate all the output files. 'make' will rerun this
* automatically when necessary. Note that if you are using a separate
* build directory you must run generator.ml from the _source_
* directory.
- *
+ *
* IMPORTANT: This script should NOT print any warnings. If it prints
* warnings, you should treat them as errors.
- *
+ *
* OCaml tips: (1) In emacs, install tuareg-mode to display and format
* OCaml code correctly. 'vim' comes with a good OCaml editing mode by
* default. (2) Read the resources at http://ocaml-tutorial.org/
#load "unix.cma";;
#load "str.cma";;
-#directory "+xml-light";;
-#load "xml-light.cma";;
open Unix
open Printf
| RErr (* 0 = ok, -1 = error *)
| RErrDispose (* Disposes handle, see hivex_close. *)
| RHive (* Returns a hive_h or NULL. *)
+ | RSize (* Returns size_t or 0. *)
| RNode (* Returns hive_node_h or 0. *)
| RNodeNotFound (* See hivex_node_get_child. *)
| RNodeList (* Returns hive_node_h* or NULL. *)
(* Hive types, from:
* https://secure.wikimedia.org/wikipedia/en/wiki/Windows_Registry#Keys_and_values
- *
+ *
* It's unfortunate that in our original C binding we strayed away from
* the names that Windows uses (eg. REG_SZ for strings). We include
* both our names and the Windows names.
"root", (RNode, [AHive]),
"return the root node of the hive",
"\
-Return root node of the hive. All valid registries must contain
-a root node.";
+Return root node of the hive. All valid hives must contain a root node.";
+
+ "last_modified", (RInt64, [AHive]),
+ "return the modification time from the header of the hive",
+ "\
+Return the modification time from the header of the hive.
+
+The returned value is a Windows filetime.
+To convert this to a Unix C<time_t> see:
+L<http://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux/6161842#6161842>";
"node_name", (RString, [AHive; ANode "node"]),
"return the name of the node",
file this hive originally comes from, which is knowledge that is
outside the scope of this library.";
+ "node_timestamp", (RInt64, [AHive; ANode "node"]),
+ "return the modification time of the node",
+ "\
+Return the modification time of the node.
+
+The returned value is a Windows filetime.
+To convert this to a Unix C<time_t> see:
+L<http://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux/6161842#6161842>";
+
"node_children", (RNodeList, [AHive; ANode "node"]),
"return children of node",
"\
inside hives that has no meaning and won't give you the
default key.";
+ "value_key_len", (RSize, [AHive; AValue "val"]),
+ "return the length of a value's key",
+ "\
+Return the length of the key (name) of a (key, value) pair. The
+length can legitimately be 0, so errno is the necessary mechanism
+to check for errors.
+
+In the context of Windows Registries, a zero-length name means
+that this value is the default key for this node in the tree.
+This is usually written as C<\"@\">.";
+
"value_key", (RString, [AHive; AValue "val"]),
"return the key of a (key, value) pair",
"\
"node_set_value", (RErr, [AHive; ANode "node"; ASetValue; AUnusedFlags]),
"set a single (key, value) pair at a given node",
"\
-This call can be used to replace a single (key, value) pair
-stored in C<node>. If the key does not already exist, then a
-new key is added. Key matching is case insensitive.
+This call can be used to replace a single C<(key, value)> pair
+stored in C<node>. If the key does not already exist, then a
+new key is added. Key matching is case insensitive.
C<node> is the node to modify.";
]
-(* Used to memoize the result of pod2text. *)
-let pod2text_memo_filename = "generator/.pod2text.data"
-let pod2text_memo : ((int * string * string), string list) Hashtbl.t =
- try
- let chan = open_in pod2text_memo_filename in
- let v = input_value chan in
- close_in chan;
- v
- with
- _ -> Hashtbl.create 13
-let pod2text_memo_updated () =
- let chan = open_out pod2text_memo_filename in
- output_value chan pod2text_memo;
- close_out chan
-
(* Useful functions.
* Note we don't want to use any external OCaml libraries which
* makes this a bit harder than it should be.
let trim ?(test = isspace) str =
trimr ~test (triml ~test str)
+(* Used to memoize the result of pod2text.
+ *
+ * The table maps the full argument tuple of pod2text to the list of
+ * output lines, and is marshalled to and from pod2text_memo_filename.
+ * Marshalled data is binary, so the channels are opened in binary
+ * mode.  NOTE(review): the ".version.2" suffix presumably exists so
+ * that a memo file written with a different key layout is never read
+ * back (input_value performs no type checking) -- confirm.
+ *)
+let pod2text_memo_filename = "generator/.pod2text.data.version.2"
+let pod2text_memo : ((int option * bool * bool * string * string), string list) Hashtbl.t =
+  try
+    let chan = open_in_bin pod2text_memo_filename in
+    let v =
+      (* Don't leak the channel if the file is truncated or corrupt. *)
+      try input_value chan
+      with exn -> close_in_noerr chan; raise exn in
+    close_in chan;
+    v
+  with
+    (* Deliberately best-effort: a missing or unreadable memo file just
+     * means we start again with an empty table.
+     *)
+    _ -> Hashtbl.create 13
+(* Write the whole memo table back to pod2text_memo_filename. *)
+let pod2text_memo_updated () =
+  let chan = open_out_bin pod2text_memo_filename in
+  (try output_value chan pod2text_memo
+   with exn -> close_out_noerr chan; raise exn);
+  close_out chan
+
+(* Convert the longdesc POD text to plain text by running the external
+ * 'pod2text' program.  Returns the output as a list of lines.
+ *
+ *   ?width    if given, passed to pod2text as '-w <width>'
+ *   ?trim     pass every kept line through triml (default: true)
+ *   ?discard  drop the first line of output (default: true)
+ *
+ * Because this is very slow (the slowest part of autogeneration), the
+ * result is memoized in pod2text_memo, keyed on all of the arguments,
+ * and the memo is saved each time a new entry is added.
+ *
+ * Fails (through failwithf) if pod2text exits with a non-zero status
+ * or is signalled or stopped.
+ *)
+let pod2text ?width ?(trim = true) ?(discard = true) name longdesc =
+  let key = width, trim, discard, name, longdesc in
+  try Hashtbl.find pod2text_memo key
+  with Not_found ->
+    (* Hand the POD to pod2text through a temporary file. *)
+    let filename, chan = Filename.open_temp_file "gen" ".tmp" in
+    fprintf chan "=head1 %s\n\n%s\n" name longdesc;
+    close_out chan;
+    let quoted = Filename.quote filename in
+    let cmd =
+      match width with
+      | None -> sprintf "pod2text %s" quoted
+      | Some width -> sprintf "pod2text -w %d %s" width quoted in
+    let chan = open_process_in cmd in
+    (* Accumulate the output in reverse until we hit End_of_file. *)
+    let acc = ref [] in
+    let lineno = ref 0 in
+    (try
+       while true do
+         let line = input_line chan in
+         incr lineno;
+         (* Line 1 is thrown away when ~discard is set. *)
+         if not (discard && !lineno = 1) then
+           acc := (if trim then triml line else line) :: !acc
+       done
+     with End_of_file -> ());
+    let lines = List.rev !acc in
+    unlink filename;
+    (match close_process_in chan with
+     | WEXITED 0 -> ()
+     | WEXITED status ->
+         failwithf "pod2text: process exited with non-zero status (%d)" status
+     | WSIGNALED signo | WSTOPPED signo ->
+         failwithf "pod2text: process signalled or stopped by signal %d" signo
+    );
+    Hashtbl.add pod2text_memo key lines;
+    pod2text_memo_updated ();
+    lines
+
let rec find s sub =
let len = String.length s in
let sublen = String.length sub in
#ifndef HIVEX_H_
#define HIVEX_H_
+#include <stdlib.h>
#include <stdint.h>
#ifdef __cplusplus
typedef size_t hive_node_h;
typedef size_t hive_value_h;
+#include <errno.h>
+#ifdef ENOKEY
+# define HIVEX_NO_KEY ENOKEY
+#else
+# define HIVEX_NO_KEY ENOENT
+#endif
+
/* Pre-defined types. */
enum hive_type {
";
| RErr -> pr "int "
| RErrDispose -> pr "int "
| RHive -> pr "hive_h *"
+ | RSize -> pr "size_t "
| RNode -> pr "hive_node_h "
| RNodeNotFound -> pr "hive_node_h "
| RNodeList -> pr "hive_node_h *"
=head1 SYNOPSIS
#include <hivex.h>
-
+
";
List.iter (
fun (shortname, style, _, _) ->
=head1 DESCRIPTION
-libhivex is a library for extracting the contents of Windows Registry
+Hivex is a library for extracting the contents of Windows Registry
\"hive\" files. It is designed to be secure against buggy or malicious
registry files.
-Unlike many other tools in this area, it doesn't use the textual .REG
-format for output, because parsing that is as much trouble as parsing
-the original binary format. Instead it makes the file available
-through a C API, or there is a separate program to export the hive as
-XML (see L<hivexml(1)>), or to navigate the file (see L<hivexsh(1)>).
+Unlike other tools in this area, it doesn't use the textual .REG
+format, because parsing that is as much trouble as parsing the
+original binary format. Instead it makes the file available
+through a C API, and then wraps this API in higher level scripting
+and GUI tools.
+
+There is a separate program to export the hive as XML
+(see L<hivexml(1)>), or to navigate the file (see L<hivexsh(1)>).
+There is also a Perl script to export and merge the
+file as a textual .REG (regedit) file, see L<hivexregedit(1)>.
+
+If you just want to export or modify the Registry of a Windows
+virtual machine, you should look at L<virt-win-reg(1)>.
+
+Hivex also comes with language bindings for
+OCaml, Perl, Python and Ruby.
=head1 TYPES
-=head2 hive_h *
+=head2 C<hive_h *>
This handle describes an open hive file.
-=head2 hive_node_h
+=head2 C<hive_node_h>
This is a node handle, an integer but opaque outside the library.
Valid node handles cannot be 0. The library returns 0 in some
situations to indicate an error.
-=head2 hive_type
+=head2 C<hive_type>
The enum below describes the possible types for the value(s)
stored at each node. Note that you should not trust the
pr "\
};
-=head2 hive_value_h
+=head2 C<hive_value_h>
This is a value handle, an integer but opaque outside the library.
Valid value handles cannot be 0. The library returns 0 in some
situations to indicate an error.
-=head2 hive_set_value
+=head2 C<hive_set_value>
The typedef C<hive_set_value> is used in conjunction with the
C<hivex_node_set_values> call described below.
fun (shortname, style, _, longdesc) ->
let name = "hivex_" ^ shortname in
pr "=head2 %s\n" name;
- pr "\n";
+ pr "\n ";
generate_c_prototype ~extern:false name style;
pr "\n";
pr "%s\n" longdesc;
pr "\
Returns a new hive handle.
On error this returns NULL and sets errno.\n\n"
+ | RSize ->
+ pr "\
+Returns a size.
+On error this returns 0 and sets errno.\n\n"
| RNode ->
pr "\
Returns a node handle.
=item *
Creating a new hive file from scratch. This is impossible at present
-because not all fields in the header are understood.
+because not all fields in the header are understood. In the hivex
+source tree is a file called C<images/minimal> which could be used as
+the basis for a new hive (but I<caveat emptor>).
=item *
=head2 RELATIONSHIP TO .REG FILES
-Although this library does not care about or deal with Windows reg
-files, it's useful to look at the relationship between the registry
-itself and reg files because they are so common.
+The hivex C library does not care about or deal with Windows .REG
+files. Instead we push this complexity up to the Perl
+L<Win::Hivex(3)> library and the Perl programs
+L<hivexregedit(1)> and L<virt-win-reg(1)>.
+Nevertheless it is useful to look at the relationship between the
+Registry and .REG files because they are so common.
-A reg file is a text representation of the registry, or part of the
+A .REG file is a textual representation of the registry, or part of the
registry. The actual registry hives that Windows uses are binary
files. There are a number of Windows and Linux tools that let you
-generate reg files, or merge reg files back into the registry hives.
+generate .REG files, or merge .REG files back into the registry hives.
Notable amongst them is Microsoft's REGEDIT program (formerly known as
REGEDT32).
-A typical reg file will contain many sections looking like this:
+A typical .REG file will contain many sections looking like this:
[HKEY_LOCAL_MACHINE\\SOFTWARE\\Classes\\Stack]
\"@\"=\"Generic Stack\"
This is the path to this node in the registry tree. The first part,
C<HKEY_LOCAL_MACHINE\\SOFTWARE> means that this comes from a hive
-(file) called C<SOFTWARE>. C<\\Classes\\Stack> is the real path part,
+file called C<C:\\WINDOWS\\SYSTEM32\\CONFIG\\SOFTWARE>.
+C<\\Classes\\Stack> is the real path part,
starting at the root node of the C<SOFTWARE> hive.
Below the node name is a list of zero or more key-value pairs. Any
\"@\"=\"Generic Stack\"
This is the \"default key\". In reality (ie. inside the binary hive)
-the key string is the empty string. In reg files this is written as
+the key string is the empty string. In .REG files this is written as
C<@> but this has no meaning either in the hives themselves or in this
library. The value is a string (type 1 - see C<enum hive_type>
above).
This is a regular (key, value) pair, with the value being a type 1
string. Note that inside the binary file the string is likely to be
-UTF-16 encoded. This library converts to and from UTF-8 strings
-transparently.
+UTF-16LE encoded. This library converts to and from UTF-8 strings
+transparently in some cases.
\"TilePath\"=str(2):\"%%systemroot%%\\\\system32\"
\"FriendlyTypeName\"=hex(2):40,00,....
-This value is an expanded string (type 2) represented in the reg file
+This value is an expanded string (type 2) represented in the .REG file
as a series of hex bytes. In this case the string appears to be a
-UTF-16 string.
+UTF-16LE string.
=head1 NOTE ON THE USE OF ERRNO
Corrupt or unsupported Registry file format.
-=item ENOKEY
+=item HIVEX_NO_KEY
Missing root key.
=head1 SEE ALSO
+L<hivexget(1)>,
L<hivexml(1)>,
L<hivexsh(1)>,
+L<hivexregedit(1)>,
L<virt-win-reg(1)>,
+L<Win::Hivex(3)>,
L<guestfs(3)>,
L<http://libguestfs.org/>,
L<virt-cat(1)>,
| RErr -> pr "unit" (* all errors are turned into exceptions *)
| RErrDispose -> pr "unit"
| RHive -> pr "t"
+ | RSize -> pr "int64"
| RNode -> pr "node"
| RNodeNotFound -> pr "node"
| RNodeList -> pr "node array"
| RErr -> pr " int r;\n"; "-1"
| RErrDispose -> pr " int r;\n"; "-1"
| RHive -> pr " hive_h *r;\n"; "NULL"
+ | RSize -> pr " size_t r;\n"; "0"
| RNode -> pr " hive_node_h r;\n"; "0"
| RNodeNotFound ->
pr " errno = 0;\n";
| RErr -> pr " rv = Val_unit;\n"
| RErrDispose -> pr " rv = Val_unit;\n"
| RHive -> pr " rv = Val_hiveh (r);\n"
+ | RSize -> pr " rv = caml_copy_int64 (r);\n"
| RNode -> pr " rv = Val_int (r);\n"
| RNodeNotFound ->
pr " if (r == 0)\n";
pr " rv = copy_type_value (r, len, t);\n";
pr " free (r);\n"
| RInt32 -> pr " rv = caml_copy_int32 (r);\n"
- | RInt64 -> pr " rv = caml_copy_int32 (r);\n"
+ | RInt64 -> pr " rv = caml_copy_int64 (r);\n"
);
pr " CAMLreturn (rv);\n";
v = Val_hive_type (t);
Store_field (rv, 0, v);
v = Val_int (len);
- Store_field (rv, 1, len);
+ Store_field (rv, 1, v);
CAMLreturn (rv);
}
Store_field (rv, 0, v);
v = caml_alloc_string (len);
memcpy (String_val (v), r, len);
- caml_modify (&Field (rv, 1), len);
+ caml_modify (&Field (rv, 1), v);
CAMLreturn (rv);
}
| RLenTypeVal
| RInt32
| RInt64 -> ()
+ | RSize ->
+ pr "\
+This returns a size.\n\n"
| RNode ->
pr "\
This returns a node handle.\n\n"
| RErr
| RErrDispose -> ()
| RHive -> pr "$h = "
+ | RSize -> pr "$size = "
| RNode
| RNodeNotFound -> pr "$node = "
| RNodeList -> pr "@nodes = "
#include <string.h>
#include <hivex.h>
-
-#ifndef PRId64
-#define PRId64 \"lld\"
-#endif
+#include <inttypes.h>
static SV *
my_newSVll(long long val) {
#endif
}
-#ifndef PRIu64
-#define PRIu64 \"llu\"
-#endif
-
#if 0
static SV *
my_newSVull(unsigned long long val) {
| RErr -> pr "void\n"
| RErrDispose -> failwith "perl bindings cannot handle a call which disposes of the handle"
| RHive -> failwith "perl bindings cannot handle a call which returns a handle"
+ | RSize
| RNode
| RNodeNotFound
| RValue
| RErrDispose -> assert false
| RHive -> assert false
- | RInt32
+ | RSize
| RNode
| RValue ->
pr "PREINIT:\n";
pr " PUSHs (sv_2mortal (newSVpvn (r, len)));\n";
pr " free (r);\n";
+ | RInt32 ->
+ pr "PREINIT:\n";
+ pr " int32_t r;\n";
+ pr " CODE:\n";
+ pr " errno = 0;\n";
+ pr " r = hivex_%s (%s);\n"
+ name (String.concat ", " c_params);
+ free_args ();
+ pr " if (r == -1 && errno != 0)\n";
+ pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
+ name;
+ pr " RETVAL = newSViv (r);\n";
+ pr " OUTPUT:\n";
+ pr " RETVAL\n"
+
| RInt64 ->
pr "PREINIT:\n";
pr " int64_t r;\n";
)
) functions
+and generate_python_c () =
+ generate_header CStyle LGPLv2plus;
+
+ pr "\
+#define PY_SSIZE_T_CLEAN 1
+#include <Python.h>
+
+#if PY_VERSION_HEX < 0x02050000
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include \"hivex.h\"
+
+#ifndef HAVE_PYCAPSULE_NEW
+typedef struct {
+ PyObject_HEAD
+ hive_h *h;
+} Pyhivex_Object;
+#endif
+
+static hive_h *
+get_handle (PyObject *obj)
+{
+ assert (obj);
+ assert (obj != Py_None);
+#ifndef HAVE_PYCAPSULE_NEW
+ return ((Pyhivex_Object *) obj)->h;
+#else
+ return (hive_h *) PyCapsule_GetPointer(obj, \"hive_h\");
+#endif
+}
+
+static PyObject *
+put_handle (hive_h *h)
+{
+ assert (h);
+#ifndef HAVE_PYCAPSULE_NEW
+ return
+ PyCObject_FromVoidPtrAndDesc ((void *) h, (char *) \"hive_h\", NULL);
+#else
+ return PyCapsule_New ((void *) h, \"hive_h\", NULL);
+#endif
+}
+
+/* This returns pointers into the Python objects, which should
+ * not be freed.
+ */
+static int
+get_value (PyObject *v, hive_set_value *ret)
+{
+ PyObject *obj;
+
+ obj = PyDict_GetItemString (v, \"key\");
+ if (!obj) {
+ PyErr_SetString (PyExc_RuntimeError, \"no 'key' element in dictionary\");
+ return -1;
+ }
+ if (!PyString_Check (obj)) {
+ PyErr_SetString (PyExc_RuntimeError, \"'key' element is not a string\");
+ return -1;
+ }
+ ret->key = PyString_AsString (obj);
+
+ obj = PyDict_GetItemString (v, \"t\");
+ if (!obj) {
+ PyErr_SetString (PyExc_RuntimeError, \"no 't' element in dictionary\");
+ return -1;
+ }
+ if (!PyInt_Check (obj)) {
+ PyErr_SetString (PyExc_RuntimeError, \"'t' element is not an integer\");
+ return -1;
+ }
+ ret->t = PyInt_AsLong (obj);
+
+ obj = PyDict_GetItemString (v, \"value\");
+ if (!obj) {
+ PyErr_SetString (PyExc_RuntimeError, \"no 'value' element in dictionary\");
+ return -1;
+ }
+ if (!PyString_Check (obj)) {
+ PyErr_SetString (PyExc_RuntimeError, \"'value' element is not a string\");
+ return -1;
+ }
+ ret->value = PyString_AsString (obj);
+ ret->len = PyString_Size (obj);
+
+ return 0;
+}
+
+typedef struct py_set_values {
+ size_t nr_values;
+ hive_set_value *values;
+} py_set_values;
+
+static int
+get_values (PyObject *v, py_set_values *ret)
+{
+ Py_ssize_t slen;
+ size_t len, i;
+
+ if (!PyList_Check (v)) {
+ PyErr_SetString (PyExc_RuntimeError, \"expecting a list parameter\");
+ return -1;
+ }
+
+ slen = PyList_Size (v);
+ if (slen < 0) {
+ PyErr_SetString (PyExc_RuntimeError, \"get_string_list: PyList_Size failure\");
+ return -1;
+ }
+ len = (size_t) slen;
+ ret->nr_values = len;
+ ret->values = malloc (len * sizeof (hive_set_value));
+ if (!ret->values) {
+ PyErr_SetString (PyExc_RuntimeError, strerror (errno));
+ return -1;
+ }
+
+ for (i = 0; i < len; ++i) {
+ if (get_value (PyList_GetItem (v, i), &(ret->values[i])) == -1) {
+ free (ret->values);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static PyObject *
+put_string_list (char * const * const argv)
+{
+ PyObject *list;
+ size_t argc, i;
+
+ for (argc = 0; argv[argc] != NULL; ++argc)
+ ;
+
+ list = PyList_New (argc);
+ for (i = 0; i < argc; ++i)
+ PyList_SetItem (list, i, PyString_FromString (argv[i]));
+
+ return list;
+}
+
+static void
+free_strings (char **argv)
+{
+ size_t argc;
+
+ for (argc = 0; argv[argc] != NULL; ++argc)
+ free (argv[argc]);
+ free (argv);
+}
+
+/* Since hive_node_h is the same as hive_value_h this also works for values. */
+static PyObject *
+put_node_list (hive_node_h *nodes)
+{
+ PyObject *list;
+ size_t argc, i;
+
+ for (argc = 0; nodes[argc] != 0; ++argc)
+ ;
+
+ list = PyList_New (argc);
+ for (i = 0; i < argc; ++i)
+ PyList_SetItem (list, i, PyLong_FromLongLong ((long) nodes[i]));
+
+ return list;
+}
+
+static PyObject *
+put_len_type (size_t len, hive_type t)
+{
+ PyObject *r = PyTuple_New (2);
+ PyTuple_SetItem (r, 0, PyInt_FromLong ((long) t));
+ PyTuple_SetItem (r, 1, PyLong_FromLongLong ((long) len));
+ return r;
+}
+
+static PyObject *
+put_val_type (char *val, size_t len, hive_type t)
+{
+ PyObject *r = PyTuple_New (2);
+ PyTuple_SetItem (r, 0, PyInt_FromLong ((long) t));
+ PyTuple_SetItem (r, 1, PyString_FromStringAndSize (val, len));
+ return r;
+}
+
+";
+
+ (* Generate functions. *)
+ List.iter (
+ fun (name, style, _, longdesc) ->
+ pr "static PyObject *\n";
+ pr "py_hivex_%s (PyObject *self, PyObject *args)\n" name;
+ pr "{\n";
+ pr " PyObject *py_r;\n";
+
+ (* Emit the C declaration of the result variable 'r' (and of any
+  * extra out-variables the call needs), and evaluate to the C
+  * expression that is spliced into the generated "if (r == ...)"
+  * error check further down.
+  *
+  * Where the sentinel value can also be a legitimate result, the
+  * generated code clears errno first and the check becomes
+  * "r == <sentinel> && errno != 0".
+  *)
+ let error_code =
+   match fst style with
+   | RErr -> pr " int r;\n"; "-1"
+   | RErrDispose -> pr " int r;\n"; "-1"
+   | RHive -> pr " hive_h *r;\n"; "NULL"
+   | RSize ->
+       (* A size of 0 can be a valid answer (eg. the zero-length key
+        * of hivex_value_key_len), so 0 alone must not be treated as
+        * an error: errno is the only reliable indicator.
+        *)
+       pr " errno = 0;\n";
+       pr " size_t r;\n";
+       "0 && errno != 0"
+   | RNode -> pr " hive_node_h r;\n"; "0"
+   | RNodeNotFound ->
+       pr " errno = 0;\n";
+       pr " hive_node_h r;\n";
+       "0 && errno != 0"
+   | RNodeList -> pr " hive_node_h *r;\n"; "NULL"
+   | RValue -> pr " hive_value_h r;\n"; "0"
+   | RValueList -> pr " hive_value_h *r;\n"; "NULL"
+   | RString -> pr " char *r;\n"; "NULL"
+   | RStringList -> pr " char **r;\n"; "NULL"
+   | RLenType ->
+       pr " int r;\n";
+       pr " size_t len;\n";
+       pr " hive_type t;\n";
+       "-1"
+   | RLenTypeVal ->
+       pr " char *r;\n";
+       pr " size_t len;\n";
+       pr " hive_type t;\n";
+       "NULL"
+   | RInt32 ->
+       pr " errno = 0;\n";
+       pr " int32_t r;\n";
+       "-1 && errno != 0"
+   | RInt64 ->
+       pr " errno = 0;\n";
+       pr " int64_t r;\n";
+       "-1 && errno != 0" in
+
+ (* Call and arguments. *)
+ let c_params =
+ List.map (function
+ | AUnusedFlags -> "0"
+ | ASetValues -> "values.nr_values, values.values"
+ | ASetValue -> "&val"
+ | arg -> name_of_argt arg) (snd style) in
+ let c_params =
+ match fst style with
+ | RLenType | RLenTypeVal -> c_params @ ["&t"; "&len"]
+ | _ -> c_params in
+
+ List.iter (
+ function
+ | AHive ->
+ pr " hive_h *h;\n";
+ pr " PyObject *py_h;\n"
+ | ANode n
+ | AValue n ->
+ pr " long %s;\n" n
+ | AString n
+ | AStringNullable n ->
+ pr " char *%s;\n" n
+ | AOpenFlags ->
+ pr " int flags;\n"
+ | AUnusedFlags -> ()
+ | ASetValues ->
+ pr " py_set_values values;\n";
+ pr " PyObject *py_values;\n"
+ | ASetValue ->
+ pr " hive_set_value val;\n";
+ pr " PyObject *py_val;\n"
+ ) (snd style);
+
+ pr "\n";
+
+ (* Convert the required parameters. *)
+ pr " if (!PyArg_ParseTuple (args, (char *) \"";
+ List.iter (
+ function
+ | AHive ->
+ pr "O"
+ | ANode n
+ | AValue n ->
+ pr "l"
+ | AString n ->
+ pr "s"
+ | AStringNullable n ->
+ pr "z"
+ | AOpenFlags ->
+ pr "i"
+ | AUnusedFlags -> ()
+ | ASetValues
+ | ASetValue ->
+ pr "O"
+ ) (snd style);
+
+ pr ":hivex_%s\"" name;
+
+ List.iter (
+ function
+ | AHive ->
+ pr ", &py_h"
+ | ANode n
+ | AValue n ->
+ pr ", &%s" n
+ | AString n
+ | AStringNullable n ->
+ pr ", &%s" n
+ | AOpenFlags ->
+ pr ", &flags"
+ | AUnusedFlags -> ()
+ | ASetValues ->
+ pr ", &py_values"
+ | ASetValue ->
+ pr ", &py_val"
+ ) (snd style);
+
+ pr "))\n";
+ pr " return NULL;\n";
+
+ (* Convert some Python argument types to C. *)
+ List.iter (
+ function
+ | AHive ->
+ pr " h = get_handle (py_h);\n"
+ | ANode _
+ | AValue _
+ | AString _
+ | AStringNullable _
+ | AOpenFlags
+ | AUnusedFlags -> ()
+ | ASetValues ->
+ pr " if (get_values (py_values, &values) == -1)\n";
+ pr " return NULL;\n"
+ | ASetValue ->
+ pr " if (get_value (py_val, &val) == -1)\n";
+ pr " return NULL;\n"
+ ) (snd style);
+
+ (* Call the C function. *)
+ pr " r = hivex_%s (%s);\n" name (String.concat ", " c_params);
+
+ (* Free up arguments. *)
+ List.iter (
+ function
+ | AHive | ANode _ | AValue _
+ | AString _ | AStringNullable _
+ | AOpenFlags | AUnusedFlags -> ()
+ | ASetValues ->
+ pr " free (values.values);\n"
+ | ASetValue -> ()
+ ) (snd style);
+
+ (* Check for errors from C library. *)
+ pr " if (r == %s) {\n" error_code;
+ pr " PyErr_SetString (PyExc_RuntimeError,\n";
+ pr " strerror (errno));\n";
+ pr " return NULL;\n";
+ pr " }\n";
+ pr "\n";
+
+ (* Convert return value to Python.
+  *
+  * Each arm emits the C statements that build 'py_r' from 'r' and
+  * release anything the C call allocated.  Every emitted statement
+  * ends in a newline so that the "return py_r;" printed afterwards
+  * starts on a line of its own.
+  *)
+ (match fst style with
+  | RErr
+  | RErrDispose ->
+      pr " Py_INCREF (Py_None);\n";
+      pr " py_r = Py_None;\n"
+  | RHive ->
+      pr " py_r = put_handle (r);\n"
+  | RSize
+  | RNode ->
+      pr " py_r = PyLong_FromLongLong (r);\n"
+  | RNodeNotFound ->
+      (* r == 0 without an error means "not found": return None. *)
+      pr " if (r)\n";
+      pr " py_r = PyLong_FromLongLong (r);\n";
+      pr " else {\n";
+      pr " Py_INCREF (Py_None);\n";
+      pr " py_r = Py_None;\n";
+      pr " }\n"
+  | RNodeList
+  | RValueList ->
+      pr " py_r = put_node_list (r);\n";
+      pr " free (r);\n"
+  | RValue ->
+      pr " py_r = PyLong_FromLongLong (r);\n"
+  | RString ->
+      pr " py_r = PyString_FromString (r);\n";
+      pr " free (r);\n"
+  | RStringList ->
+      pr " py_r = put_string_list (r);\n";
+      pr " free_strings (r);\n"
+  | RLenType ->
+      pr " py_r = put_len_type (len, t);\n"
+  | RLenTypeVal ->
+      pr " py_r = put_val_type (r, len, t);\n";
+      pr " free (r);\n"
+  | RInt32 ->
+      pr " py_r = PyInt_FromLong ((long) r);\n"
+  | RInt64 ->
+      pr " py_r = PyLong_FromLongLong (r);\n"
+ );
+ pr " return py_r;\n";
+ pr "}\n";
+ pr "\n"
+ ) functions;
+
+ (* Table of functions. *)
+ pr "static PyMethodDef methods[] = {\n";
+ List.iter (
+ fun (name, _, _, _) ->
+ pr " { (char *) \"%s\", py_hivex_%s, METH_VARARGS, NULL },\n"
+ name name
+ ) functions;
+ pr " { NULL, NULL, 0, NULL }\n";
+ pr "};\n";
+ pr "\n";
+
+ (* Init function. *)
+ pr "\
+void
+initlibhivexmod (void)
+{
+ static int initialized = 0;
+
+ if (initialized) return;
+ Py_InitModule ((char *) \"libhivexmod\", methods);
+ initialized = 1;
+}
+"
+
and generate_python_py () =
- generate_header HashStyle LGPLv2plus
+ generate_header HashStyle LGPLv2plus;
-and generate_python_c () =
- generate_header CStyle LGPLv2plus
+ pr "\
+u\"\"\"Python bindings for hivex
+
+import hivex
+h = hivex.Hivex (filename)
+
+The hivex module provides Python bindings to the hivex API for
+examining and modifying Windows Registry 'hive' files.
+
+Read the hivex(3) man page to find out how to use the API.
+\"\"\"
+
+import libhivexmod
+
+class Hivex:
+ \"\"\"Instances of this class are hivex API handles.\"\"\"
+
+ def __init__ (self, filename";
+
+ List.iter (
+ fun (_, flag, _) -> pr ", %s = False" (String.lowercase flag)
+ ) open_flags;
+
+ pr "):
+ \"\"\"Create a new hivex handle.\"\"\"
+ flags = 0
+";
+
+ List.iter (
+ fun (n, flag, description) ->
+ pr " # %s\n" description;
+ pr " if %s: flags += %d\n" (String.lowercase flag) n
+ ) open_flags;
+
+ pr " self._o = libhivexmod.open (filename, flags)
+
+ def __del__ (self):
+ libhivexmod.close (self._o)
+
+";
+
+ List.iter (
+ fun (name, style, shortdesc, _) ->
+ (* The close and open calls are handled specially above. *)
+ if fst style <> RErrDispose && List.hd (snd style) = AHive then (
+ let args = List.tl (snd style) in
+ let args = List.filter (
+ function AOpenFlags | AUnusedFlags -> false
+ | _ -> true
+ ) args in
+
+ pr " def %s (self" name;
+ List.iter (fun arg -> pr ", %s" (name_of_argt arg)) args;
+ pr "):\n";
+ pr " u\"\"\"%s\"\"\"\n" shortdesc;
+ pr " return libhivexmod.%s (self._o" name;
+ List.iter (
+ fun arg ->
+ pr ", ";
+ match arg with
+ | AHive -> assert false
+ | ANode n | AValue n
+ | AString n | AStringNullable n -> pr "%s" n
+ | AOpenFlags
+ | AUnusedFlags -> assert false
+ | ASetValues -> pr "values"
+ | ASetValue -> pr "val"
+ ) args;
+ pr ")\n";
+ pr "\n"
+ )
+ ) functions
+
+and generate_ruby_c () =
+ generate_header CStyle LGPLv2plus;
+
+ (* Fixed prelude of the generated Ruby extension: includes,
+  * compatibility macros, the module/class/exception globals, the
+  * finalizer for hive handles, and the helpers which convert Ruby
+  * hashes into hive_set_value structs.
+  *
+  * String length and data are read through RSTRING_LEN / RSTRING_PTR
+  * rather than by dereferencing RSTRING() directly, because struct
+  * RString has no plain 'len' / 'ptr' members from Ruby 1.9 on.
+  * Fallback definitions are supplied for Rubies which lack the
+  * macros, in the same way as for RARRAY_LEN.
+  *)
+ pr "\
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include <ruby.h>
+
+#include \"hivex.h\"
+
+#include \"extconf.h\"
+
+/* For Ruby < 1.9 */
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(r) (RARRAY((r))->len)
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(r) (RSTRING((r))->len)
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(r) (RSTRING((r))->ptr)
+#endif
+
+static VALUE m_hivex; /* hivex module */
+static VALUE c_hivex; /* hive_h handle */
+static VALUE e_Error; /* used for all errors */
+
+static void
+ruby_hivex_free (void *hvp)
+{
+ hive_h *h = hvp;
+
+ if (h)
+ hivex_close (h);
+}
+
+static void
+get_value (VALUE valv, hive_set_value *val)
+{
+ VALUE key = rb_hash_lookup (valv, ID2SYM (rb_intern (\"key\")));
+ VALUE type = rb_hash_lookup (valv, ID2SYM (rb_intern (\"type\")));
+ VALUE value = rb_hash_lookup (valv, ID2SYM (rb_intern (\"value\")));
+
+ val->key = StringValueCStr (key);
+ val->t = NUM2ULL (type);
+ val->len = RSTRING_LEN (value);
+ val->value = RSTRING_PTR (value);
+}
+
+static hive_set_value *
+get_values (VALUE valuesv, size_t *nr_values)
+{
+ size_t i;
+ hive_set_value *ret;
+
+ *nr_values = RARRAY_LEN (valuesv);
+ ret = malloc (sizeof (*ret) * *nr_values);
+ if (ret == NULL)
+ abort ();
+
+ for (i = 0; i < *nr_values; ++i) {
+ VALUE v = rb_ary_entry (valuesv, i);
+ get_value (v, &ret[i]);
+ }
+
+ return ret;
+}
+
+";
+
+ List.iter (
+ fun (name, (ret, args), shortdesc, longdesc) ->
+ let () =
+ (* Generate rdoc. *)
+ let doc = replace_str longdesc "C<hivex_" "C<h." in
+ let doc = pod2text ~width:60 name doc in
+ let doc = String.concat "\n * " doc in
+ let doc = trim doc in
+
+ let call, args =
+ match args with
+ | AHive :: args -> "h." ^ name, args
+ | args -> "Hivex::" ^ name, args in
+ let args = filter_map (
+ function
+ | AUnusedFlags -> None
+ | args -> Some (name_of_argt args)
+ ) args in
+ let args = String.concat ", " args in
+
+ let ret =
+ match ret with
+ | RErr | RErrDispose -> "nil"
+ | RHive -> "Hivex::Hivex"
+ | RSize | RNode | RNodeNotFound -> "integer"
+ | RNodeList -> "list"
+ | RValue -> "integer"
+ | RValueList -> "list"
+ | RString -> "string"
+ | RStringList -> "list"
+ | RLenType -> "hash"
+ | RLenTypeVal -> "hash"
+ | RInt32 -> "integer"
+ | RInt64 -> "integer" in
+
+ pr "\
+/*
+ * call-seq:
+ * %s(%s) -> %s
+ *
+ * %s
+ *
+ * %s
+ *
+ * (For the C API documentation for this function, see
+ * +hivex_%s+[http://libguestfs.org/hivex.3.html#hivex_%s]).
+ */
+" call args ret shortdesc doc name name in
+
+ (* Generate the function. *)
+ pr "static VALUE\n";
+ pr "ruby_hivex_%s (" name;
+
+ let () =
+ (* If the first argument is not AHive, then this is a module-level
+ * function, and Ruby passes an implicit module argument which we
+ * must ignore. Otherwise the first argument is the hive handle.
+ *)
+ let args =
+ match args with
+ | AHive :: args -> pr "VALUE hv"; args
+ | args -> pr "VALUE modulev"; args in
+ List.iter (
+ function
+ | AUnusedFlags -> ()
+ | arg ->
+ pr ", VALUE %sv" (name_of_argt arg)
+ ) args;
+ pr ")\n" in
+
+ pr "{\n";
+
+ List.iter (
+ function
+ | AHive ->
+ pr " hive_h *h;\n";
+ pr " Data_Get_Struct (hv, hive_h, h);\n";
+ pr " if (!h)\n";
+ pr " rb_raise (rb_eArgError, \"%%s: used handle after closing it\",\n";
+ pr " \"%s\");\n" name;
+ | ANode n ->
+ pr " hive_node_h %s = NUM2ULL (%sv);\n" n n
+ | AValue n ->
+ pr " hive_value_h %s = NUM2ULL (%sv);\n" n n
+ | AString n ->
+ pr " const char *%s = StringValueCStr (%sv);\n" n n;
+ | AStringNullable n ->
+ pr " const char *%s =\n" n;
+ pr " !NIL_P (%sv) ? StringValueCStr (%sv) : NULL;\n" n n
+ | AOpenFlags ->
+ pr " int flags = 0;\n";
+ List.iter (
+ fun (n, flag, _) ->
+ pr " if (RTEST (rb_hash_lookup (flagsv, ID2SYM (rb_intern (\"%s\")))))\n"
+ (String.lowercase flag);
+ pr " flags += %d;\n" n
+ ) open_flags
+ | AUnusedFlags -> ()
+ | ASetValues ->
+ pr " size_t nr_values;\n";
+ pr " hive_set_value *values;\n";
+ pr " values = get_values (valuesv, &nr_values);\n"
+ | ASetValue ->
+ pr " hive_set_value val;\n";
+ pr " get_value (valv, &val);\n"
+ ) args;
+ pr "\n";
+
+ (* Emit the C declaration of the result variable 'r' (and of any
+  * extra out-variables the call needs), and evaluate to the C
+  * expression that is spliced into the generated "if (r == ...)"
+  * error check further down.
+  *
+  * Where the sentinel value can also be a legitimate result, the
+  * generated code clears errno first and the check becomes
+  * "r == <sentinel> && errno != 0".
+  *)
+ let error_code =
+   match ret with
+   | RErr -> pr " int r;\n"; "-1"
+   | RErrDispose -> pr " int r;\n"; "-1"
+   | RHive -> pr " hive_h *r;\n"; "NULL"
+   | RSize ->
+       (* A size of 0 can be a valid answer (eg. the zero-length key
+        * of hivex_value_key_len), so 0 alone must not be treated as
+        * an error: errno is the only reliable indicator.
+        *)
+       pr " errno = 0;\n";
+       pr " size_t r;\n";
+       "0 && errno != 0"
+   | RNode -> pr " hive_node_h r;\n"; "0"
+   | RNodeNotFound ->
+       pr " errno = 0;\n";
+       pr " hive_node_h r;\n";
+       "0 && errno != 0"
+   | RNodeList -> pr " hive_node_h *r;\n"; "NULL"
+   | RValue -> pr " hive_value_h r;\n"; "0"
+   | RValueList -> pr " hive_value_h *r;\n"; "NULL"
+   | RString -> pr " char *r;\n"; "NULL"
+   | RStringList -> pr " char **r;\n"; "NULL"
+   | RLenType ->
+       pr " int r;\n";
+       pr " size_t len;\n";
+       pr " hive_type t;\n";
+       "-1"
+   | RLenTypeVal ->
+       pr " char *r;\n";
+       pr " size_t len;\n";
+       pr " hive_type t;\n";
+       "NULL"
+   | RInt32 ->
+       pr " errno = 0;\n";
+       pr " int32_t r;\n";
+       "-1 && errno != 0"
+   | RInt64 ->
+       pr " errno = 0;\n";
+       pr " int64_t r;\n";
+       "-1 && errno != 0" in
+ pr "\n";
+
+ let c_params =
+ List.map (function
+ | ASetValues -> ["nr_values"; "values"]
+ | ASetValue -> ["&val"]
+ | AUnusedFlags -> ["0"]
+ | arg -> [name_of_argt arg]) args in
+ let c_params =
+ match ret with
+ | RLenType | RLenTypeVal -> c_params @ [["&t"; "&len"]]
+ | _ -> c_params in
+ let c_params = List.concat c_params in
+
+ pr " r = hivex_%s (%s" name (List.hd c_params);
+ List.iter (pr ", %s") (List.tl c_params);
+ pr ");\n";
+ pr "\n";
+
+ (* Dispose of the hive handle (even if hivex_close returns error). *)
+ (match ret with
+ | RErrDispose ->
+ pr " /* So we don't double-free in the finalizer. */\n";
+ pr " DATA_PTR (hv) = NULL;\n";
+ pr "\n";
+ | _ -> ()
+ );
+
+ List.iter (
+ function
+ | AHive
+ | ANode _
+ | AValue _
+ | AString _
+ | AStringNullable _
+ | AOpenFlags
+ | AUnusedFlags -> ()
+ | ASetValues ->
+ pr " free (values);\n"
+ | ASetValue -> ()
+ ) args;
+
+ (* Check for errors from C library. *)
+ pr " if (r == %s)\n" error_code;
+ pr " rb_raise (e_Error, \"%%s\", strerror (errno));\n";
+ pr "\n";
+
+ (match ret with
+ | RErr | RErrDispose ->
+ pr " return Qnil;\n"
+ | RHive ->
+ pr " return Data_Wrap_Struct (c_hivex, NULL, ruby_hivex_free, r);\n"
+ | RSize
+ | RNode
+ | RValue
+ | RInt64 ->
+ pr " return ULL2NUM (r);\n"
+ | RInt32 ->
+ pr " return INT2NUM (r);\n"
+ | RNodeNotFound ->
+ pr " if (r)\n";
+ pr " return ULL2NUM (r);\n";
+ pr " else\n";
+ pr " return Qnil;\n"
+ | RNodeList
+ | RValueList ->
+ pr " size_t i, len = 0;\n";
+ pr " for (i = 0; r[i] != 0; ++i) len++;\n";
+ pr " VALUE rv = rb_ary_new2 (len);\n";
+ pr " for (i = 0; r[i] != 0; ++i)\n";
+ pr " rb_ary_push (rv, ULL2NUM (r[i]));\n";
+ pr " free (r);\n";
+ pr " return rv;\n"
+ | RString ->
+ pr " VALUE rv = rb_str_new2 (r);\n";
+ pr " free (r);\n";
+ pr " return rv;\n"
+ | RStringList ->
+ pr " size_t i, len = 0;\n";
+ pr " for (i = 0; r[i] != NULL; ++i) len++;\n";
+ pr " VALUE rv = rb_ary_new2 (len);\n";
+ pr " for (i = 0; r[i] != NULL; ++i) {\n";
+ pr " rb_ary_push (rv, rb_str_new2 (r[i]));\n";
+ pr " free (r[i]);\n";
+ pr " }\n";
+ pr " free (r);\n";
+ pr " return rv;\n"
+ | RLenType ->
+ pr " VALUE rv = rb_hash_new ();\n";
+ pr " rb_hash_aset (rv, ID2SYM (rb_intern (\"len\")), INT2NUM (len));\n";
+ pr " rb_hash_aset (rv, ID2SYM (rb_intern (\"type\")), INT2NUM (t));\n";
+ pr " return rv;\n"
+ | RLenTypeVal ->
+ pr " VALUE rv = rb_hash_new ();\n";
+ pr " rb_hash_aset (rv, ID2SYM (rb_intern (\"len\")), INT2NUM (len));\n";
+ pr " rb_hash_aset (rv, ID2SYM (rb_intern (\"type\")), INT2NUM (t));\n";
+ pr " rb_hash_aset (rv, ID2SYM (rb_intern (\"value\")), rb_str_new (r, len));\n";
+ pr " free (r);\n";
+ pr " return rv;\n"
+ );
+
+ pr "}\n";
+ pr "\n"
+ ) functions;
+
+ pr "\
+/* Initialize the module. */
+void Init__hivex ()
+{
+ m_hivex = rb_define_module (\"Hivex\");
+ c_hivex = rb_define_class_under (m_hivex, \"Hivex\", rb_cObject);
+ e_Error = rb_define_class_under (m_hivex, \"Error\", rb_eStandardError);
+
+ /* XXX How to pass arguments? */
+#if 0
+#ifdef HAVE_RB_DEFINE_ALLOC_FUNC
+ rb_define_alloc_func (c_hivex, ruby_hivex_open);
+#endif
+#endif
+
+";
+
+ (* Methods. *)
+ List.iter (
+ fun (name, (_, args), _, _) ->
+ let args = List.filter (
+ function
+ | AUnusedFlags -> false
+ | _ -> true
+ ) args in
+ let nr_args = List.length args in
+ match args with
+ | AHive :: _ ->
+ pr " rb_define_method (c_hivex, \"%s\",\n" name;
+ pr " ruby_hivex_%s, %d);\n" name (nr_args-1)
+ | args -> (* class function *)
+ pr " rb_define_module_function (m_hivex, \"%s\",\n" name;
+ pr " ruby_hivex_%s, %d);\n" name nr_args
+ ) functions;
+
+ pr "}\n"
let output_to filename k =
let filename_new = filename ^ ".new" in
output_to "perl/lib/Win/Hivex.pm" generate_perl_pm;
output_to "perl/Hivex.xs" generate_perl_xs;
-(*
- We ran out of time before we could write the Python bindings.
output_to "python/hivex.py" generate_python_py;
output_to "python/hivex-py.c" generate_python_c;
-*)
+
+ output_to "ruby/ext/hivex/_hivex.c" generate_ruby_c;
(* Always generate this file last, and unconditionally. It's used
* by the Makefile to know when we must re-run the generator.