X-Git-Url: http://git.annexia.org/?p=libguestfs.git;a=blobdiff_plain;f=hivex%2Fhivex.pod;fp=hivex%2Fhivex.pod;h=0000000000000000000000000000000000000000;hp=275eb423efa3874f13074ac71e7f26a2f7f6a1d1;hb=a8c3723e3818b5a7a31520043c6831115ab9d0e0;hpb=aa4700f0325743aaa16b525637b00ee61b39df13 diff --git a/hivex/hivex.pod b/hivex/hivex.pod deleted file mode 100644 index 275eb42..0000000 --- a/hivex/hivex.pod +++ /dev/null @@ -1,655 +0,0 @@ -=encoding utf8 - -=head1 NAME - -hivex - Windows Registry "hive" extraction library - -=head1 SYNOPSIS - - hive_h *hivex_open (const char *filename, int flags); - int hivex_close (hive_h *h); - -=head1 DESCRIPTION - -libhivex is a library for extracting the contents of Windows Registry -"hive" files. It is designed to be secure against buggy or malicious -registry files. - -Unlike many other tools in this area, it doesn't use the textual .REG -format for output, because parsing that is as much trouble as parsing -the original binary format. Instead it makes the file available -through a C API, or there is a separate program to export the hive as -XML (see L<hivexml(1)>), or to get individual keys (see -L<hivexget(1)>). - -=head2 OPENING AND CLOSING A HIVE - -=over 4 - -=item hive_h *hivex_open (const char *filename, int flags); - -Opens the hive named C<filename> for reading. - -Flags is an ORed list of the open flags (or C<0> if you don't -want to pass any flags). These flags are defined: - -=over 4 - -=item HIVEX_OPEN_VERBOSE - -Verbose messages. - -=item HIVEX_OPEN_DEBUG - -Very verbose messages, suitable for debugging problems in the library -itself. - -This is also selected if the C<HIVEX_DEBUG> environment variable -is set to 1. - -=item HIVEX_OPEN_WRITE - -Open the hive for writing. If omitted, the hive is read-only. - -See L</WRITING TO HIVE FILES>. - -=back - -C<hivex_open> returns a hive handle. On error this returns NULL and -sets C<errno> to indicate the error. - -=item int hivex_close (hive_h *h); - -Close a hive handle and free all associated resources.
- -Note that any uncommitted writes are I<not> committed by this call, -but instead are lost. See L</WRITING TO HIVE FILES>. - -Returns 0 on success. On error this returns -1 and sets errno. - -=back - -=head2 NAVIGATING THE TREE OF HIVE SUBKEYS - -=over 4 - -=item hive_node_h - -This is a node handle, an integer but opaque outside the library. -Valid node handles cannot be 0. The library returns 0 in some -situations to indicate an error. - -=item hive_node_h hivex_root (hive_h *h); - -Return root node of the hive. All valid registries must contain -a root node. - -On error this returns 0 and sets errno. - -=item char *hivex_node_name (hive_h *h, hive_node_h node); - -Return the name of the node. The name is reencoded as UTF-8 -and returned as a C string. - -The string should be freed by the caller when it is no longer needed. - -Note that the name of the root node is a dummy, such as -C<$$$PROTO.HIV> (other names are possible: it seems to depend on the -tool or program that created the hive in the first place). You can -only know the "real" name of the root node by knowing which registry -file this hive originally comes from, which is knowledge that is -outside the scope of this library. - -On error this returns NULL and sets errno. - -=item hive_node_h *hivex_node_children (hive_h *h, hive_node_h node); - -Return a 0-terminated array of nodes which are the subkeys -(children) of C<node>. - -The array should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item hive_node_h hivex_node_get_child (hive_h *h, hive_node_h node, const char *name); - -Return the child of node with the name C<name>, if it exists. - -The name is matched case insensitively. - -If the child node does not exist, this returns 0 without -setting errno. - -On error this returns 0 and sets errno. - -=item hive_node_h hivex_node_parent (hive_h *h, hive_node_h node); - -Return the parent of C<node>. - -On error this returns 0 and sets errno.
- -The parent pointer of the root node in registry files that we -have examined seems to be invalid, and so this function will -return an error if called on the root node. - -=back - -=head2 GETTING VALUES AT A NODE - -The enum below describes the possible types for the value(s) -stored at each node. - - enum hive_type { - hive_t_none = 0, - hive_t_string = 1, - hive_t_expand_string = 2, - hive_t_binary = 3, - hive_t_dword = 4, - hive_t_dword_be = 5, - hive_t_link = 6, - hive_t_multiple_strings = 7, - hive_t_resource_list = 8, - hive_t_full_resource_description = 9, - hive_t_resource_requirements_list = 10, - hive_t_qword = 11 - }; - -=over 4 - -=item hive_value_h - -This is a value handle, an integer but opaque outside the library. -Valid value handles cannot be 0. The library returns 0 in some -situations to indicate an error. - -=item hive_value_h *hivex_node_values (hive_h *h, hive_node_h node); - -Return the 0-terminated array of (key, value) pairs attached to -this node. - -The array should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item hive_value_h hivex_node_get_value (hive_h *h, hive_node_h node, const char *key); - -Return the value attached to this node which has the name C<key>, -if it exists. - -The key name is matched case insensitively. - -Note that to get the default key, you should pass the empty -string C<""> here. The default key is often written C<"@">, but -inside hives that has no meaning and won't give you the -default key. - -If no such key exists, this returns 0 and does not set errno. - -On error this returns 0 and sets errno. - -=item char *hivex_value_key (hive_h *h, hive_value_h value); - -Return the key (name) of a (key, value) pair. The name -is reencoded as UTF-8 and returned as a C string. - -The string should be freed by the caller when it is no longer needed. - -Note that this function can return a zero-length string.
In the -context of Windows Registries, this means that this value is the -default key for this node in the tree. This is usually written -as C<"@">. - -On error this returns NULL and sets errno. - -=item int hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len); - -Return the data type and length of the value in this (key, value) -pair. See also C<hivex_value_value> which returns all this -information, and the value itself. Also, C<hivex_value_*> functions -below which can be used to return the value in a more useful form when -you know the type in advance. - -Returns 0 on success. On error this returns -1 and sets errno. - -=item char *hivex_value_value (hive_h *h, hive_value_h value, hive_type *t, size_t *len); - -Return the value of this (key, value) pair. The value should -be interpreted according to its type (see C<hive_type>). - -The value is returned in an array of bytes of length C<len>. - -The value should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item char *hivex_value_string (hive_h *h, hive_value_h value); - -If this value is a string, return the string reencoded as UTF-8 -(as a C string). This only works for values which have type -C<hive_t_string>, C<hive_t_expand_string> or C<hive_t_link>. - -The string should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item char **hivex_value_multiple_strings (hive_h *h, hive_value_h value); - -If this value is a multiple-string, return the strings reencoded -as UTF-8 (as a NULL-terminated array of C strings). This only -works for values which have type C<hive_t_multiple_strings>. - -The string array and each string in it should be freed by the -caller when they are no longer needed. - -On error this returns NULL and sets errno. - -=item int32_t hivex_value_dword (hive_h *h, hive_value_h value); - -If this value is a DWORD (Windows int32), return it. This only works -for values which have type C<hive_t_dword> or C<hive_t_dword_be>.
- -=item int64_t hivex_value_qword (hive_h *h, hive_value_h value); - -If this value is a QWORD (Windows int64), return it. This only -works for values which have type C<hive_t_qword>. - -=back - -=head2 VISITING ALL NODES - -The visitor pattern is useful if you want to visit all nodes -in the tree or all nodes below a certain point in the tree. - -First you set up your own C<struct hivex_visitor> with your -callback functions. - -Each of these callback functions should return 0 on success or -1 -on error. If any callback returns -1, then the entire visit -terminates immediately. If you don't need a callback function at -all, set the function pointer to NULL. - - struct hivex_visitor { - int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *str); - int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, - hive_value_h, hive_type t, size_t len, const char *key, char **argv); - int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, - hive_value_h, hive_type t, size_t len, const char *key, - const char *str); - int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, int32_t); - int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, int64_t); - int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - /* If value_any callback is not NULL, then the other value_* - * callbacks are not used,
and value_any is called on all values. - */ - int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - }; - -=over 4 - -=item int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); - -Visit all the nodes recursively in the hive C<h>. - -C<visitor> should be a C<hivex_visitor> structure with callback -fields filled in as required (unwanted callbacks can be set to -NULL). C<len> must be the length of the 'visitor' struct (you -should pass C<sizeof (struct hivex_visitor)> for this). - -This returns 0 if the whole recursive visit was completed -successfully. On error this returns -1. If one of the callback -functions returned an error then we don't touch errno. If the -error was generated internally then we set errno. - -You can skip bad registry entries by setting C<flags> to -C<HIVEX_VISIT_SKIP_BAD>. If this flag is not set, then a bad registry -causes the function to return an error immediately. - -This function is robust if the registry contains cycles or -pointers which are invalid or outside the registry. It detects -these cases and returns an error. - -=item int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque); - -Same as C<hivex_visit> but instead of starting out at the root, this -starts at C<node>. - -=back - -=head2 WRITING TO HIVE FILES - -The hivex library supports making limited modifications to hive files. -We have tried to implement this very conservatively in order to reduce -the chance of corrupting your registry. However you should be careful -and take back-ups, since Microsoft has never documented the hive -format, and so it is possible there are nuances in the -reverse-engineered format that we do not understand. - -To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE> -flag to C<hivex_open>, otherwise any write operation will return with -errno C<EROFS>. - -The write operations shown below do not modify the on-disk file -immediately. You must call C<hivex_commit> in order to write the -changes to disk.
If you call C<hivex_close> without committing then -any writes are discarded. - -Hive files internally consist of a "memory dump" of binary blocks -(like the C heap), and some of these blocks can be unused. The hivex -library never reuses these unused blocks. Instead, to ensure -robustness in the face of the partially understood on-disk format, -hivex only allocates new blocks after the end of the file, and makes -minimal modifications to existing structures in the file to point to -these new blocks. This makes hivex slightly less disk-efficient than -it could be, but disk is cheap, and registry modifications tend to be -very small. - -When deleting nodes, it is possible that this library may leave -unreachable live blocks in the hive. This is because certain parts of -the hive disk format such as security (sk) records and big data (db) -records and classname fields are not well understood (and not -documented at all) and we play it safe by not attempting to modify -them. Apart from wasting a little bit of disk space, it is not -thought that unreachable blocks are a problem. - -=over 4 - -=item int hivex_commit (hive_h *h, const char *filename, int flags); - -Commit (write) any changes which have been made. - -C<filename> is the new file to write. If C<filename == NULL> then we -overwrite the original file (ie. the file name that was passed to -C<hivex_open>). C<flags> is not used, always pass 0. - -Returns 0 on success. On error this returns -1 and sets errno. - -Note this does not close the hive handle. You can perform further -operations on the hive after committing, including making more -modifications. If you no longer wish to use the hive, call -C<hivex_close> after this. - -=item hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name); - -Add a new child node named C<name> to the existing node C<parent>. -The new child initially has no subnodes and contains no keys or -values. The sk-record (security descriptor) is inherited from -the parent.
- -The parent must not have an existing child called C<name>, so if you -want to overwrite an existing child, call C<hivex_node_delete_child> -first. - -Returns the node handle. On error this returns 0 and sets errno. - -=item int hivex_node_delete_child (hive_h *h, hive_node_h node); - -Delete the node C<node>. All values at the node and all subnodes are -deleted (recursively). The C<node> handle and the handles of all -subnodes become invalid. You cannot delete the root node. - -Returns 0 on success. On error this returns -1 and sets errno. - -=item hive_set_value - -The typedef C<hive_set_value> is used in conjunction with the -C<hivex_node_set_values> call described below. - - struct hive_set_value { - char *key; /* key - a UTF-8 encoded ASCIIZ string */ - hive_type t; /* type of value field */ - size_t len; /* length of value field in bytes */ - char *value; /* value field */ - }; - typedef struct hive_set_value hive_set_value; - -To set the default value for a node, you have to pass C<key = "">. - -Note that the C<value> field is just treated as a list of bytes, and -is stored directly in the hive. The caller has to ensure correct -encoding and endianness, for example converting dwords to little -endian. - -The correct type and encoding for values depends on the node and key -in the registry, the version of Windows, and sometimes even changes -between versions of Windows for the same key. We don't document it -here. Often it's not documented at all. - -=item int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags); - -This call can be used to set all the (key, value) pairs stored in C<node>. - -C<node> is the node to modify. C<values> is an array of (key, value) -pairs. There should be C<nr_values> elements in this array. C<flags> -is not used, always pass 0. - -Any existing values stored at the node are discarded, and their -C<hive_value_h> handles become invalid. Thus you can remove all -values stored at C<node> by passing C<nr_values = 0>. - -Returns 0 on success. On error this returns -1 and sets errno.
- -Note that this library does not offer a way to modify just a single -key at a node. We don't implement a way to do this efficiently. - -=back - -=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED - -=over 4 - -=item * - -Changing the root node. - -=item * - -Creating a new hive file from scratch. This is impossible at present -because not all fields in the header are understood. - -=item * - -Modifying or deleting single values at a node. - -=item * - -Modifying security key (sk) records or classnames. -Previously we did not understand these records. However now they -are well-understood and we could add support if it was required -(but nothing much really uses them). - -=back - -=head1 THE STRUCTURE OF THE WINDOWS REGISTRY - -Note: To understand the relationship between hives and the common -Windows Registry keys (like C<HKEY_LOCAL_MACHINE>) please see the -Wikipedia page on the Windows Registry. - -The Windows Registry is split across various binary files, each -file being known as a "hive". This library only handles a single -hive file at a time. - -Hives are n-ary trees with a single root. Each node in the tree -has a name. - -Each node in the tree (including non-leaf nodes) may have an -arbitrary list of (key, value) pairs attached to it. It may -be the case that one of these pairs has an empty key. This -is referred to as the default key for the node. - -The (key, value) pairs are the place where the useful data is -stored in the registry. The key is always a string (possibly the -empty string for the default key). The value is a typed object -(eg. string, int32, binary, etc.). - -=head2 RELATIONSHIP TO .REG FILES - -Although this library does not care about or deal with Windows reg -files, it's useful to look at the relationship between the registry -itself and reg files because they are so common. - -A reg file is a text representation of the registry, or part of the -registry. The actual registry hives that Windows uses are binary -files.
There are a number of Windows and Linux tools that let you -generate reg files, or merge reg files back into the registry hives. -Notable amongst them is Microsoft's REGEDIT program (formerly known as -REGEDT32). - -A typical reg file will contain many sections looking like this: - - [HKEY_LOCAL_MACHINE\SOFTWARE\Classes\Stack] - "@"="Generic Stack" - "TileInfo"="prop:System.FileCount" - "TilePath"=str(2):"%systemroot%\\system32" - "ThumbnailCutoff"=dword:00000000 - "FriendlyTypeName"=hex(2):40,00,25,00,53,00,79,00,73,00,74,00,65,00,6d,00,52,00,6f,00,\ - 6f,00,74,00,25,00,5c,00,53,00,79,00,73,00,74,00,65,00,6d,00,\ - 33,00,32,00,5c,00,73,00,65,00,61,00,72,00,63,00,68,00,66,00,\ - 6f,00,6c,00,64,00,65,00,72,00,2e,00,64,00,6c,00,6c,00,2c,00,\ - 2d,00,39,00,30,00,32,00,38,00,00,00,d8 - -Taking this one piece at a time: - - [HKEY_LOCAL_MACHINE\SOFTWARE\Classes\Stack] - -This is the path to this node in the registry tree. The first part, -C<HKEY_LOCAL_MACHINE\SOFTWARE> means that this comes from a hive -(file) called C<SOFTWARE>. C<\Classes\Stack> is the real path part, -starting at the root node of the C<SOFTWARE> hive. - -Below the node name is a list of zero or more key-value pairs. Any -interior or leaf node in the registry may have key-value pairs -attached. - - "@"="Generic Stack" - -This is the "default key". In reality (ie. inside the binary hive) -the key string is the empty string. In reg files this is written as -C<@> but this has no meaning either in the hives themselves or in this -library. The value is a string (type 1 - see C<enum hive_type> -above). - - "TileInfo"="prop:System.FileCount" - -This is a regular (key, value) pair, with the value being a type 1 -string. Note that inside the binary file the string is likely to be -UTF-16 encoded. This library converts to and from UTF-8 strings -transparently. - - "TilePath"=str(2):"%systemroot%\\system32" - -The value in this case has type 2 (expanded string) meaning that some -%...% variables get expanded by Windows.
(This library doesn't know -or care about variable expansion). - - "ThumbnailCutoff"=dword:00000000 - -The value in this case is a dword (type 4). - - "FriendlyTypeName"=hex(2):40,00,.... - -This value is an expanded string (type 2) represented in the reg file -as a series of hex bytes. In this case the string appears to be a -UTF-16 string. - -=head1 NOTE ON THE USE OF ERRNO - -Many functions in this library set errno to indicate errors. These -are the values of errno you may encounter (this list is not -exhaustive): - -=over 4 - -=item ENOTSUP - -Corrupt or unsupported Registry file format. - -=item ENOKEY - -Missing root key. - -=item EINVAL - -Passed an invalid argument to the function. - -=item EFAULT - -Followed a Registry pointer which goes outside -the registry or outside a registry block. - -=item ELOOP - -Registry contains cycles. - -=item ERANGE - -Field in the registry out of range. - -=item EEXIST - -Registry key already exists. - -=item EROFS - -Tried to write to a registry which is not opened for writing. - -=back - -=head1 ENVIRONMENT VARIABLES - -=over 4 - -=item HIVEX_DEBUG - -Setting HIVEX_DEBUG=1 will enable very verbose messages. This is -useful for debugging problems with the library itself. - -=back - -=head1 SEE ALSO - -L, -L, -L, -L, -L, -L, -L, -L. - -=head1 AUTHORS - -Richard W.M. Jones (C) - -=head1 COPYRIGHT - -Copyright (C) 2009-2010 Red Hat Inc. - -Derived from code by Petter Nordahl-Hagen under a compatible license: -Copyright (C) 1997-2007 Petter Nordahl-Hagen. - -Derived from code by Markus Stephany under a compatible license: -Copyright (C) 2000-2004 Markus Stephany. - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; -version 2.1 of the License. 
- -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -See file LICENSE for the full license.