# Win::Hivex::Regedit
-# Copyright (C) 2009-2010 Red Hat Inc.
+# Copyright (C) 2009-2011 Red Hat Inc.
# Derived from code by Petter Nordahl-Hagen under a compatible license:
# Copyright (c) 1997-2007 Petter Nordahl-Hagen.
# Derived from code by Markus Stephany under a compatible license:
use warnings;
use Carp qw(croak confess);
-use Encode qw(encode);
+use Encode qw(encode decode);
require Exporter;
=head2 reg_export
- reg_export ($h, $key, $fh, [prefix => $prefix]);
+ reg_export ($h, $key, $fh,
+ [prefix => $prefix],
+ [unsafe_printable_strings => 1]);
This function exports the registry keys starting at the root
C<$key> and recursively downwards into the file handle C<$fh>.
"Key 1"=...
"Key 2"=...
-The output is written as pure 7 bit ASCII, with line endings which are
-the default for the local host. You may need to convert the file's
-encoding using L<iconv(1)> and line endings using L<unix2dos(1)> if
-sending to a Windows user. Strings are always encoded as hex bytes.
-See L</ENCODING STRINGS> below.
+If C<unsafe_printable_strings> is not given or is false, then the
+output is written as pure 7-bit ASCII, with line endings which are the
+default for the local host. In this mode strings are always encoded
+as hex bytes. This is safe because it preserves the original content
+and encoding of strings. See L</ENCODING STRINGS> below.
+
+If C<unsafe_printable_strings> is true, then values of type 1 or 2
+are assumed to be UTF-16LE strings and are converted to UTF-8 for
+output, written as C<str(1):"..."> or C<str(2):"..."> instead of hex
+bytes. The final zero codepoint in the string is removed if there is
+one. This is unsafe because it does not preserve the fidelity of the
+strings in the Registry and because the content type of strings is
+not always UTF-16LE. However it is useful if you just want to display
+strings for quick hacking and debugging.
+
+You may need to convert the file's encoding using L<iconv(1)> and line
+endings using L<unix2dos(1)> if sending to a Windows user.
Nodes and keys are sorted alphabetically in the output.
print $fh $path;
print $fh "]\n";
+ my $unsafe_printable_strings = $params{unsafe_printable_strings};
+
# Get the values.
my @values = $h->node_values ($node);
if ($type eq 4 && length ($data) == 4) { # only handle dword specially
my $dword = unpack ("V", $data);
printf $fh "dword:%08x\n", $dword
+ } elsif ($unsafe_printable_strings && ($type eq 1 || $type eq 2)) {
+ # Guess that the encoding is UTF-16LE. Convert it to UTF-8
+ # for printing.
+ $data = decode ("utf16le", $data);
+ $data =~ s/\x{0}$//; # remove final zero codepoint
+ $data =~ s/"/\\"/g; # XXX more quoting needed?
+ printf $fh "str(%x):\"%s\"\n", $type, $data;
} else {
# Encode everything else as hex, see encoding section below.
printf $fh "hex(%x):", $type;
=head1 COPYRIGHT
-Copyright (C) 2010 Red Hat Inc.
+Copyright (C) 2010-2011 Red Hat Inc.
=head1 LICENSE
#!/usr/bin/perl -w
-# Copyright (C) 2010 Red Hat Inc.
+# Copyright (C) 2010-2011 Red Hat Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
The default is to use UTF-16LE, which should work with recent versions
of Windows.
+=cut
+
+my $unsafe_printable_strings;
+
+=item B<--unsafe-printable-strings>
+
+When exporting (only), assume strings are UTF-16LE and print them as
+strings instead of hex sequences. Remove the final zero codepoint
+from strings if present.
+
+This is unsafe and does not preserve the fidelity of strings in the
+original hive for various reasons:
+
+=over 4
+
+=item *
+
+Assumes the original encoding is UTF-16LE. ASCII strings and strings
+in other encodings will be corrupted by this transformation.
+
+=item *
+
+Assumes that everything which has type 1 or 2 is really a string
+and that everything else is not a string, but the type field in
+real hives is not reliable.
+
+=item *
+
+Loses information about whether a zero codepoint followed the string
+in the hive or not.
+
+=back
+
+This all happens because the hive itself contains no information about
+how strings are encoded (see
+L<Win::Hivex::Regedit(3)/ENCODING STRINGS>).
+
+You should only use this option for quick hacking and debugging of the
+hive contents, and I<never> use it if the output is going to be passed
+into another program or stored in another hive.
+
=back
=cut
"export" => \$export,
"prefix=s" => \$prefix,
"encoding=s" => \$encoding,
+ "unsafe-printable-strings" => \$unsafe_printable_strings,
) or pod2usage (2);
pod2usage (1) if $help;
print "Windows Registry Editor Version 5.00\n\n";
- reg_export ($h, $key, \*STDOUT, prefix => $prefix);
+ reg_export ($h, $key, \*STDOUT,
+ prefix => $prefix,
+ unsafe_printable_strings => $unsafe_printable_strings);
}
=head1 SEE ALSO