From: Richard Jones
Date: Mon, 12 Jul 2010 14:06:35 +0000 (+0100)
Subject: Don't try to process junk after a string value as UTF-16.
X-Git-Tag: 1.2.3~4
X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=b71b88f588f8660935a7d462e97b84aa2d669249;p=hivex.git

Don't try to process junk after a string value as UTF-16.

Thanks to Hilko Bengen for characterizing the issue and providing an
initial version of this patch.
---

diff --git a/lib/hivex.c b/lib/hivex.c
index 2b22924..13d7556 100644
--- a/lib/hivex.c
+++ b/lib/hivex.c
@@ -62,6 +62,8 @@
 #define HIVEX_MAX_ALLOCATION 1000000
 
 static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len);
+static size_t utf16_string_len_in_bytes (const char *str);
+static size_t utf16_string_len_in_bytes_max (const char *str, size_t len);
 
 struct hive_h {
   char *filename;
@@ -1319,6 +1321,20 @@ hivex_value_string (hive_h *h, hive_value_h value)
     return NULL;
   }
 
+  /* Deal with the case where Windows has allocated a large buffer
+   * full of random junk, and only the first few bytes of the buffer
+   * contain a genuine UTF-16 string.
+   *
+   * In this case, iconv would try to process the junk bytes as UTF-16
+   * and inevitably find an illegal sequence (EILSEQ). Instead, stop
+   * after we find the first \0\0.
+   *
+   * (Found by Hilko Bengen in a fresh Windows XP SOFTWARE hive).
+   */
+  size_t slen = utf16_string_len_in_bytes_max (data, len);
+  if (slen < len)
+    len = slen;
+
   char *ret = windows_utf16_to_utf8 (data, len);
   free (data);
   if (ret == NULL)
@@ -1355,6 +1371,27 @@ utf16_string_len_in_bytes (const char *str)
   return ret;
 }
 
+/* As for utf16_string_len_in_bytes but only read up to a maximum length.
+ *
+ * Returns the number of bytes before the first UTF-16 NUL (\0\0), or
+ * before the end of the buffer, whichever comes first. Only whole
+ * 2-byte code units are examined, so a trailing odd byte is never read
+ * and the result never exceeds 'len'.
+ */
+static size_t
+utf16_string_len_in_bytes_max (const char *str, size_t len)
+{
+  size_t ret = 0;
+
+  while (len >= 2 && (str[0] || str[1])) {
+    str += 2;
+    ret += 2;
+    len -= 2;
+  }
+
+  return ret;
+}
+
 /* http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx */
 char **
 hivex_value_multiple_strings (hive_h *h, hive_value_h value)