From 754669441d3dccc339e9db0fb590f6ebe92b09d9 Mon Sep 17 00:00:00 2001 From: "rjones@intel.home.annexia.org" Date: Mon, 12 May 2008 21:52:04 +0100 Subject: [PATCH 1/1] Runlist parsing, now working. --- lib/diskimage_ntfs.ml | 232 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 180 insertions(+), 52 deletions(-) diff --git a/lib/diskimage_ntfs.ml b/lib/diskimage_ntfs.ml index 4e950e3..ea077af 100644 --- a/lib/diskimage_ntfs.ml +++ b/lib/diskimage_ntfs.ml @@ -100,51 +100,52 @@ and parse_mft dev mft_lcn mft_size = let bits = dev#read_bitstring mft_lcn mft_size in (* ... and turn the MFT into records. *) - let rec loop bits = - if Bitmatch.bitstring_length bits > 0 then ( - bitmatch bits with - | { "FILE" : 32 : string; - (* Assume 3 USAs starting at offset 0x30. XXX? *) - 0x30 : 16 : littleendian; - 0x03 : 16 : littleendian; - _ : 64; (* lsn *) - _ : 16; (* sequence_number *) - _ : 16; (* link_count *) - _ : 16; (* attrs_offset *) - _ : 16; (* MFT_RECORD_FLAGS *) - bytes_in_use : 32 : littleendian; - record_size : 32 : littleendian; - _ : 64; (* base_mft_record *) - _ : 16; (* next_attr_instance *) - _ : 16; (* reserved *) - _ : 32; (* mft_record_number *) - _ : 64; (* USN, 3 * USAs -- see above. *) - - (* The attributes. Subtract header size (0x30 bytes) - * and space for the USN/USAs (8 bytes). - *) - attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring; - - (* Subsequent MFT records: *) - rest : -1 : bitstring } -> - - if !debug then - eprintf "got an MFT record, now parsing attributes ...\n%!"; - - let attrs = parse_attrs attrs in - - loop rest (* loop rest of MFT records *) - - (* Just assume that the end of the list of MFT records - * is marked by all zeroes. This seems to be the - * case, but not sure if it is generally true. - * XXX? + let records = parse_mft_records bits in + records + +and parse_mft_records bits = + bitmatch bits with + | { "FILE" : 32 : string; + (* Assume 3 USAs starting at offset 0x30. XXX? 
*) + 0x30 : 16 : littleendian; + 0x03 : 16 : littleendian; + _ : 64; (* lsn *) + _ : 16; (* sequence_number *) + _ : 16; (* link_count *) + _ : 16; (* attrs_offset *) + _ : 16; (* MFT_RECORD_FLAGS *) + bytes_in_use : 32 : littleendian; (* bound but not used in this match arm *) + record_size : 32 : littleendian; + _ : 64; (* base_mft_record *) + _ : 16; (* next_attr_instance *) + _ : 16; (* reserved *) + _ : 32; (* mft_record_number *) + _ : 64; (* USN, 3 * USAs -- see above. *) + + (* The attributes. Subtract header size (0x30 bytes) + * and space for the USN/USAs (8 bytes). *) - | { 0x00000000_l : 32 } -> - () - ) in - let mft_records = loop bits in - mft_records + attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring; + + (* Subsequent MFT records: *) + rest : -1 : bitstring } -> + + if !debug then + eprintf "got an MFT record, now parsing attributes ...\n%!"; + + let attrs = parse_attrs attrs in + + parse_mft_records rest (* loop rest of MFT records *) + + (* Just assume that the end of the list of MFT records + * is marked by all zeroes. This seems to be the + * case, but not sure if it is generally true. + * XXX? + *) + | { 0x00000000_l : 32 } -> + () + + | { _ } -> () (* anything else: stop parsing without reporting it *) and parse_attrs attrs = (* Parse the MFT record attributes. 
*) @@ -167,18 +168,69 @@ and parse_attrs attrs = | { attr_type : 32 : littleendian; attr_size : 32 : littleendian; 1 : 8; (* non-resident attribute *) - pad : (Int32.to_int attr_size - 9) * 8 : bitstring; + 0 : 8; (* name length, assume unnamed *) + _ : 16; (* name offset *) + _ : 16; (* flags *) + _ : 16; (* instance number *) + 0L : 64 : littleendian; (* lowest VCN, assume single extent *) + highest_vcn : 64 : littleendian; (* size in clusters - 1 *) + 0x40 : 16 : littleendian; (* mapping pairs offset *) + 0 : 8; (* assume not compressed *) + pad : 40 : bitstring; (* padding; with the fields around it the fixed header is 0x40 bytes *) + allocated_size : 64 : littleendian; (* allocated size on disk *) + data_size : 64 : littleendian; (* byte size of the attribute *) + initialized_size : 64 : littleendian; + + (* Table of virtual clusters to logical clusters. *) + mapping_pairs : (Int32.to_int attr_size - 0x40) * 8 : bitstring; + rest : -1 : bitstring } -> - if !debug then - eprintf "cannot parse non-resident attr %lx\n%!" attr_type; + + (* XXX let attr = *) + parse_nonresident_attr attr_type highest_vcn + allocated_size data_size initialized_size + mapping_pairs; + parse_attrs rest + (* Not matched above, so we don't know how to parse this attribute, but + * there is still enough information to skip to the next one. + *) + | { attr_type : 32 : littleendian; + attr_size : 32 : littleendian; + pad : (Int32.to_int attr_size - 8) * 8 : bitstring; + rest : -1 : bitstring } -> + + if !debug then ( + eprintf "cannot parse MFT attribute entry\n%!"; + Bitmatch.hexdump_bitstring Pervasives.stderr attrs + ); + + parse_attrs rest + + (* Otherwise unparsable & unskippable attribute entry. *) | { _ } -> if !debug then eprintf "corrupt MFT attribute entry\n%!" 
and parse_resident_attr attr_type attr = match attr_type with + | 0x10_l -> (* AT_STANDARD_INFORMATION *) + (bitmatch attr with + | { creation_time : 64 : littleendian; (* NTFS stores integers little-endian; bitmatch defaults to big-endian *) + last_data_change_time : 64 : littleendian; + last_mft_change_time : 64 : littleendian; + last_access_time : 64 : littleendian + (* other stuff follows, just ignore it *) } -> + if !debug then + eprintf "creation time: %Lx, last_access_time: %Lx\n" + creation_time last_access_time + + | { _ } -> + if !debug then + eprintf "cannot parse AT_STANDARD_INFORMATION\n%!" + ); + | 0x30_l -> (* AT_FILE_NAME *) (bitmatch attr with | { _ : 64; (* parent directory ref *) @@ -196,18 +248,94 @@ and parse_resident_attr attr_type attr = let name = ucs2_to_utf8 name name_len in if !debug then - eprintf "filename: %s (size: %Ld bytes)\n" - name data_size + eprintf "filename: %s (size: %Ld bytes)\n" name data_size | { _ } -> if !debug then - eprintf "cannot parse AT_FILE_NAME\n%!"; + eprintf "cannot parse AT_FILE_NAME\n%!" ); | _ -> (* unknown attribute - just ignore *) if !debug then - eprintf "unknown resident attribute %lx\n%!" - attr_type + eprintf "unknown resident attribute %lx\n%!" attr_type + +and parse_nonresident_attr attr_type highest_vcn + allocated_size data_size initialized_size + mapping_pairs = + match attr_type with + | 0x80_l -> (* AT_DATA, ie. 
the $Data stream *) + if !debug then ( + eprintf "AT_DATA: size = %Ld bytes, highest_vcn = 0x%Lx\n" + data_size highest_vcn; + Bitmatch.hexdump_bitstring Pervasives.stderr mapping_pairs + ); + + let lowest_vcn = ~^0 (* see assumption above *) in + let runlist = parse_runlist lowest_vcn ~^0 mapping_pairs in (* decode starting from VCN 0 and LCN 0 *) + if !debug then ( + eprintf "AT_DATA: runlist is:\n"; + List.iter ( + function + | ((vcn, deltavcn), Some lcn) -> + eprintf "\tVCNs %s..%s -> LCN %s\n" + (Int63.to_string vcn) (Int63.to_string (vcn +^ deltavcn -^ ~^1)) + (Int63.to_string lcn) + | ((vcn, deltavcn), None) -> + eprintf "\tVCNs %s..%s -> sparse hole\n" + (Int63.to_string vcn) (Int63.to_string (vcn +^ deltavcn -^ ~^1)) + ) runlist + ); + + | _ -> + if !debug then + eprintf "unknown non-resident attribute %lx\n%!" attr_type + +(* mapping_pairs is not straightforward and not documented well. See + * ntfsprogs libntfs/runlist.c:ntfs_mapping_pairs_decompress + *) +and parse_runlist vcn lcn bits = + bitmatch bits with + | { 0 : 8 } -> (* end of table *) + [] + + | { 0 : 4; (* first nibble zero: no LCN delta follows the VCN delta *) + vcnlen : 4; + deltavcn : vcnlen * 8 : littleendian; + rest : -1 : bitstring + } when vcnlen >= 1 && vcnlen <= 4 -> + + let deltavcn = Int63.of_int64 deltavcn in + + (* This is a sparse file hole. *) + ((vcn, deltavcn), None) :: + parse_runlist (vcn +^ deltavcn) lcn rest + + | { (* Really these fields are signed, but we'll just limit it to + * sensible values in the when clause instead. 
+ *) + lcnlen : 4; + vcnlen : 4; + deltavcn : vcnlen * 8 : littleendian; + deltalcn : lcnlen * 8 : littleendian; + rest : -1 : bitstring + } when (vcnlen >= 1 && vcnlen <= 4) && (lcnlen >= 1 && lcnlen <= 4) -> (* both field lengths must be 1..4 bytes *) + + let deltavcn = Int63.of_int64 deltavcn in + let deltalcn = Int63.of_int64 deltalcn in (* XXX signed *) + + let lcn = lcn +^ deltalcn in (* LCNs are stored as deltas from the previous run *) + + if !debug then eprintf "lcnlen = %d, vcnlen = %d\n" lcnlen vcnlen; + + ((vcn, deltavcn), Some lcn) :: + parse_runlist (vcn +^ deltavcn) lcn rest + + | { _ } -> + if !debug then ( + eprintf "unknown field in the runlist\n%!"; + Bitmatch.hexdump_bitstring Pervasives.stderr bits + ); + [] (* Poor man's little-endian UCS-2 to UTF-8 conversion. * XXX Should use Camomile. -- 1.8.3.1