let bits = dev#read_bitstring mft_lcn mft_size in
(* ... and turn the MFT into records. *)
- let rec loop bits =
- if Bitmatch.bitstring_length bits > 0 then (
- bitmatch bits with
- | { "FILE" : 32 : string;
- (* Assume 3 USAs starting at offset 0x30. XXX? *)
- 0x30 : 16 : littleendian;
- 0x03 : 16 : littleendian;
- _ : 64; (* lsn *)
- _ : 16; (* sequence_number *)
- _ : 16; (* link_count *)
- _ : 16; (* attrs_offset *)
- _ : 16; (* MFT_RECORD_FLAGS *)
- bytes_in_use : 32 : littleendian;
- record_size : 32 : littleendian;
- _ : 64; (* base_mft_record *)
- _ : 16; (* next_attr_instance *)
- _ : 16; (* reserved *)
- _ : 32; (* mft_record_number *)
- _ : 64; (* USN, 3 * USAs -- see above. *)
-
- (* The attributes. Subtract header size (0x30 bytes)
- * and space for the USN/USAs (8 bytes).
- *)
- attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring;
-
- (* Subsequent MFT records: *)
- rest : -1 : bitstring } ->
-
- if !debug then
- eprintf "got an MFT record, now parsing attributes ...\n%!";
-
- let attrs = parse_attrs attrs in
-
- loop rest (* loop rest of MFT records *)
-
- (* Just assume that the end of the list of MFT records
- * is marked by all zeroes. This seems to be the
- * case, but not sure if it is generally true.
- * XXX?
+ let records = parse_mft_records bits in
+ records
+
+and parse_mft_records bits =
+ bitmatch bits with
+ | { "FILE" : 32 : string;
+ (* Assume 3 USAs starting at offset 0x30. XXX? *)
+ 0x30 : 16 : littleendian;
+ 0x03 : 16 : littleendian;
+ _ : 64; (* lsn *)
+ _ : 16; (* sequence_number *)
+ _ : 16; (* link_count *)
+ _ : 16; (* attrs_offset *)
+ _ : 16; (* MFT_RECORD_FLAGS *)
+ bytes_in_use : 32 : littleendian;
+ record_size : 32 : littleendian;
+ _ : 64; (* base_mft_record *)
+ _ : 16; (* next_attr_instance *)
+ _ : 16; (* reserved *)
+ _ : 32; (* mft_record_number *)
+ _ : 64; (* USN, 3 * USAs -- see above. *)
+
+ (* The attributes. Subtract header size (0x30 bytes)
+ * and space for the USN/USAs (8 bytes).
*)
- | { 0x00000000_l : 32 } ->
- ()
- ) in
- let mft_records = loop bits in
- mft_records
+ attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring;
+
+ (* Subsequent MFT records: *)
+ rest : -1 : bitstring } ->
+
+ if !debug then
+ eprintf "got an MFT record, now parsing attributes ...\n%!";
+
+ let attrs = parse_attrs attrs in
+
+ parse_mft_records rest (* loop rest of MFT records *)
+
+ (* Just assume that the end of the list of MFT records
+ * is marked by all zeroes. This seems to be the
+ * case, but not sure if it is generally true.
+ * XXX?
+ *)
+ | { 0x00000000_l : 32 } ->
+ ()
+
+ | { _ } -> ()
and parse_attrs attrs =
(* Parse the MFT record attributes. *)
| { attr_type : 32 : littleendian;
attr_size : 32 : littleendian;
1 : 8; (* non-resident attribute *)
- pad : (Int32.to_int attr_size - 9) * 8 : bitstring;
+ 0 : 8; (* name length, assume unnamed *)
+ _ : 16; (* name offset *)
+ _ : 16; (* flags *)
+ _ : 16; (* instance number *)
+ 0L : 64 : littleendian; (* lowest VCN, assume single extent *)
+ highest_vcn : 64 : littleendian; (* size in clusters - 1 *)
+ 0x40 : 16 : littleendian; (* mapping pairs offset *)
+ 0 : 8; (* assume not compressed *)
+ pad : 40 : bitstring; (* padding *)
+ allocated_size : 64 : littleendian; (* allocate size on disk *)
+ data_size : 64 : littleendian; (* byte size of the attribute *)
+ initialized_size : 64 : littleendian;
+
+ (* Table of virtual clusters to logical clusters. *)
+ mapping_pairs : (Int32.to_int attr_size - 0x40) * 8 : bitstring;
+
rest : -1 : bitstring } ->
- if !debug then
- eprintf "cannot parse non-resident attr %lx\n%!" attr_type;
+
+ (* XXX let attr = *)
+ parse_nonresident_attr attr_type highest_vcn
+ allocated_size data_size initialized_size
+ mapping_pairs;
+
parse_attrs rest
+ (* Not matched above, so we don't know how to parse this attribute, but
+ * there is still enough information to skip to the next one.
+ *)
+ | { attr_type : 32 : littleendian;
+ attr_size : 32 : littleendian;
+ pad : (Int32.to_int attr_size - 8) * 8 : bitstring;
+ rest : -1 : bitstring } ->
+
+ if !debug then (
+ eprintf "cannot parse MFT attribute entry\n%!";
+ Bitmatch.hexdump_bitstring Pervasives.stderr attrs
+ );
+
+ parse_attrs rest
+
+ (* Otherwise unparsable & unskippable attribute entry. *)
| { _ } ->
if !debug then
eprintf "corrupt MFT attribute entry\n%!"
and parse_resident_attr attr_type attr =
match attr_type with
+ | 0x10_l -> (* AT_STANDARD_INFORMATION *)
+ (bitmatch attr with
+ | { creation_time : 64;
+ last_data_change_time : 64;
+ last_mft_change_time : 64;
+ last_access_time : 64
+ (* other stuff follows, just ignore it *) } ->
+ if !debug then
+ eprintf "creation time: %Lx, last_access_time: %Lx\n"
+ creation_time last_access_time
+
+ | { _ } ->
+ if !debug then
+ eprintf "cannot parse AT_STANDARD_INFORMATION\n%!"
+ );
+
| 0x30_l -> (* AT_FILE_NAME *)
(bitmatch attr with
| { _ : 64; (* parent directory ref *)
let name = ucs2_to_utf8 name name_len in
if !debug then
- eprintf "filename: %s (size: %Ld bytes)\n"
- name data_size
+ eprintf "filename: %s (size: %Ld bytes)\n" name data_size
| { _ } ->
if !debug then
- eprintf "cannot parse AT_FILE_NAME\n%!";
+ eprintf "cannot parse AT_FILE_NAME\n%!"
);
| _ -> (* unknown attribute - just ignore *)
if !debug then
- eprintf "unknown resident attribute %lx\n%!"
- attr_type
+ eprintf "unknown resident attribute %lx\n%!" attr_type
+
+(* Handle one non-resident attribute whose header fields have already
+ * been decoded by the caller.  Only AT_DATA (type 0x80) is understood:
+ * its mapping pairs are decoded into a runlist, which is printed when
+ * debugging is enabled.  Nothing is returned yet.
+ *
+ * allocated_size and initialized_size are accepted but not used here.
+ *)
+and parse_nonresident_attr attr_type highest_vcn
+    allocated_size data_size initialized_size
+    mapping_pairs =
+  match attr_type with
+  | 0x80_l ->			(* AT_DATA, ie. the $Data stream *)
+    if !debug then (
+      eprintf "AT_DATA: size = %Ld bytes, highest_vcn = 0x%Lx\n"
+        data_size highest_vcn;
+      Bitmatch.hexdump_bitstring Pervasives.stderr mapping_pairs
+    );
+
+    (* The caller only matches attributes whose lowest VCN is 0, and
+     * LCN deltas are accumulated starting from 0.
+     *)
+    let first_vcn = ~^0 in
+    let first_lcn = ~^0 in
+    let runs = parse_runlist first_vcn first_lcn mapping_pairs in
+
+    (* Print one run as an inclusive VCN range and where it maps to. *)
+    let print_run ((start, len), target) =
+      let first = Int63.to_string start in
+      let last = Int63.to_string (start +^ len -^ ~^1) in
+      match target with
+      | Some lcn ->
+        eprintf "\tVCNs %s..%s -> LCN %s\n" first last (Int63.to_string lcn)
+      | None ->
+        eprintf "\tVCNs %s..%s -> sparse hole\n" first last
+    in
+
+    if !debug then (
+      eprintf "AT_DATA: runlist is:\n";
+      List.iter print_run runs
+    )
+
+  | _ ->
+    if !debug then
+      eprintf "unknown non-resident attribute %lx\n%!" attr_type
+
+
+(* Decode the mapping pairs (runlist) of a non-resident attribute.
+ * The format is not straightforward and not documented well.  See
+ * ntfsprogs libntfs/runlist.c:ntfs_mapping_pairs_decompress
+ *
+ * vcn is the first virtual cluster covered by the next run.  lcn is
+ * the logical cluster of the previous mapped run; each mapped run
+ * stores its LCN as a delta which is added to it.  bits is the part
+ * of the table which has not been decoded yet.
+ *
+ * Returns a list of ((vcn, length), Some lcn) for mapped runs and
+ * ((vcn, length), None) for sparse holes.  Decoding stops, returning
+ * the runs found so far, at the end marker or at the first entry
+ * which cannot be understood.
+ *)
+and parse_runlist vcn lcn bits =
+  bitmatch bits with
+  | { 0 : 8 } ->			(* end of table *)
+    []
+
+  | { 0 : 4;
+      vcnlen : 4;
+      deltavcn : vcnlen * 8 : littleendian;
+      rest : -1 : bitstring
+    } when vcnlen >= 1 && vcnlen <= 4 ->
+
+    let deltavcn = Int63.of_int64 deltavcn in
+
+    (* This is a sparse file hole: no LCN is stored, so lcn is
+     * passed on unchanged.
+     *)
+    ((vcn, deltavcn), None) ::
+      parse_runlist (vcn +^ deltavcn) lcn rest
+
+  | { (* Really these fields are signed, but we'll just limit it to
+       * sensible values in the when clause instead.
+       *)
+      lcnlen : 4;
+      vcnlen : 4;
+      deltavcn : vcnlen * 8 : littleendian;
+      deltalcn : lcnlen * 8 : littleendian;
+      rest : -1 : bitstring
+    } when (vcnlen >= 1 && vcnlen <= 4) && (lcnlen >= 1 && lcnlen <= 4) ->
+    (* Both length nibbles must be in 1..4.  Anything else falls
+     * through to the catch-all case below instead of being read as
+     * an oversized integer field.
+     *)
+
+    let deltavcn = Int63.of_int64 deltavcn in
+    let deltalcn = Int63.of_int64 deltalcn in (* XXX signed *)
+
+    let lcn = lcn +^ deltalcn in
+
+    if !debug then
+      eprintf "lcnlen = %d, vcnlen = %d\n" lcnlen vcnlen;
+
+    ((vcn, deltavcn), Some lcn) ::
+      parse_runlist (vcn +^ deltavcn) lcn rest
+
+  | { _ } ->
+    if !debug then (
+      eprintf "unknown field in the runlist\n%!";
+      Bitmatch.hexdump_bitstring Pervasives.stderr bits
+    );
+    []
(* Poor man's little-endian UCS-2 to UTF-8 conversion.
* XXX Should use Camomile.