X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=lib%2Fdiskimage_ntfs.ml;h=0c4974c0b436632f1a9ec17e07cb03ae8aaf7be8;hb=dfe6b5dddb13624323c5883381dedd34ad4a4663;hp=6c1aafbba497dadd2d7d9d8d6f446b48dcd9b0e6;hpb=554b96246db7b4db7972a0aa50eb33d4d6039ab6;p=virt-df.git diff --git a/lib/diskimage_ntfs.ml b/lib/diskimage_ntfs.ml index 6c1aafb..0c4974c 100644 --- a/lib/diskimage_ntfs.ml +++ b/lib/diskimage_ntfs.ml @@ -25,14 +25,51 @@ open Printf open Diskimage_impl open Int63.Operators +let id = "ntfs" + +(* Type of the private data, basically all the metadata that we + * read from the NTFS volume. + *) +type ntfs_fs = { + ntfs_dev : device; (* Device. *) + ntfs_blocksize : int63; (* Blocksize (cluster size) *) + ntfs_mft_lcn : int63; (* MFT location (bytes) *) + ntfs_mft_size : int63; (* MFT size (bytes) *) + ntfs_mft_records : ntfs_mft_record list; (* Files in MFT *) +} +and ntfs_mft_record = { + ntfs_filename : ntfs_filename option; (* Filename, if present. *) + ntfs_info : ntfs_info option; (* Standard information, if present. *) + ntfs_data : ntfs_data option; (* $Data stream, if present. *) +} +and ntfs_filename = { + ntfs_name : string; (* Filename (UTF-8 encoded). *) +} +and ntfs_info = { + ntfs_creation_time : int64; + ntfs_last_data_change_time : int64; + ntfs_last_mft_change_time : int64; + ntfs_last_access_time : int64; +} +and ntfs_data = { + ntfs_data_size : int63; (* Actual size of data. *) + ntfs_runlist : ntfs_runentry list; (* Runlist. *) +} +and ntfs_runentry = + (* VCN start,size => LCN / None if sparse hole *) + (int63 * int63) * int63 option + (* Private data functions. *) let attach_private_data, get_private_data = private_data_functions (fun {fs_cb = {fs_cb_uq = u}} -> u) -let id = "ntfs" - +(* Probe for an NTFS filesystem on this device. *) let rec probe dev = - (* Load the boot sector. *) + let fs = probe_superblock dev in + fs + +and probe_superblock dev = + (* Load the boot sector / superblock. *) let bits = dev#read_bitstring ~^0 ~^512 in (* Most of this data comes from ntfsprogs' layout.h header file. *) @@ -64,7 +101,7 @@ let rec probe dev = _ : 24; volume_serial_number : 64 : littleendian; checksum : 32 : littleendian; (* Boot sector checksum. *) - code : 8 * 426 : bitstring; (* Boot code. *) + _ : 8 * 426 : bitstring; (* Boot code. *) 0x55AA : 16 } -> (* End of bootsector magic. *) let blocksize = bytes_per_sector * sectors_per_cluster in @@ -74,127 +111,312 @@ let rec probe dev = dev#name blocksize volume_serial_number; let blocksize = Int63.of_int blocksize in + let number_of_sectors = Int63.of_int64 number_of_sectors in + let bytes_per_sector = Int63.of_int bytes_per_sector in + + (* The blocksize of the filesystem is likely to be quite different + * from that of the underlying device, so create an overlay device + * with the natural filesystem blocksize. + *) + let fs_dev = new blocksize_overlay blocksize dev in + + (* Get the location and size of the Master File Table. *) let mft_lcn = Int63.of_int64 mft_lcn *^ blocksize in let mft_size = Int63.of_int clusters_per_mft_record *^ blocksize in - (* Read the whole of the MFT. *) - let bits = dev#read_bitstring mft_lcn mft_size in - (* ... and turn the MFT into records. *) - let rec loop bits = - if Bitmatch.bitstring_length bits > 0 then ( - bitmatch bits with - | { "FILE" : 32 : string; - (* Assume 3 USAs starting at offset 0x30. XXX? *) - 0x30 : 16 : littleendian; - 0x03 : 16 : littleendian; - _ : 64; (* lsn *) - _ : 16; (* sequence_number *) - _ : 16; (* link_count *) - _ : 16; (* attrs_offset *) - _ : 16; (* MFT_RECORD_FLAGS *) - bytes_in_use : 32 : littleendian; - record_size : 32 : littleendian; - _ : 64; (* base_mft_record *) - _ : 16; (* next_attr_instance *) - _ : 16; (* reserved *) - _ : 32; (* mft_record_number *) - _ : 64; (* USN, 3 * USAs -- see above. *) - - (* The attributes. Subtract header size (0x30 bytes) - * and space for the USN/USAs (8 bytes). - *) - attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring; - - (* Subsequent MFT records: *) - rest : -1 : bitstring } -> - - if !debug then - eprintf "got an MFT record, now parsing attributes ...\n%!"; - - (* Parse the MFT record attributes. *) - let rec loop2 attrs = - bitmatch attrs with - | { 0xFFFFFFFF_l : 32 : littleendian } -> (* AT_END *) - if !debug then - eprintf "found AT_END, end of attributes\n%!"; - () - - | { attr_type : 32 : littleendian; - attr_size : 32 : littleendian; - 0 : 8; (* means attribute is resident *) - pad : 24*8 - 8 - 64 : bitstring; (* actually meaningful *) - attr : (Int32.to_int attr_size - 24) * 8 : bitstring; - rest : -1 : bitstring } -> - - (match attr_type with - | 0x30_l -> (* AT_FILE_NAME *) - (bitmatch attr with - | { _ : 64; (* parent directory ref *) - _ : 64; (* creation time *) - _ : 64; (* last change time *) - _ : 64; (* last MFT change time *) - _ : 64; (* last access time *) - allocated_size : 64 : littleendian; - data_size : 64 : littleendian; - _ : 32; - _ : 32; - name_len : 8; - name_type_flags : 8; - name : name_len*16 : string } -> - - let name = ucs2_to_utf8 name name_len in - if !debug then - eprintf "filename: %s (size: %Ld bytes)\n" - name data_size - - | { _ } -> - if !debug then - eprintf "cannot parse AT_FILE_NAME\n%!"; - ); - | _ -> - if !debug then - eprintf "unknown resident attribute %lx\n%!" - attr_type - ); - - loop2 rest - - | { attr_type : 32 : littleendian; - attr_size : 32 : littleendian; - 1 : 8; (* non-resident attribute *) - pad : (Int32.to_int attr_size - 9) * 8 : bitstring; - rest : -1 : bitstring } -> - if !debug then - eprintf "cannot parse non-resident attr %lx\n%!" - attr_type; - loop2 rest - - | { _ } -> - if !debug then - eprintf "corrupt MFT attribute entry\n%!" - in - loop2 attrs; - - loop rest (* loop rest of MFT records *) - - (* Just assume that the end of the list of MFT records - * is marked by all zeroes. This seems to be the - * case, but not sure if it is generally true. - * XXX? - *) - | { 0x00000000_l : 32 } -> - () - ) in - let mft_records = loop bits in - - - - - - raise Not_found; + let mft = parse_mft dev mft_lcn mft_size in + + let ntfs = { + ntfs_dev = fs_dev; + ntfs_blocksize = blocksize; + ntfs_mft_lcn = mft_lcn; + ntfs_mft_size = mft_size; + ntfs_mft_records = mft + } in + + (* Query free space. I cannot find any metadata in the NTFS + * structures which records free space directly, so instead we + * need to read the $Bitmap::$Data (bitmap of allocated LCNs). + *) + let blocks_used, blocks_avail = parse_bitmap_freespace ntfs in + + (* Create a filesystem structure. *) + let fs = { + fs_cb = callbacks (); + fs_dev = fs_dev; + fs_blocksize = blocksize; + fs_blocks_total = number_of_sectors *^ bytes_per_sector /^ blocksize; + fs_is_swap = false; + fs_blocks_reserved = ~^0; (* XXX MFT, bitmap are "reserved" *) + fs_blocks_avail = blocks_avail; + fs_blocks_used = blocks_used; + fs_inodes_total = ~^0; (* XXX MFT records are like inodes *) + fs_inodes_reserved = ~^0; + fs_inodes_avail = ~^0; + fs_inodes_used = ~^0; + } in + + attach_private_data fs ntfs; + fs | { _ } -> raise Not_found (* Not an NTFS boot sector. *) +and parse_mft dev mft_lcn mft_size = + (* Read the whole of the MFT (which is an array of MFT records) ... *) + let bits = dev#read_bitstring mft_lcn mft_size in + + (* ... and turn the MFT into records. *) + let records = parse_mft_records bits in + records + +and parse_mft_records bits = + bitmatch bits with + | { "FILE" : 32 : string; + (* Assume 3 USAs starting at offset 0x30. XXX? *) + 0x30 : 16 : littleendian; + 0x03 : 16 : littleendian; + _ : 64; (* lsn *) + _ : 16; (* sequence_number *) + _ : 16; (* link_count *) + _ : 16; (* attrs_offset *) + _ : 16; (* MFT_RECORD_FLAGS *) + bytes_in_use : 32 : littleendian; + record_size : 32 : littleendian; + _ : 64; (* base_mft_record *) + _ : 16; (* next_attr_instance *) + _ : 16; (* reserved *) + _ : 32; (* mft_record_number *) + _ : 64; (* USN, 3 * USAs -- see above. *) + + (* The attributes. Subtract header size (0x30 bytes) + * and space for the USN/USAs (8 bytes). + *) + attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring; + + (* Subsequent MFT records: *) + rest : -1 : bitstring } -> + + if !debug then + eprintf "got an MFT record, now parsing attributes ...\n%!"; + + let mft_record = { + ntfs_filename = None; + ntfs_info = None; + ntfs_data = None + } in + let mft_record = parse_attrs attrs mft_record in + + mft_record :: parse_mft_records rest (* loop rest of MFT records *) + + (* Just assume that the end of the list of MFT records + * is marked by all zeroes. This seems to be the + * case, but not sure if it is generally true. + * XXX? + *) + | { 0x00000000_l : 32 } -> [] + + | { _ } -> [] + +and parse_attrs attrs mft_record = + (* Parse the MFT record attributes. *) + bitmatch attrs with + | { 0xFFFFFFFF_l : 32 : littleendian } -> (* AT_END *) + if !debug then + eprintf "found AT_END, end of attributes\n%!"; + mft_record + + | { attr_type : 32 : littleendian; + attr_size : 32 : littleendian; + 0 : 8; (* means attribute is resident *) + _ : 24*8 - 8 - 64 : bitstring; (* actually meaningful *) + attr : (Int32.to_int attr_size - 24) * 8 : bitstring; + rest : -1 : bitstring } -> + + let mft_record = parse_resident_attr attr_type attr mft_record in + parse_attrs rest mft_record + + | { attr_type : 32 : littleendian; + attr_size : 32 : littleendian; + 1 : 8; (* non-resident attribute *) + 0 : 8; (* name length, assume unnamed *) + _ : 16; (* name offset *) + _ : 16; (* flags *) + _ : 16; (* instance number *) + 0L : 64 : littleendian; (* lowest VCN, assume single extent *) + highest_vcn : 64 : littleendian; (* size in clusters - 1 *) + 0x40 : 16 : littleendian; (* mapping pairs offset *) + 0 : 8; (* assume not compressed *) + _ : 40 : bitstring; (* padding *) + allocated_size : 64 : littleendian; (* allocate size on disk *) + data_size : 64 : littleendian; (* byte size of the attribute *) + initialized_size : 64 : littleendian; + + (* Table of virtual clusters to logical clusters. *) + mapping_pairs : (Int32.to_int attr_size - 0x40) * 8 : bitstring; + + rest : -1 : bitstring } -> + + let data_size = Int63.of_int64 data_size in + + let mft_record = + parse_nonresident_attr attr_type highest_vcn + allocated_size data_size initialized_size + mapping_pairs mft_record in + + parse_attrs rest mft_record + + (* Not matched above, so we don't know how to parse this attribute, but + * there is still enough information to skip to the next one. + *) + | { attr_type : 32 : littleendian; + attr_size : 32 : littleendian; + _ : (Int32.to_int attr_size - 8) * 8 : bitstring; + rest : -1 : bitstring } -> + + if !debug then + eprintf "cannot parse MFT attribute entry, attr_type = %lx\n%!" + attr_type; + + parse_attrs rest mft_record + + (* Otherwise unparsable & unskippable attribute entry. *) + | { _ } -> + if !debug then + eprintf "corrupt MFT attribute entry\n%!"; + mft_record + +and parse_resident_attr attr_type attr mft_record = + match attr_type with + | 0x10_l -> (* AT_STANDARD_INFORMATION *) + (bitmatch attr with + | { creation_time : 64; + last_data_change_time : 64; + last_mft_change_time : 64; + last_access_time : 64 + (* other stuff follows, just ignore it *) } -> + + let info = { + ntfs_creation_time = creation_time; + ntfs_last_data_change_time = last_data_change_time; + ntfs_last_mft_change_time = last_mft_change_time; + ntfs_last_access_time = last_access_time + } in + { mft_record with ntfs_info = Some info } + + | { _ } -> + if !debug then + eprintf "cannot parse AT_STANDARD_INFORMATION\n%!"; + mft_record + ); + + | 0x30_l -> (* AT_FILE_NAME *) + (bitmatch attr with + | { _ : 64; (* parent directory ref *) + _ : 64; (* creation time *) + _ : 64; (* last change time *) + _ : 64; (* last MFT change time *) + _ : 64; (* last access time *) + _ : 64; (* allocated size *) + _ : 64; (* data size *) + _ : 32; + _ : 32; + name_len : 8; + name_type_flags : 8; + name : name_len*16 : string } -> + + let name = ucs2_to_utf8 name name_len in + let filename = { + ntfs_name = name + } in + { mft_record with ntfs_filename = Some filename } + + | { _ } -> + if !debug then + eprintf "cannot parse AT_FILE_NAME\n%!"; + mft_record + ); + + | _ -> (* unknown attribute - just ignore *) + if !debug then + eprintf "unknown resident attribute %lx\n%!" attr_type; + mft_record + +and parse_nonresident_attr attr_type highest_vcn + allocated_size data_size initialized_size + mapping_pairs mft_record = + match attr_type with + | 0x80_l -> (* AT_DATA, ie. the $Data stream *) + let lowest_vcn = ~^0 (* see assumption above *) in + let runlist = parse_runlist lowest_vcn ~^0 mapping_pairs in + if !debug then ( + eprintf "AT_DATA: runlist is:\n"; + List.iter ( + function + | ((vcn, deltavcn), Some lcn) -> + eprintf "\tVCNs %s..%s -> LCN %s\n" + (Int63.to_string vcn) (Int63.to_string (vcn +^ deltavcn -^ ~^1)) + (Int63.to_string lcn) + | ((vcn, deltavcn), None) -> + eprintf "\tVCNs %s..%s -> sparse hole\n" + (Int63.to_string vcn) (Int63.to_string (vcn +^ deltavcn -^ ~^1)) + ) runlist + ); + + let data = { + ntfs_data_size = data_size; + ntfs_runlist = runlist + } in + { mft_record with ntfs_data = Some data } + + | _ -> + if !debug then + eprintf "unknown non-resident attribute %lx\n%!" attr_type; + mft_record + +(* mapping_pairs is not straightforward and not documented well. See + * ntfsprogs libntfs/runlist.c:ntfs_mapping_pairs_decompress + *) +and parse_runlist vcn lcn bits = + bitmatch bits with + | { 0 : 8 } -> (* end of table *) + [] + + | { 0 : 4; + vcnlen : 4; + deltavcn : vcnlen * 8 : littleendian; + rest : -1 : bitstring + } when vcnlen >= 1 && vcnlen <= 4 -> + + let deltavcn = Int63.of_int64 deltavcn in + + (* This is a sparse file hole. *) + ((vcn, deltavcn), None) :: + parse_runlist (vcn +^ deltavcn) lcn rest + + | { (* Really these fields are signed, but we'll just limit it to + * sensible values in the when clause instead. + *) + lcnlen : 4; + vcnlen : 4; + deltavcn : vcnlen * 8 : littleendian; + deltalcn : lcnlen * 8 : littleendian; + rest : -1 : bitstring + } when (vcnlen >= 1 && vcnlen <= 4) && (lcnlen >= 1 || lcnlen <= 4) -> + + let deltavcn = Int63.of_int64 deltavcn in + let deltalcn = Int63.of_int64 deltalcn in (* XXX signed *) + + let lcn = lcn +^ deltalcn in + + ((vcn, deltavcn), Some lcn) :: + parse_runlist (vcn +^ deltavcn) lcn rest + + | { _ } -> + if !debug then ( + eprintf "unknown field in the runlist\n%!"; + Bitmatch.hexdump_bitstring Pervasives.stderr bits + ); + [] (* Poor man's little-endian UCS-2 to UTF-8 conversion. * XXX Should use Camomile. @@ -235,7 +457,145 @@ and ucs2_to_utf8 name len = done; outstr -and offset_is_free _ _ = false +(* Parse $Bitmap::$Data to get free/used. Returns (used, free) blocks. *) +and parse_bitmap_freespace ntfs = + (* Can throw Not_found - allow that to escape because we don't + * expect an NTFS filesystem without this magic file. + *) + let file = find_system_file ntfs "$Bitmap" in + + (* Count used/free bits. *) + let used = ref ~^0 and free = ref ~^0 in + iter_blocks ntfs file ( + fun lcn vcn data -> + for i = 0 to String.length data - 1 do + let c = Char.code data.[i] in + if c = 0 then (* common cases *) + free := !free +^ ~^8 + else if c = 0xff then + used := !used +^ ~^8 + else ( (* uncommon case: count the bits *) + let m = ref 0x80 in + while !m > 0 do + if c land !m <> 0 then + used := !used +^ ~^1 + else + free := !free +^ ~^1; + m := !m lsr 1 + done + ) + done + ); + (!used, !free) + +and find_system_file { ntfs_mft_records = mft_records } fname = + let rec loop = + function + | [] -> raise Not_found + | ({ ntfs_filename = Some { ntfs_name = name } } as file) :: _ + when name = fname -> + file + | _ :: rest -> loop rest + in + loop mft_records + +and iter_blocks { ntfs_blocksize = blocksize; ntfs_dev = dev } + { ntfs_data = data } f = + match data with + | None -> () (* No $Data attribute. *) + | Some { ntfs_data_size = data_size; ntfs_runlist = runlist } -> + let rec loop data_size = function + | [] -> () + + (* Run of vcnsize clusters. *) + | ((vcnstart, vcnsize), Some lcn) :: rest -> + let data_size = ref data_size in + let lcn = ref lcn in + let vcn = ref vcnstart in + let vcnsize = ref vcnsize in + while !vcnsize > ~^0 && !data_size > ~^0 do + let size = min blocksize !data_size in + let data = dev#read (!lcn *^ blocksize) size in + f (Some !lcn) !vcn data; + lcn := !lcn +^ ~^1; + vcn := !vcn +^ ~^1; + vcnsize := !vcnsize -^ ~^1; + data_size := !data_size -^ size + done; + loop !data_size rest + + (* Sparse hole. *) + | ((vcnstart, vcnsize), None) :: rest -> + let data_size = ref data_size in + let vcn = ref vcnstart in + let vcnsize = ref vcnsize in + while !vcnsize > ~^0 && !data_size > ~^0 do + let size = min blocksize !data_size in + let data = String.make (Int63.to_int size) '\000' in + f None !vcn data; + vcn := !vcn +^ ~^1; + vcnsize := !vcnsize -^ ~^1; + data_size := !data_size -^ size + done; + loop !data_size rest + in + loop data_size runlist + +(* This is a bit limited at the moment because it can only read from + * a contiguous part of the file. System files are usually contiguous + * so this is OK for us. + *) +and read_file { ntfs_blocksize = blocksize; ntfs_dev = dev } + { ntfs_data = data } offset size = + match data with + | None -> raise Not_found (* No $Data attribute. *) + | Some { ntfs_data_size = data_size; ntfs_runlist = runlist } -> + if offset < ~^0 || size < ~^0 || offset +^ size >= data_size then + invalid_arg "ntfs: read_file: tried to read outside file"; + + (* Get the first and last VCNs containing the data. *) + let vcn = offset /^ blocksize in + let vcnoffset = offset %^ blocksize in + let vcnend = (offset +^ size -^ ~^1) /^ blocksize in + + (* Find the run containing this VCN. *) + let rec find = function + | [] -> raise Not_found + | ((vcnstart, vcnsize), lcn) :: _ + when vcnstart <= vcn && vcn < vcnstart +^ vcnsize && + vcnstart <= vcnend && vcnend < vcnstart +^ vcnsize -> + lcn + | _ :: rest -> find rest + in + let lcn = find runlist in + + (* Read the LCNs. *) + let data = + match lcn with + | Some lcn -> dev#read (lcn *^ blocksize +^ vcnoffset) size + | None -> String.make (Int63.to_int size) '\000' (* sparse hole *) in + data + +(* This is easy: just look at the bitmap. *) +and offset_is_free fs offset = + try + let ntfs = get_private_data fs in + let blocksize = ntfs.ntfs_blocksize in + + (* Get the $Bitmap file. *) + let file = find_system_file ntfs "$Bitmap" in + + let lcn = offset /^ blocksize in + + (* Read the byte in the bitmap corresponding to this LCN. *) + let byteoffset = lcn >^> 3 and bitoffset = lcn &^ ~^7 in + let byte = read_file ntfs file byteoffset ~^1 in + let byte = Char.code byte.[0] in + let bit = Int63.of_int byte >^> (0x80 lsr Int63.to_int bitoffset) in + + bit <> ~^0 + with + Not_found -> false (* play it safe *) and callbacks = let i = ref 0 in