(* 'df' command for virtual domains. (C) Copyright 2007 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. Support for NTFS. *) open Unix open Printf open Diskimage_impl open Int63.Operators let id = "ntfs" (* Type of the private data, basically all the metadata that we * read from the NTFS volume. *) type ntfs_fs = { ntfs_dev : device; (* Device. *) ntfs_blocksize : int63; (* Blocksize (cluster size) *) ntfs_mft_lcn : int63; (* MFT location (bytes) *) ntfs_mft_size : int63; (* MFT size (bytes) *) ntfs_mft_records : ntfs_mft_record list; (* Files in MFT *) } and ntfs_mft_record = { ntfs_filename : ntfs_filename option; (* Filename, if present. *) ntfs_info : ntfs_info option; (* Standard information, if present. *) ntfs_data : ntfs_data option; (* $Data stream, if present. *) } and ntfs_filename = { ntfs_name : string; (* Filename (UTF-8 encoded). *) } and ntfs_info = { ntfs_creation_time : int64; ntfs_last_data_change_time : int64; ntfs_last_mft_change_time : int64; ntfs_last_access_time : int64; } and ntfs_data = { ntfs_data_size : int63; (* Actual size of data. *) ntfs_runlist : ntfs_runentry list; (* Runlist. *) } and ntfs_runentry = (* VCN start,size => LCN / None if sparse hole *) (int63 * int63) * int63 option (* Private data functions. *) let attach_private_data, get_private_data = private_data_functions (fun {fs_cb = {fs_cb_uq = u}} -> u) (* Probe for an NTFS filesystem on this device. *) let rec probe dev = let fs = probe_superblock dev in fs and probe_superblock dev = (* Load the boot sector / superblock. *) let bits = dev#read_bitstring ~^0 ~^512 in (* Most of this data comes from ntfsprogs' layout.h header file. *) bitmatch bits with | { _ : 24; (* Jump to boot up code. *) "NTFS " : 64 : string; (* NTFS OEM ID (magic). *) bytes_per_sector : 16 : littleendian; sectors_per_cluster : 8 : littleendian; _ : 16; (* Reserved sectors - unused. *) _ : 8; (* FATs - unused. *) _ : 16; (* Root entries - unused. *) _ : 16; (* Sectors - unused. *) _ : 8; (* Media type, probably 'f8' = HDD *) _ : 16; (* Sectors per FAT - unused. *) _ : 16; (* Sectors per track. *) _ : 16; (* Heads. *) _ : 32; (* Hidden sectors. *) _ : 32; (* Large sectors. *) _ : 8; (* Physical drive, 0 = FDD, 0x80 = HDD*) _ : 8; (* Current head. *) _ : 8; (* Extended boot signature. *) _ : 8; (* Reserved. *) number_of_sectors : 64 : littleendian; mft_lcn : 64 : littleendian; (* MFT location in clusters. *) mftmirr_lcn : 64 : littleendian; (* MFT mirror location. *) clusters_per_mft_record : 8; _ : 24; clusters_per_index_record : 8; _ : 24; volume_serial_number : 64 : littleendian; checksum : 32 : littleendian; (* Boot sector checksum. *) code : 8 * 426 : bitstring; (* Boot code. *) 0x55AA : 16 } -> (* End of bootsector magic. *) let blocksize = bytes_per_sector * sectors_per_cluster in if !debug then eprintf "%s: NTFS boot sector with blocksize = %d, serial = %Lx\n%!" dev#name blocksize volume_serial_number; let blocksize = Int63.of_int blocksize in let number_of_sectors = Int63.of_int64 number_of_sectors in (* The blocksize of the filesystem is likely to be quite different * from that of the underlying device, so create an overlay device * with the natural filesystem blocksize. *) let fs_dev = new blocksize_overlay blocksize dev in (* Get the location and size of the Master File Table. *) let mft_lcn = Int63.of_int64 mft_lcn *^ blocksize in let mft_size = Int63.of_int clusters_per_mft_record *^ blocksize in let mft = parse_mft dev mft_lcn mft_size in let ntfs = { ntfs_dev = fs_dev; ntfs_blocksize = blocksize; ntfs_mft_lcn = mft_lcn; ntfs_mft_size = mft_size; ntfs_mft_records = mft } in (* Query free space. I cannot find any metadata in the NTFS * structures which records free space directly, so instead we * need to read the $Bitmap::$Data (bitmap of allocated LCNs). *) let blocks_used, blocks_avail = parse_bitmap_freespace ntfs in (* Create a filesystem structure. *) let fs = { fs_cb = callbacks (); fs_dev = fs_dev; fs_blocksize = blocksize; fs_blocks_total = number_of_sectors *^ bytes_per_sector /^ blocksize; fs_is_swap = false; fs_blocks_reserved = ~^0; (* XXX MFT, bitmap are "reserved" *) fs_blocks_avail = blocks_avail; fs_blocks_used = blocks_used; fs_inodes_total = ~^0; (* XXX MFT records are like inodes *) fs_inodes_reserved = ~^0; fs_inodes_avail = ~^0; fs_inodes_used = ~^0; } in attach_private_data fs ntfs; fs | { _ } -> raise Not_found (* Not an NTFS boot sector. *) and parse_mft dev mft_lcn mft_size = (* Read the whole of the MFT (which is an array of MFT records) ... *) let bits = dev#read_bitstring mft_lcn mft_size in (* ... and turn the MFT into records. *) let records = parse_mft_records bits in records and parse_mft_records bits = bitmatch bits with | { "FILE" : 32 : string; (* Assume 3 USAs starting at offset 0x30. XXX? *) 0x30 : 16 : littleendian; 0x03 : 16 : littleendian; _ : 64; (* lsn *) _ : 16; (* sequence_number *) _ : 16; (* link_count *) _ : 16; (* attrs_offset *) _ : 16; (* MFT_RECORD_FLAGS *) bytes_in_use : 32 : littleendian; record_size : 32 : littleendian; _ : 64; (* base_mft_record *) _ : 16; (* next_attr_instance *) _ : 16; (* reserved *) _ : 32; (* mft_record_number *) _ : 64; (* USN, 3 * USAs -- see above. *) (* The attributes. Subtract header size (0x30 bytes) * and space for the USN/USAs (8 bytes). *) attrs : (Int32.to_int record_size - 0x30 - 8)*8 : bitstring; (* Subsequent MFT records: *) rest : -1 : bitstring } -> if !debug then eprintf "got an MFT record, now parsing attributes ...\n%!"; let mft_record = { ntfs_filename = None; ntfs_info = None; ntfs_data = None } in let mft_record = parse_attrs attrs mft_record in mft_record :: parse_mft_records rest (* loop rest of MFT records *) (* Just assume that the end of the list of MFT records * is marked by all zeroes. This seems to be the * case, but not sure if it is generally true. * XXX? *) | { 0x00000000_l : 32 } -> [] | { _ } -> [] and parse_attrs attrs mft_record = (* Parse the MFT record attributes. *) bitmatch attrs with | { 0xFFFFFFFF_l : 32 : littleendian } -> (* AT_END *) if !debug then eprintf "found AT_END, end of attributes\n%!"; mft_record | { attr_type : 32 : littleendian; attr_size : 32 : littleendian; 0 : 8; (* means attribute is resident *) pad : 24*8 - 8 - 64 : bitstring; (* actually meaningful *) attr : (Int32.to_int attr_size - 24) * 8 : bitstring; rest : -1 : bitstring } -> let mft_record = parse_resident_attr attr_type attr mft_record in parse_attrs rest mft_record | { attr_type : 32 : littleendian; attr_size : 32 : littleendian; 1 : 8; (* non-resident attribute *) 0 : 8; (* name length, assume unnamed *) _ : 16; (* name offset *) _ : 16; (* flags *) _ : 16; (* instance number *) 0L : 64 : littleendian; (* lowest VCN, assume single extent *) highest_vcn : 64 : littleendian; (* size in clusters - 1 *) 0x40 : 16 : littleendian; (* mapping pairs offset *) 0 : 8; (* assume not compressed *) pad : 40 : bitstring; (* padding *) allocated_size : 64 : littleendian; (* allocate size on disk *) data_size : 64 : littleendian; (* byte size of the attribute *) initialized_size : 64 : littleendian; (* Table of virtual clusters to logical clusters. *) mapping_pairs : (Int32.to_int attr_size - 0x40) * 8 : bitstring; rest : -1 : bitstring } -> let data_size = Int63.of_int64 data_size in let mft_record = parse_nonresident_attr attr_type highest_vcn allocated_size data_size initialized_size mapping_pairs mft_record in parse_attrs rest mft_record (* Not matched above, so we don't know how to parse this attribute, but * there is still enough information to skip to the next one. *) | { attr_type : 32 : littleendian; attr_size : 32 : littleendian; pad : (Int32.to_int attr_size - 8) * 8 : bitstring; rest : -1 : bitstring } -> if !debug then eprintf "cannot parse MFT attribute entry, attr_type = %lx\n%!" attr_type; parse_attrs rest mft_record (* Otherwise unparsable & unskippable attribute entry. *) | { _ } -> if !debug then eprintf "corrupt MFT attribute entry\n%!"; mft_record and parse_resident_attr attr_type attr mft_record = match attr_type with | 0x10_l -> (* AT_STANDARD_INFORMATION *) (bitmatch attr with | { creation_time : 64; last_data_change_time : 64; last_mft_change_time : 64; last_access_time : 64 (* other stuff follows, just ignore it *) } -> let info = { ntfs_creation_time = creation_time; ntfs_last_data_change_time = last_data_change_time; ntfs_last_mft_change_time = last_mft_change_time; ntfs_last_access_time = last_access_time } in { mft_record with ntfs_info = Some info } | { _ } -> if !debug then eprintf "cannot parse AT_STANDARD_INFORMATION\n%!"; mft_record ); | 0x30_l -> (* AT_FILE_NAME *) (bitmatch attr with | { _ : 64; (* parent directory ref *) _ : 64; (* creation time *) _ : 64; (* last change time *) _ : 64; (* last MFT change time *) _ : 64; (* last access time *) _ : 64; (* allocated size *) _ : 64; (* data size *) _ : 32; _ : 32; name_len : 8; name_type_flags : 8; name : name_len*16 : string } -> let name = ucs2_to_utf8 name name_len in let filename = { ntfs_name = name } in { mft_record with ntfs_filename = Some filename } | { _ } -> if !debug then eprintf "cannot parse AT_FILE_NAME\n%!"; mft_record ); | _ -> (* unknown attribute - just ignore *) if !debug then eprintf "unknown resident attribute %lx\n%!" attr_type; mft_record and parse_nonresident_attr attr_type highest_vcn allocated_size data_size initialized_size mapping_pairs mft_record = match attr_type with | 0x80_l -> (* AT_DATA, ie. the $Data stream *) let lowest_vcn = ~^0 (* see assumption above *) in let runlist = parse_runlist lowest_vcn ~^0 mapping_pairs in if !debug then ( eprintf "AT_DATA: runlist is:\n"; List.iter ( function | ((vcn, deltavcn), Some lcn) -> eprintf "\tVCNs %s..%s -> LCN %s\n" (Int63.to_string vcn) (Int63.to_string (vcn +^ deltavcn -^ ~^1)) (Int63.to_string lcn) | ((vcn, deltavcn), None) -> eprintf "\tVCNs %s..%s -> sparse hole\n" (Int63.to_string vcn) (Int63.to_string (vcn +^ deltavcn -^ ~^1)) ) runlist ); let data = { ntfs_data_size = data_size; ntfs_runlist = runlist } in { mft_record with ntfs_data = Some data } | _ -> if !debug then eprintf "unknown non-resident attribute %lx\n%!" attr_type; mft_record (* mapping_pairs is not straightforward and not documented well. See * ntfsprogs libntfs/runlist.c:ntfs_mapping_pairs_decompress *) and parse_runlist vcn lcn bits = bitmatch bits with | { 0 : 8 } -> (* end of table *) [] | { 0 : 4; vcnlen : 4; deltavcn : vcnlen * 8 : littleendian; rest : -1 : bitstring } when vcnlen >= 1 && vcnlen <= 4 -> let deltavcn = Int63.of_int64 deltavcn in (* This is a sparse file hole. *) ((vcn, deltavcn), None) :: parse_runlist (vcn +^ deltavcn) lcn rest | { (* Really these fields are signed, but we'll just limit it to * sensible values in the when clause instead. *) lcnlen : 4; vcnlen : 4; deltavcn : vcnlen * 8 : littleendian; deltalcn : lcnlen * 8 : littleendian; rest : -1 : bitstring } when (vcnlen >= 1 && vcnlen <= 4) && (lcnlen >= 1 || lcnlen <= 4) -> let deltavcn = Int63.of_int64 deltavcn in let deltalcn = Int63.of_int64 deltalcn in (* XXX signed *) let lcn = lcn +^ deltalcn in ((vcn, deltavcn), Some lcn) :: parse_runlist (vcn +^ deltavcn) lcn rest | { _ } -> if !debug then ( eprintf "unknown field in the runlist\n%!"; Bitmatch.hexdump_bitstring Pervasives.stderr bits ); [] (* Poor man's little-endian UCS-2 to UTF-8 conversion. * XXX Should use Camomile. *) and ucs2_to_utf8 name len = (* Calculate length of final string. *) let outlen = ref 0 in let j = ref 0 in for i = 0 to len-1 do let j' = !j in j := j' + 2; let c0 = Char.code name.[j'] and c1 = Char.code name.[j'+1] in let c = c0 + c1 * 256 in if c < 128 then incr outlen else if c < 0x800 then outlen := !outlen + 2 else outlen := !outlen + 3 done; let outstr = String.create !outlen in j := 0; outlen := 0; for i = 0 to len-1 do let j' = !j in j := j' + 2; let c0 = Char.code name.[j'] and c1 = Char.code name.[j'+1] in let c = c0 + c1 * 256 in if c < 128 then ( outstr.[!outlen] <- Char.chr c; incr outlen ) else if c < 0x800 then ( outstr.[!outlen] <- Char.chr (0b11000000 lor (c lsr 6)); outstr.[!outlen+1] <- Char.chr (0b10000000 lor (c land 0b00111111)); outlen := !outlen + 2 ) else ( outstr.[!outlen] <- Char.chr (0b11100000 lor (c lsr 12)); outstr.[!outlen+1] <- Char.chr (0b10000000 lor ((c lsr 6) lor 0b00111111)); outstr.[!outlen+2] <- Char.chr (0b10000000 lor (c land 0b00111111)); outlen := !outlen + 3 ) done; outstr (* Parse $Bitmap::$Data to get free/used. Returns (used, free) blocks. *) and parse_bitmap_freespace ntfs = (* Can throw Not_found - allow that to escape because we don't * expect an NTFS filesystem without this magic file. *) let file = find_system_file ntfs "$Bitmap" in (* Count used/free bits. *) let used = ref ~^0 and free = ref ~^0 in iter_blocks ntfs file ( fun lcn vcn data -> for i = 0 to String.length data - 1 do let c = Char.code data.[i] in if c = 0 then (* common cases *) free := !free +^ ~^8 else if c = 0xff then used := !used +^ ~^8 else ( (* uncommon case: count the bits *) let m = ref 0x80 in while !m > 0 do if c land !m <> 0 then used := !used +^ ~^1 else free := !free +^ ~^1; m := !m lsr 1 done ) done ); (!used, !free) and find_system_file { ntfs_mft_records = mft_records } fname = let rec loop = function | [] -> raise Not_found | ({ ntfs_filename = Some { ntfs_name = name } } as file) :: _ when name = fname -> file | _ :: rest -> loop rest in loop mft_records and iter_blocks { ntfs_blocksize = blocksize; ntfs_dev = dev } { ntfs_data = data } f = match data with | None -> () (* No $Data attribute. *) | Some { ntfs_data_size = data_size; ntfs_runlist = runlist } -> let rec loop data_size = function | [] -> () (* Run of vcnsize clusters. *) | ((vcnstart, vcnsize), Some lcn) :: rest -> let data_size = ref data_size in let lcn = ref lcn in let vcn = ref vcnstart in let vcnsize = ref vcnsize in while !vcnsize > ~^0 && !data_size > ~^0 do let size = min blocksize !data_size in let data = dev#read (!lcn *^ blocksize) size in f (Some !lcn) !vcn data; lcn := !lcn +^ ~^1; vcn := !vcn +^ ~^1; vcnsize := !vcnsize -^ ~^1; data_size := !data_size -^ size done; loop !data_size rest (* Sparse hole. *) | ((vcnstart, vcnsize), None) :: rest -> let data_size = ref data_size in let vcn = ref vcnstart in let vcnsize = ref vcnsize in while !vcnsize > ~^0 && !data_size > ~^0 do let size = min blocksize !data_size in let data = String.make size '\000' in f None !vcn data; vcn := !vcn +^ ~^1; vcnsize := !vcnsize -^ ~^1; data_size := !data_size -^ size done; loop !data_size rest in loop data_size runlist and offset_is_free _ _ = false and callbacks = let i = ref 0 in fun () -> { fs_cb_uq = (incr i; !i); fs_cb_name = id; fs_cb_printable_name = "Windows NTFS"; fs_cb_offset_is_free = offset_is_free; } (* Register the plugin. *) let () = register_plugin ~filesystem:probe id