Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*)
-open Printf
-open ExtList
-open Unix
+include Diskimage_impl
-open Int63.Operators
-
-include Diskimage_utils
-
-(* Use as the natural block size for disk images, but really we should
- * use the 'blockdev -getbsz' command to find the real block size.
- *)
-let disk_block_size = ~^512
-
-(*----------------------------------------------------------------------*)
-(* The plug-ins. *)
-let partition_types = [
- Diskimage_mbr.plugin_id,
- ("MBR", Diskimage_mbr.probe);
-]
-
-let filesystem_types = [
- Diskimage_ext2.plugin_id,
- ("Linux ext2/3", Diskimage_ext2.probe);
- Diskimage_linux_swap.plugin_id,
- ("Linux swap", Diskimage_linux_swap.probe);
- Diskimage_linux_swsuspend.plugin_id,
- ("Linux s/w suspend", Diskimage_linux_swsuspend.probe);
-]
-
-let lvm_types = [
- Diskimage_lvm2.plugin_id,
- ("Linux LVM2", Diskimage_lvm2.probe, Diskimage_lvm2.list);
-]
-
-let name_of_parts id =
- let name, _ = List.assoc id partition_types in
- name
-let name_of_filesystem id =
- let name, _ = List.assoc id filesystem_types in
- name
-let name_of_lvm id =
- let name, _, _ = List.assoc id lvm_types in
- name
-
-(* Probe a device for partitions. Returns [Some parts] or [None]. *)
-let probe_for_partitions dev =
- if !debug then eprintf "probing for partitions on %s ...\n%!" dev#name;
- let rec loop = function
- | [] -> None
- | (parts_plugin_id, (_, probe_fn)) :: rest ->
- try Some (probe_fn dev)
- with Not_found -> loop rest
- in
- let r = loop partition_types in
- if !debug then (
- match r with
- | None -> eprintf "no partitions found on %s\n%!" dev#name
- | Some { parts_plugin_id = name; parts = parts } ->
- eprintf "found %d %s partitions on %s\n"
- (List.length parts) name dev#name
- );
- r
-
-(* Probe a device for a filesystem. Returns [Some fs] or [None]. *)
-let probe_for_filesystem dev =
- if !debug then eprintf "probing for a filesystem on %s ...\n%!" dev#name;
- let rec loop = function
- | [] -> None
- | (fs_name, (_, probe_fn)) :: rest ->
- try Some (probe_fn dev)
- with Not_found -> loop rest
- in
- let r = loop filesystem_types in
- if !debug then (
- match r with
- | None -> eprintf "no filesystem found on %s\n%!" dev#name
- | Some fs ->
- eprintf "found a filesystem on %s:\n" dev#name;
- eprintf "\t%s\n%!" fs.fs_plugin_id
- );
- r
-
-(* Probe a device for a PV. Returns [Some lvm_name] or [None]. *)
-let probe_for_pv dev =
- if !debug then eprintf "probing if %s is a PV ...\n%!" dev#name;
- let rec loop = function
- | [] -> None
- | (lvm_name, (_, probe_fn, _)) :: rest ->
- try Some (probe_fn lvm_name dev)
- with Not_found -> loop rest
- in
- let r = loop lvm_types in
- if !debug then (
- match r with
- | None -> eprintf "no PV found on %s\n%!" dev#name
- | Some { lvm_plugin_id = name } ->
- eprintf "%s contains a %s PV\n%!" dev#name name
- );
- r
-
-let list_lvs lvm_name devs =
- let _, _, list_lvs_fn = List.assoc lvm_name lvm_types in
- list_lvs_fn devs
-
-(*----------------------------------------------------------------------*)
-(* Create machine description. *)
-let open_machine name disks =
- let disks = List.map (
- fun (name, path) ->
- let dev = new block_device path disk_block_size (* XXX *) in
- { d_name = name; d_dev = dev; d_content = `Unknown }
- ) disks in
- { m_name = name; m_disks = disks; m_lv_filesystems = [] }
-
-let close_machine { m_disks = m_disks } =
- (* Only close the disks, assume all other devices are derived from them. *)
- List.iter (fun { d_dev = d_dev } -> d_dev#close ()) m_disks
-
-(* Main scanning function for filesystems. *)
-let scan_machine ({ m_disks = m_disks } as machine) =
- let m_disks = List.map (
- fun ({ d_dev = dev } as disk) ->
- let dev = (dev :> device) in
- (* See if it is partitioned first. *)
- let parts = probe_for_partitions dev in
- match parts with
- | Some parts ->
- { disk with d_content = `Partitions parts }
- | None ->
- (* Not partitioned. Does it contain a filesystem? *)
- let fs = probe_for_filesystem dev in
- match fs with
- | Some fs ->
- { disk with d_content = `Filesystem fs }
- | None ->
- (* Not partitioned, no filesystem, is it a PV? *)
- let pv = probe_for_pv dev in
- match pv with
- | Some lvm_name ->
- { disk with d_content = `PhysicalVolume lvm_name }
- | None ->
- disk (* Spare/unknown. *)
- ) m_disks in
-
- (* Now we have either detected partitions or a filesystem on each
- * physical device (or perhaps neither). See what is on those
- * partitions.
- *)
- let m_disks = List.map (
- function
- | ({ d_dev = dev; d_content = `Partitions parts } as disk) ->
- let ps = List.map (
- fun p ->
- if p.part_status = Bootable || p.part_status = Nonbootable then (
- let fs = probe_for_filesystem p.part_dev in
- match fs with
- | Some fs ->
- { p with part_content = `Filesystem fs }
- | None ->
- (* Is it a PV? *)
- let pv = probe_for_pv p.part_dev in
- match pv with
- | Some lvm_name ->
- { p with part_content = `PhysicalVolume lvm_name }
- | None ->
- p (* Spare/unknown. *)
- ) else p
- ) parts.parts in
- let parts = { parts with parts = ps } in
- { disk with d_content = `Partitions parts }
- | disk -> disk
- ) m_disks in
-
- (* LVM filesystem detection
- *
- * Look for all disks/partitions which have been identified as PVs
- * and pass those back to the respective LVM plugin for LV detection.
- *
- * (Note - a two-stage process because an LV can be spread over
- * several PVs, so we have to detect all PVs belonging to a
- * domain first).
- *
- * XXX To deal with RAID (ie. md devices) we will need to loop
- * around here because RAID is like LVM except that they normally
- * present as block devices which can be used by LVM.
- *)
- (* First: LV detection.
- * Find all physical volumes, can be disks or partitions.
- *)
- let pvs_on_disks = List.filter_map (
- function
- | { d_dev = d_dev;
- d_content = `PhysicalVolume pv } -> Some (pv, (d_dev :> device))
- | _ -> None
- ) m_disks in
- let pvs_on_partitions = List.map (
- function
- | { d_content = `Partitions { parts = parts } } ->
- List.filter_map (
- function
- | { part_dev = part_dev;
- part_content = `PhysicalVolume pv } ->
- Some (pv, part_dev)
- | _ -> None
- ) parts
- | _ -> []
- ) m_disks in
- let lvs = List.concat (pvs_on_disks :: pvs_on_partitions) in
-
- (* Second: filesystem on LV detection.
- * Group the LVs by plug-in type.
- *)
- let cmp (a,_) (b,_) = compare a b in
- let lvs = List.sort ~cmp lvs in
- let lvs = group_by lvs in
-
- let lvs =
- List.map (fun (pv, devs) -> list_lvs pv.lvm_plugin_id devs) lvs in
- let lvs = List.concat lvs in
-
- (* lvs is a list of potential LV devices. Now run them through the
- * probes to see if any contain filesystems.
- *)
- let filesystems =
- List.filter_map (
- fun ({ lv_dev = dev } as lv) ->
- match probe_for_filesystem dev with
- | Some fs -> Some (lv, fs)
- | None -> None
- ) lvs in
-
- { machine with
- m_disks = m_disks;
- m_lv_filesystems = filesystems }
-
-(*----------------------------------------------------------------------*)
-
-(* We describe the ownership of each part of the disk using a
- * segment tree. http://en.wikipedia.org/wiki/Segment_tree
- *
- * Note that each part can (and usually is) owned multiple times
- * (eg. by a filesystem and by the partition that the filesystem
- * lies inside). Also, the segment tree is effectively read-only.
- * We build it up as a final step given the flat list of segments
- * identified by the algorithm in 'iter_over_machine'.
+(* This just forces the plug-ins to get loaded when anyone references
+ * this library. You need one line for each plug-in.
*)
-
-(* General binary tree type. Data 'a is stored in the leaves and 'b
- * is stored in the nodes.
- *)
-type ('a,'b) binary_tree =
- | Leaf of 'a
- | Node of ('a,'b) binary_tree * 'b * ('a,'b) binary_tree
-
-(* This prints out the binary tree in graphviz dot format. *)
-let print_binary_tree leaf_printer node_printer tree =
- (* Assign a unique, fixed label to each node. *)
- let label =
- let i = ref 0 in
- let hash = Hashtbl.create 13 in
- fun node ->
- try Hashtbl.find hash node
- with Not_found ->
- let i = incr i; !i in
- let label = "n" ^ string_of_int i in
- Hashtbl.add hash node label;
- label
- in
- (* Recursively generate the graphviz file. *)
- let rec print = function
- | (Leaf a as leaf) ->
- eprintf " %s [shape=box, label=\"%s\"];\n"
- (label leaf) (leaf_printer a)
- | (Node (left,b,right) as node) ->
- eprintf " %s [label=\"%s\"];\n"
- (label node) (node_printer b);
- eprintf " %s -> %s [tailport=sw];\n" (label node) (label left);
- eprintf " %s -> %s [tailport=se];\n" (label node) (label right);
- print left;
- print right;
- in
- eprintf "/* Use 'dot -Tpng foo.dot > foo.png' to convert to a png file. */\n";
- eprintf "digraph G {\n";
- print tree;
- eprintf "}\n%!";
-
-type owner =
- [ `Filesystem of filesystem
- | `Partitions of partitions
- | `PhysicalVolume of pv ]
-
-(* A segment describes the owner of a range of disk addresses. *)
-type segment =
- int63 * int63 * (* disk offset, size of segment *)
- owner * (* owner *)
- int63 (* owner offset *)
-
-type interval = int63 * int63 (* start point, end point (bytes) *)
-
-(* The special segment tree structure that we construct in create_ownership. *)
-type segment_tree =
- (interval * segment list, interval * segment list) binary_tree
-
-type ownership =
- (device * (* block_device (disk) *)
- segment_tree) list (* segment tree for this disk *)
-
-(* List of owned segments before we build the segment tree. *)
-type ownership_list =
- (device * (* block_device (disk) *)
- segment) list
-
-(* Ownership tables. *)
-let create_ownership machine =
- (* Iterate over all the things which can claim ownership of a
- * disk block (filesystems, partitions, PVs).
- *)
- let rec iter_over_machine
- ({m_disks = disks; m_lv_filesystems = lv_filesystems} as machine) =
-
- (* No segments to begin with. *)
- let ownership = [] in
-
- (* Iterate over disks. *)
- let ownership =
- List.fold_left (
- fun ownership ->
- function
- | { d_content = (`Filesystem fs as owner) } ->
- iter_over_filesystem machine ownership fs owner
- | { d_content = (`Partitions parts as owner) } ->
- iter_over_partitions machine ownership parts owner
- | { d_content = (`PhysicalVolume pv as owner) } ->
- iter_over_pv machine ownership pv owner
- | { d_content = `Unknown } -> ownership
- ) ownership disks in
-
- (* Iterate over LV filesystems. *)
- let ownership =
- List.fold_left (
- fun ownership (lv, fs) ->
- let owner = `Filesystem fs in
- iter_over_filesystem machine ownership fs owner
- ) ownership lv_filesystems in
-
- ownership
-
- (* Iterate over the blocks in a single filesystem. *)
- and iter_over_filesystem machine ownership {fs_dev = dev} owner =
- iter_over_device machine ownership dev owner
-
- (* Iterate over the blocks in a set of partitions, then
- * iterate over the contents of the partitions.
- *)
- and iter_over_partitions machine ownership
- {parts = parts; parts_dev = parts_dev} owner =
- let ownership = iter_over_device machine ownership parts_dev owner in
-
- let ownership =
- List.fold_left (
- fun ownership ->
- function
- | { part_content = (`Filesystem fs as owner) } ->
- iter_over_filesystem machine ownership fs owner
- | { part_content = (`PhysicalVolume pv as owner) } ->
- iter_over_pv machine ownership pv owner
- | { part_content = `Unknown } -> ownership
- ) ownership parts in
-
- ownership
-
- (* Iterate over the blocks in a PV. *)
- and iter_over_pv machine ownership {pv_dev = dev} owner =
- iter_over_device machine ownership dev owner
-
- (* Iterate over the blocks in a device, assigning ownership to 'owner'
- *
- * In reality (1): There can be several owners for each block, so we
- * incrementally add ownership to the ownership_list (which eventually
- * will be turned into a segment tree).
- * In reality (2): Iterating over blocks would take ages and result
- * in a very inefficient ownership representation. Instead we look
- * at minimum contiguous extents.
- *)
- and iter_over_device { m_disks = disks } ownership dev owner =
- let size = dev#size in
- let disks = List.map (fun {d_dev = dev} -> (dev :> device)) disks in
-
- let rec loop ownership offset =
- if offset < size then (
- let devs, extent = get_next_extent disks dev offset in
- if devs = [] then
- eprintf "warning: no device found under %s\n"
- (string_of_owner owner);
- let ownership =
- List.fold_left (
- fun ownership (disk, disk_offset) ->
- let elem = disk, (disk_offset, extent, owner, offset) in
- elem :: ownership
- ) ownership devs in
- loop ownership (offset +^ extent)
- )
- else ownership
- in
- loop ownership ~^0
-
- (* Return the length of the next contiguous region in the device starting
- * at the given byte offset. Also return the underlying block device(s)
- * if there is one.
- *)
- and get_next_extent disks (dev : device) offset =
- let this_extent = dev#contiguous offset in
-
- (* If this disk is a block_device (a member of the 'disks' list)
- * then we've hit the bottom layer of devices, so just return it.
- *)
- if List.memq dev disks then
- [dev, offset], this_extent
- else (
- let blocksize = dev#blocksize in
- let block = offset /^ blocksize in
- let offset_in_block = offset -^ block *^ blocksize in
-
- (* Map from this block to the devices one layer down. *)
- let devs = dev#map_block block in
-
- (* Get the real device offsets, adding the offset from start of block. *)
- let devs =
- List.map
- (fun (dev, dev_offset) -> dev, dev_offset +^ offset_in_block)
- devs in
-
- let devs =
- List.map
- (fun (dev, dev_offset) ->
- get_next_extent disks dev dev_offset)
- devs in
-
- (* Work out the minimum contiguous extent from this offset. *)
- let devs, extent =
- let extents = List.map snd devs in
- let devs = List.concat (List.map fst devs) in
- let extent = List.fold_left min this_extent extents in
- devs, extent in
-
- devs, extent
- )
-
- and string_of_owner = function
- | `Filesystem {fs_plugin_id = fs_plugin_id; fs_dev = fs_dev} ->
- sprintf "%s(%s)" fs_dev#name fs_plugin_id
- | `PhysicalVolume { pv_uuid = pv_uuid } ->
- "PV:" ^ pv_uuid
- | `Partitions { parts_plugin_id = parts_plugin_id } ->
- parts_plugin_id
- in
-
- (* Build the list of segments. *)
- let ownership : ownership_list = iter_over_machine machine in
-
- (* Group the segments together by disk. *)
- let ownership =
- let ownership = List.sort ownership in
- group_by ownership in
-
- (* If debugging, print the segments that we found. *)
- if !debug then (
- List.iter (
- fun (disk, segments) ->
- eprintf "ownership segment list of %s %s:\n" machine.m_name disk#name;
- List.iter (
- fun (disk_offset, size, owner, owner_offset) ->
- let blocksize = disk#blocksize in
- let disk_offset_in_blocks, disk_offset_in_block =
- disk_offset /^ blocksize, disk_offset %^ blocksize in
- let size_in_blocks, size_in_block =
- size /^ blocksize, size %^ blocksize in
-
- eprintf " %s[%s:%s] %s[%s:%s] %s@%s\n"
- (Int63.to_string disk_offset)
- (Int63.to_string disk_offset_in_blocks)
- (Int63.to_string disk_offset_in_block)
- (Int63.to_string size)
- (Int63.to_string size_in_blocks)
- (Int63.to_string size_in_block)
- (string_of_owner owner)
- (Int63.to_string owner_offset)
- ) segments
- ) ownership
- );
-
- (* Build the segment tree from the ownership list (of segments).
- * For an explanation of this process see:
- * http://en.wikipedia.org/wiki/Segment_tree
- *)
- let ownership =
- List.map (
- fun (disk, segments) ->
- (* Construct the list of distinct endpoints. *)
- let eps =
- List.map
- (fun (start, size, _, _) -> [start; start +^ size])
- segments in
- let eps = sort_uniq (List.concat eps) in
-
- (* Construct the elementary intervals. *)
- let elints =
- let elints, lastpoint =
- List.fold_left (
- fun (elints, prevpoint) point ->
- ((point, point) :: (prevpoint, point) :: elints), point
- ) ([], Int63.min_int) eps in
- let elints = (lastpoint, Int63.max_int) :: elints in
- List.rev elints in
-
- if !debug then (
- eprintf "elementary intervals for %s (%d in total):\n"
- disk#name (List.length elints);
- List.iter (
- fun (startpoint, endpoint) ->
- eprintf " %s %s\n"
- (Int63.to_string startpoint) (Int63.to_string endpoint)
- ) elints
- );
-
- (* Construct the binary tree of elementary intervals. *)
- let tree =
- (* Each elementary interval becomes a leaf. *)
- let elints = List.map (fun elint -> Leaf elint) elints in
- (* Recursively build this into a binary tree. *)
- let rec make_layer = function
- | [] -> []
- | ([_] as x) -> x
- (* Turn pairs of leaves at the bottom level into nodes. *)
- | (Leaf _ as a) :: (Leaf _ as b) :: xs ->
- let xs = make_layer xs in
- Node (a, (), b) :: xs
- (* Turn pairs of nodes at higher levels into nodes. *)
- | (Node _ as left) :: ((Node _|Leaf _) as right) :: xs ->
- let xs = make_layer xs in
- Node (left, (), right) :: xs
- | Leaf _ :: _ -> assert false (* never happens??? (I think) *)
- in
- let rec loop = function
- | [] -> assert false
- | [x] -> x
- | xs -> loop (make_layer xs)
- in
- loop elints in
-
- if !debug then (
- let leaf_printer (startpoint, endpoint) =
- sprintf "%s-%s"
- (Int63.to_string startpoint) (Int63.to_string endpoint)
- in
- let node_printer () = "" in
- print_binary_tree leaf_printer node_printer tree
- );
-
- (* Insert the segments into the tree one by one. *)
- let tree =
- (* For each node/leaf in the tree, add its interval and an
- * empty list which will be used to store the segments.
- *)
- let rec interval_tree = function
- | Leaf elint -> Leaf (elint, [])
- | Node (left, (), right) ->
- let left = interval_tree left in
- let right = interval_tree right in
- let (leftstart, _) = interval_of_node left in
- let (_, rightend) = interval_of_node right in
- let interval = leftstart, rightend in
- Node (left, (interval, []), right)
- and interval_of_node = function
- | Leaf (elint, _) -> elint
- | Node (_, (interval, _), _) -> interval
- in
-
- let tree = interval_tree tree in
- (* This should always be true: *)
- assert (interval_of_node tree = (Int63.min_int, Int63.max_int));
-
- (* "Contained in" operator.
- * 'a <-< b' iff 'a' is a subinterval of 'b'.
- * |<---- a ---->|
- * |<----------- b ----------->|
- *)
- let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in
-
- (* "Intersects" operator.
- * 'a /\ b' iff intervals 'a' and 'b' overlap, eg:
- * |<---- a ---->|
- * |<----------- b ----------->|
- *)
- let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in
-
- let rec insert_segment tree segment =
- let start, size, owner, owner_offset = segment in
- let seginterval = start, start +^ size in
- eprintf "inserting (%s,%s) ...\n"
- (Int63.to_string (fst seginterval))
- (Int63.to_string (snd seginterval));
- match tree with
- (* Test if we should insert into this leaf or node: *)
- | Leaf (interval, segs) when interval <-< seginterval ->
- Leaf (interval, segment :: segs)
- | Node (left, (interval, segs), right)
- when interval <-< seginterval ->
- Node (left, (interval, segment :: segs), right)
-
- | (Leaf _) as leaf -> leaf
-
- (* Else, should we insert into left or right subtrees? *)
- | Node (left, i, right) ->
- let left =
- if seginterval /\ interval_of_node left then
- insert_segment left segment
- else
- left in
- let right =
- if seginterval /\ interval_of_node right then
- insert_segment right segment
- else
- right in
- Node (left, i, right)
- in
- let tree = List.fold_left insert_segment tree segments in
- tree in
-
- if !debug then (
- let printer ((sp, ep), segments) =
- sprintf "[%s-%s] " (Int63.to_string sp) (Int63.to_string ep) ^
- String.concat ";"
- (List.map (fun (_, _, owner,_) -> string_of_owner owner)
- segments)
- in
- print_binary_tree printer printer tree
- );
- (disk, tree)
- ) ownership in
-
- (* Return the ownership structure. *)
- ownership
-
-let get_owners_lookup machine ownership (disk : block_device) =
- (* Get the correct tree. *)
- let tree = List.assoc (disk :> device) ownership in
-
- fun offset ->
- let rec query = function
- | Leaf (_, segments) -> segments
- | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left),
- (_, segments),
- right) ->
- let subsegments =
- if offset < leftend then query left else query right in
- segments @ subsegments
- in
- let owners = query tree in
-
- List.map (
- fun (_, _, owner, owner_offset) -> (owner, offset -^ owner_offset)
- ) owners
+let _ = Diskimage_ext2.id
+let _ = Diskimage_linux_swap.id
+let _ = Diskimage_linux_swsuspend.id
+let _ = Diskimage_fat.id
+let _ = Diskimage_ntfs.id
+let _ = Diskimage_lvm2.id
+let _ = Diskimage_mbr.id