(* Diskimage library for reading disk images. (C) Copyright 2007-2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version, with the OCaml linking exception described in ../COPYING.LIB. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *) open ExtList open Printf open Unix open Int63.Operators let debug = ref false (* Use as the natural block size for disk images, but really we should * use the 'blockdev -getbsz' command to find the real block size. *) let disk_block_size = ~^512 class virtual device = object (self) method virtual size : int63 method virtual name : string method virtual blocksize : int63 method virtual map_block : int63 -> (device * int63) list method virtual contiguous : Int63.t -> Int63.t (* Block-based read. Inefficient so normally overridden in subclasses. *) method read offset len = if offset < ~^0 || len < ~^0 then invalid_arg "device: read: negative offset or length"; let blocksize = self#blocksize in (* Break the request into blocks. * Find the first and last blocks of this request. *) let first_blk = offset /^ blocksize in let offset_in_first_blk = offset -^ first_blk *^ blocksize in let last_blk = (offset +^ len -^ ~^1) /^ blocksize in (* Buffer for the result. *) let buf = Buffer.create (Int63.to_int len) in let not_mapped_error () = invalid_arg "device: read: block not mapped" in (* Copy the first block (partial). *) (match self#map_block first_blk with | [] -> not_mapped_error () | (dev, base) :: _ -> let len = min len (blocksize -^ offset_in_first_blk) in let str = dev#read (base +^ offset_in_first_blk) len in Buffer.add_string buf str ); (* Copy the middle blocks. *) let rec loop blk = if blk < last_blk then ( (match self#map_block blk with | [] -> not_mapped_error () | (dev, base) :: _ -> let str = dev#read ~^0 self#blocksize in Buffer.add_string buf str ); loop (Int63.succ blk) ) in loop (Int63.succ first_blk); (* Copy the last block (partial). *) if first_blk < last_blk then ( match self#map_block last_blk with | [] -> not_mapped_error () | (dev, base) :: _ -> let len = (offset +^ len) -^ last_blk *^ blocksize in let str = dev#read ~^0 len in Buffer.add_string buf str ); assert (Int63.to_int len = Buffer.length buf); Buffer.contents buf (* Helper method to read a chunk of data into a bitstring. *) method read_bitstring offset len = let str = self#read offset len in (str, 0, String.length str lsl 3) end (* A concrete device which just direct-maps a file or /dev device. *) class block_device filename blocksize = let fd = openfile filename [ O_RDONLY ] 0 in let size = Int63.of_int64 (LargeFile.fstat fd).LargeFile.st_size in object (self) inherit device method read offset len = let offset = Int63.to_int64 offset in let len = Int63.to_int len in ignore (LargeFile.lseek fd offset SEEK_SET); let str = String.make len '\000' in ignore (read fd str 0 len); str method size = size method name = filename method blocksize = blocksize method map_block _ = [] method contiguous offset = size -^ offset method close () = close fd end (* A linear offset/size from an underlying device. *) class offset_device name start size blocksize (dev : device) = object inherit device method name = name method size = size method read offset len = if offset < ~^0 || len < ~^0 || offset +^ len > size then invalid_arg ( sprintf "%s: tried to read outside device boundaries (%s/%s/%s)" name (Int63.to_string offset) (Int63.to_string len) (Int63.to_string size) ); dev#read (start+^offset) len method blocksize = blocksize method map_block i = [dev, i *^ blocksize +^ start] method contiguous offset = size -^ offset end (* A device with just a modified block size. *) class blocksize_overlay new_blocksize (dev : device) = object inherit device method name = dev#name method size = dev#size method read = dev#read method blocksize = new_blocksize method map_block new_blk = let orig_blk = new_blk *^ new_blocksize /^ dev#blocksize in dev#map_block orig_blk method contiguous offset = dev#size -^ offset end (* The null device. Any attempt to read generates an error. *) let null_device : device = object inherit device method read _ _ = assert false method size = ~^0 method name = "null" method blocksize = ~^1 method map_block _ = assert false method contiguous _ = ~^0 end type machine = { m_name : string; (* Machine name. *) m_disks : disk list; (* Machine disks. *) m_lv_filesystems : (lv * filesystem) list; (* Machine LV filesystems. *) } and disk = { d_name : string; (* Device name (eg "hda") *) (* About the device itself. *) d_dev : block_device; (* Disk device. *) d_content : disk_content; (* What's on it. *) } and disk_content = [ `Unknown (* Not probed or unknown. *) | `Partitions of partitions (* Contains partitions. *) | `Filesystem of filesystem (* Contains a filesystem directly. *) | `PhysicalVolume of pv (* Contains an LVM PV. *) ] (* Partitions. *) and partitions = { parts_cb : partitioner_callbacks; (* Partitioning scheme. *) parts_dev : device; (* Partitions (whole) device. *) parts : partition list (* Partitions. *) } and partition = { part_status : partition_status; (* Bootable, etc. *) part_type : int; (* Partition filesystem type. *) part_dev : device; (* Partition device. *) part_content : partition_content; (* What's on it. *) } and partition_status = Bootable | Nonbootable | Malformed | NullEntry and partition_content = [ `Unknown (* Not probed or unknown. *) | `Filesystem of filesystem (* Filesystem. *) | `PhysicalVolume of pv (* Contains an LVM PV. *) ] (* Filesystems (also swap devices). *) and filesystem = { fs_cb : filesystem_callbacks; (* Filesystem. *) fs_dev : device; (* Device containing the filesystem. *) fs_blocksize : int63; (* Block size (bytes). *) fs_blocks_total : int63; (* Total blocks. *) fs_is_swap : bool; (* If swap, following not valid. *) fs_blocks_reserved : int63; (* Blocks reserved for super-user. *) fs_blocks_avail : int63; (* Blocks free (available). *) fs_blocks_used : int63; (* Blocks in use. *) fs_inodes_total : int63; (* Total inodes. *) fs_inodes_reserved : int63; (* Inodes reserved for super-user. *) fs_inodes_avail : int63; (* Inodes free (available). *) fs_inodes_used : int63; (* Inodes in use. *) } (* Physical volumes. *) and pv = { pv_cb : lvm_callbacks; (* The LVM plug-in. *) pv_dev : device; (* Device covering whole PV. *) pv_uuid : string; (* UUID. *) } (* Logical volumes. *) and lv = { lv_dev : device; (* Logical volume device. *) } (* Tables of callbacks. *) and partitioner_probe = device -> partitions and partitioner_callbacks = { parts_cb_uq : int; parts_cb_name : string; parts_cb_offset_is_free : partitions -> Int63.t -> bool; } and filesystem_probe = device -> filesystem and filesystem_callbacks = { fs_cb_uq : int; fs_cb_name : string; fs_cb_printable_name : string; fs_cb_offset_is_free : filesystem -> Int63.t -> bool; } and lvm_probe = device -> pv and lvm_callbacks = { lvm_cb_uq : int; lvm_cb_name : string; lvm_cb_list_lvs : pv list -> lv list; lvm_cb_offset_is_free : pv -> Int63.t -> bool; } let name_of_filesystem { fs_cb = { fs_cb_printable_name = name } } = name (*----------------------------------------------------------------------*) (* Helper functions. *) (* Convert a UUID (containing '-' chars) to canonical form. *) let canonical_uuid uuid = let uuid' = String.make 32 ' ' in let j = ref 0 in for i = 0 to String.length uuid - 1 do if !j >= 32 then invalid_arg "canonical_uuid"; let c = uuid.[i] in if c <> '-' then ( uuid'.[!j] <- c; incr j ) done; if !j <> 32 then invalid_arg "canonical_uuid"; uuid' (* This version by Isaac Trotts. *) let group_by ?(cmp = Pervasives.compare) ls = let ls' = List.fold_left (fun acc (day1, x1) -> match acc with [] -> [day1, [x1]] | (day2, ls2) :: acctl -> if cmp day1 day2 = 0 then (day1, x1 :: ls2) :: acctl else (day1, [x1]) :: acc) [] ls in let ls' = List.rev ls' in List.map (fun (x, xs) -> x, List.rev xs) ls' let rec uniq ?(cmp = Pervasives.compare) = function | [] -> [] | [x] -> [x] | x :: y :: xs when cmp x y = 0 -> uniq (x :: xs) | x :: y :: xs -> x :: uniq (y :: xs) let sort_uniq ?cmp xs = let xs = ExtList.List.sort ?cmp xs in let xs = uniq ?cmp xs in xs let rec range a b = if a < b then a :: range (a+1) b else [] (*----------------------------------------------------------------------*) (* The plug-ins. *) let partitioners = ref [] let filesystems = ref [] let lvms = ref [] let register_plugin ?partitioner ?filesystem ?lvm id = (match partitioner with | None -> () | Some probe -> partitioners := !partitioners @ [id, probe] ); (match filesystem with | None -> () | Some probe -> filesystems := !filesystems @ [id, probe] ); (match lvm with | None -> () | Some probe -> lvms := !lvms @ [id, probe] ) (* Probe a device for partitions. Returns [Some parts] or [None]. *) let probe_for_partitions dev = if !debug then eprintf "probing for partitions on %s ...\n%!" dev#name; let rec loop = function | [] -> None | (_, probe) :: rest -> try Some (probe dev) with Not_found -> loop rest in let r = loop !partitioners in if !debug then ( match r with | None -> eprintf "no partitions found on %s\n%!" dev#name | Some { parts_cb = { parts_cb_name = name }; parts = parts } -> eprintf "found %d %s partitions on %s\n" (List.length parts) name dev#name ); r (* Probe a device for a filesystem. Returns [Some fs] or [None]. *) let probe_for_filesystem dev = if !debug then eprintf "probing for a filesystem on %s ...\n%!" dev#name; let rec loop = function | [] -> None | (_, probe) :: rest -> try Some (probe dev) with Not_found -> loop rest in let r = loop !filesystems in if !debug then ( match r with | None -> eprintf "no filesystem found on %s\n%!" dev#name | Some fs -> eprintf "found a filesystem on %s:\n" dev#name; eprintf "\t%s\n%!" fs.fs_cb.fs_cb_name ); r (* Probe a device for a PV. Returns [Some pv] or [None]. *) let probe_for_pv dev = if !debug then eprintf "probing if %s is a PV ...\n%!" dev#name; let rec loop = function | [] -> None | (_, probe) :: rest -> try Some (probe dev) with Not_found -> loop rest in let r = loop !lvms in if !debug then ( match r with | None -> eprintf "no PV found on %s\n%!" dev#name | Some { pv_cb = { lvm_cb_name = name } } -> eprintf "%s contains a %s PV\n%!" dev#name name ); r (* This allows plug-ins to attach their own private data to * the normal plug-in structures (partitions, filesystem, pv, etc.) *) let private_data_functions get_key = let h = Hashtbl.create 13 in (fun struc data -> Hashtbl.replace h (get_key struc) data), (fun struc -> try Hashtbl.find h (get_key struc) with Not_found -> assert false (* internal error in the plug-in *)) (*----------------------------------------------------------------------*) (* Create machine description. *) let open_machine_from_devices name disks = let disks = List.map ( fun (name, dev) -> { d_name = name; d_dev = dev; d_content = `Unknown } ) disks in { m_name = name; m_disks = disks; m_lv_filesystems = [] } let open_machine name disks = let disks = List.map ( fun (name, path) -> let dev = new block_device path disk_block_size (* XXX *) in name, dev ) disks in open_machine_from_devices name disks let close_machine { m_disks = m_disks } = (* Only close the disks, assume all other devices are derived from them. *) List.iter (fun { d_dev = d_dev } -> d_dev#close ()) m_disks (* Main scanning function for filesystems. *) let scan_machine ({ m_disks = m_disks } as machine) = let m_disks = List.map ( fun ({ d_dev = dev } as disk) -> let dev = (dev :> device) in (* See if it is partitioned first. *) let parts = probe_for_partitions dev in match parts with | Some parts -> { disk with d_content = `Partitions parts } | None -> (* Not partitioned. Does it contain a filesystem? *) let fs = probe_for_filesystem dev in match fs with | Some fs -> { disk with d_content = `Filesystem fs } | None -> (* Not partitioned, no filesystem, is it a PV? *) let pv = probe_for_pv dev in match pv with | Some pv -> { disk with d_content = `PhysicalVolume pv } | None -> disk (* Spare/unknown. *) ) m_disks in (* Now we have either detected partitions or a filesystem on each * physical device (or perhaps neither). See what is on those * partitions. *) let m_disks = List.map ( function | ({ d_dev = dev; d_content = `Partitions parts } as disk) -> let ps = List.map ( fun p -> if p.part_status = Bootable || p.part_status = Nonbootable then ( let fs = probe_for_filesystem p.part_dev in match fs with | Some fs -> { p with part_content = `Filesystem fs } | None -> (* Is it a PV? *) let pv = probe_for_pv p.part_dev in match pv with | Some lvm_name -> { p with part_content = `PhysicalVolume lvm_name } | None -> p (* Spare/unknown. *) ) else p ) parts.parts in let parts = { parts with parts = ps } in { disk with d_content = `Partitions parts } | disk -> disk ) m_disks in (* LVM filesystem detection * * Look for all disks/partitions which have been identified as PVs * and pass those back to the respective LVM plugin for LV detection. * * (Note - a two-stage process because an LV can be spread over * several PVs, so we have to detect all PVs belonging to a * domain first). * * XXX To deal with RAID (ie. md devices) we will need to loop * around here because RAID is like LVM except that they normally * present as block devices which can be used by LVM. *) (* First: LV detection. * Find all physical volumes, can be disks or partitions. *) let pvs_on_disks = List.filter_map ( function | { d_content = `PhysicalVolume pv } -> Some pv | _ -> None ) m_disks in let pvs_on_partitions = List.map ( function | { d_content = `Partitions { parts = parts } } -> List.filter_map ( function | { part_content = `PhysicalVolume pv } -> Some pv | _ -> None ) parts | _ -> [] ) m_disks in let lvs = List.concat (pvs_on_disks :: pvs_on_partitions) in (* Second: filesystem on LV detection. * Group the LVs by LVM plug-in ID. *) let lvs = List.map (fun ({pv_cb = {lvm_cb_name = name}} as pv) -> name, pv) lvs in let lvs = List.sort lvs in let lvs = group_by lvs in let lvs = List.map (fun (name, pvs) -> let pv = List.hd pvs in pv.pv_cb.lvm_cb_list_lvs pvs) lvs in let lvs = List.concat lvs in (* lvs is a list of potential LV devices. Now run them through the * probes to see if any contain filesystems. *) let filesystems = List.filter_map ( fun ({ lv_dev = dev } as lv) -> match probe_for_filesystem dev with | Some fs -> Some (lv, fs) | None -> None ) lvs in { machine with m_disks = m_disks; m_lv_filesystems = filesystems } (*----------------------------------------------------------------------*) (* We describe the ownership of each part of the disk using a * segment tree. http://en.wikipedia.org/wiki/Segment_tree * * Note that each part can (and usually is) owned multiple times * (eg. by a filesystem and by the partition that the filesystem * lies inside). Also, the segment tree is effectively read-only. * We build it up as a final step given the flat list of segments * identified by the algorithm in 'iter_over_machine'. *) (* General binary tree type. Data 'a is stored in the leaves and 'b * is stored in the nodes. *) type ('a,'b) binary_tree = | Leaf of 'a | Node of ('a,'b) binary_tree * 'b * ('a,'b) binary_tree (* This prints out the binary tree in graphviz dot format. *) let print_binary_tree leaf_printer node_printer tree = (* Assign a unique, fixed label to each node. *) let label = let i = ref 0 in let hash = Hashtbl.create 13 in fun node -> try Hashtbl.find hash node with Not_found -> let i = incr i; !i in let label = "n" ^ string_of_int i in Hashtbl.add hash node label; label in (* Recursively generate the graphviz file. *) let rec print = function | (Leaf a as leaf) -> eprintf " %s [shape=box, label=\"%s\"];\n" (label leaf) (leaf_printer a) | (Node (left,b,right) as node) -> eprintf " %s [label=\"%s\"];\n" (label node) (node_printer b); eprintf " %s -> %s [tailport=sw];\n" (label node) (label left); eprintf " %s -> %s [tailport=se];\n" (label node) (label right); print left; print right; in eprintf "/* Use 'dot -Tpng foo.dot > foo.png' to convert to a png file. */\n"; eprintf "digraph G {\n"; print tree; eprintf "}\n%!"; type owner = [ `Filesystem of filesystem | `Partitions of partitions | `PhysicalVolume of pv ] (* A segment describes the owner of a range of disk addresses. *) type segment = owner * int63 (* owner, owner offset *) type interval = int63 * int63 (* start point, end point (bytes) *) (* The special segment tree structure that we construct in create_ownership. *) type segment_tree = (interval * segment list, interval * segment list) binary_tree type ownership = (device * (* block_device (disk) *) segment_tree) list (* segment tree for this disk *) (* List of owned segments before we build the segment tree. *) type ownership_list = (device * (* block_device (disk) *) (int63 * int63 * (* disk offset, size of segment *) owner * int63 (* owner, owner offset *) ) ) list (* Ownership tables. *) let create_ownership machine = (* Iterate over all the things which can claim ownership of a * disk block (filesystems, partitions, PVs). *) let rec iter_over_machine ({m_disks = disks; m_lv_filesystems = lv_filesystems} as machine) = (* No segments to begin with. *) let ownership = [] in (* Iterate over disks. *) let ownership = List.fold_left ( fun ownership -> function | { d_content = (`Filesystem fs as owner) } -> iter_over_filesystem machine ownership fs owner | { d_content = (`Partitions parts as owner) } -> iter_over_partitions machine ownership parts owner | { d_content = (`PhysicalVolume pv as owner) } -> iter_over_pv machine ownership pv owner | { d_content = `Unknown } -> ownership ) ownership disks in (* Iterate over LV filesystems. *) let ownership = List.fold_left ( fun ownership (lv, fs) -> let owner = `Filesystem fs in iter_over_filesystem machine ownership fs owner ) ownership lv_filesystems in ownership (* Iterate over the blocks in a single filesystem. *) and iter_over_filesystem machine ownership {fs_dev = dev} owner = iter_over_device machine ownership dev owner (* Iterate over the blocks in a set of partitions, then * iterate over the contents of the partitions. *) and iter_over_partitions machine ownership {parts = parts; parts_dev = parts_dev} owner = let ownership = iter_over_device machine ownership parts_dev owner in let ownership = List.fold_left ( fun ownership -> function | { part_content = (`Filesystem fs as owner) } -> iter_over_filesystem machine ownership fs owner | { part_content = (`PhysicalVolume pv as owner) } -> iter_over_pv machine ownership pv owner | { part_content = `Unknown } -> ownership ) ownership parts in ownership (* Iterate over the blocks in a PV. *) and iter_over_pv machine ownership {pv_dev = dev} owner = iter_over_device machine ownership dev owner (* Iterate over the blocks in a device, assigning ownership to 'owner' * * In reality (1): There can be several owners for each block, so we * incrementally add ownership to the ownership_list (which eventually * will be turned into a segment tree). * In reality (2): Iterating over blocks would take ages and result * in a very inefficient ownership representation. Instead we look * at minimum contiguous extents. *) and iter_over_device { m_disks = disks } ownership dev owner = let size = dev#size in let disks = List.map (fun {d_dev = dev} -> (dev :> device)) disks in let rec loop ownership offset = if offset < size then ( let devs, extent = get_next_extent disks dev offset in if devs = [] then eprintf "warning: no device found under %s\n" (string_of_owner owner); let ownership = List.fold_left ( fun ownership (disk, disk_offset) -> let elem = disk, (disk_offset, extent, owner, offset) in elem :: ownership ) ownership devs in loop ownership (offset +^ extent) ) else ownership in loop ownership ~^0 (* Return the length of the next contiguous region in the device starting * at the given byte offset. Also return the underlying block device(s) * if there is one. *) and get_next_extent disks (dev : device) offset = let this_extent = dev#contiguous offset in (* If this disk is a block_device (a member of the 'disks' list) * then we've hit the bottom layer of devices, so just return it. *) if List.memq dev disks then [dev, offset], this_extent else ( let blocksize = dev#blocksize in let block = offset /^ blocksize in let offset_in_block = offset -^ block *^ blocksize in (* Map from this block to the devices one layer down. *) let devs = dev#map_block block in (* Get the real device offsets, adding the offset from start of block. *) let devs = List.map (fun (dev, dev_offset) -> dev, dev_offset +^ offset_in_block) devs in let devs = List.map (fun (dev, dev_offset) -> get_next_extent disks dev dev_offset) devs in (* Work out the minimum contiguous extent from this offset. *) let devs, extent = let extents = List.map snd devs in let devs = List.concat (List.map fst devs) in let extent = List.fold_left min this_extent extents in devs, extent in devs, extent ) and string_of_owner = function | `Filesystem {fs_cb = {fs_cb_name = name}; fs_dev = fs_dev} -> sprintf "%s(%s)" fs_dev#name name | `PhysicalVolume { pv_uuid = pv_uuid } -> "PV:" ^ pv_uuid | `Partitions { parts_cb = {parts_cb_name = name} } -> name in (* Build the list of segments. *) let ownership : ownership_list = iter_over_machine machine in (* Group the segments together by disk. *) let ownership = let ownership = List.sort ownership in group_by ownership in (* If debugging, print the segments that we found. *) if !debug then ( List.iter ( fun (disk, segments) -> eprintf "ownership segment list of %s %s:\n" machine.m_name disk#name; List.iter ( fun (disk_offset, size, owner, owner_offset) -> let blocksize = disk#blocksize in let disk_offset_in_blocks, disk_offset_in_block = disk_offset /^ blocksize, disk_offset %^ blocksize in let size_in_blocks, size_in_block = size /^ blocksize, size %^ blocksize in eprintf " %s[%s:%s] %s[%s:%s] %s@%s\n" (Int63.to_string disk_offset) (Int63.to_string disk_offset_in_blocks) (Int63.to_string disk_offset_in_block) (Int63.to_string size) (Int63.to_string size_in_blocks) (Int63.to_string size_in_block) (string_of_owner owner) (Int63.to_string owner_offset) ) segments ) ownership ); (* Build the segment tree from the ownership list (of segments). * For an explanation of this process see: * http://en.wikipedia.org/wiki/Segment_tree *) let ownership = List.map ( fun (disk, segments) -> (* Construct the list of distinct endpoints. *) let eps = List.map (fun (start, size, _, _) -> [start; start +^ size]) segments in let eps = sort_uniq (List.concat eps) in (* Construct the elementary intervals. *) let elints = let elints, lastpoint = List.fold_left ( fun (elints, prevpoint) point -> ((point, point) :: (prevpoint, point) :: elints), point ) ([], Int63.min_int) eps in let elints = (lastpoint, Int63.max_int) :: elints in List.rev elints in if !debug then ( eprintf "elementary intervals for %s (%d in total):\n" disk#name (List.length elints); List.iter ( fun (startpoint, endpoint) -> eprintf " %s %s\n" (Int63.to_string startpoint) (Int63.to_string endpoint) ) elints ); (* Construct the binary tree of elementary intervals. *) let tree = (* Each elementary interval becomes a leaf. *) let elints = List.map (fun elint -> Leaf elint) elints in (* Recursively build this into a binary tree. *) let rec make_layer = function | [] -> [] | ([_] as x) -> x (* Turn pairs of leaves at the bottom level into nodes. *) | (Leaf _ as a) :: (Leaf _ as b) :: xs -> let xs = make_layer xs in Node (a, (), b) :: xs (* Turn pairs of nodes at higher levels into nodes. *) | (Node _ as left) :: ((Node _|Leaf _) as right) :: xs -> let xs = make_layer xs in Node (left, (), right) :: xs | Leaf _ :: _ -> assert false (* never happens??? (I think) *) in let rec loop = function | [] -> assert false | [x] -> x | xs -> loop (make_layer xs) in loop elints in if !debug then ( let leaf_printer (startpoint, endpoint) = sprintf "%s-%s" (Int63.to_string startpoint) (Int63.to_string endpoint) in let node_printer () = "" in print_binary_tree leaf_printer node_printer tree ); (* Insert the segments into the tree one by one. *) let tree = (* For each node/leaf in the tree, add its interval and an * empty list which will be used to store the segments. *) let rec interval_tree = function | Leaf elint -> Leaf (elint, []) | Node (left, (), right) -> let left = interval_tree left in let right = interval_tree right in let (leftstart, _) = interval_of_node left in let (_, rightend) = interval_of_node right in let interval = leftstart, rightend in Node (left, (interval, []), right) and interval_of_node = function | Leaf (elint, _) -> elint | Node (_, (interval, _), _) -> interval in let tree = interval_tree tree in (* This should always be true: *) assert (interval_of_node tree = (Int63.min_int, Int63.max_int)); (* "Contained in" operator. * 'a <-< b' iff 'a' is a subinterval of 'b'. * |<---- a ---->| * |<----------- b ----------->| *) let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in (* "Intersects" operator. * 'a /\ b' iff intervals 'a' and 'b' overlap, eg: * |<---- a ---->| * |<----------- b ----------->| *) let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in let rec insert_segment tree segment = let start, size, owner, owner_offset = segment in let seginterval = start, start +^ size in let seg = owner, owner_offset in match tree with (* Test if we should insert into this leaf or node: *) | Leaf (interval, segs) when interval <-< seginterval -> Leaf (interval, seg :: segs) | Node (left, (interval, segs), right) when interval <-< seginterval -> Node (left, (interval, seg :: segs), right) | (Leaf _) as leaf -> leaf (* Else, should we insert into left or right subtrees? *) | Node (left, i, right) -> let left = if seginterval /\ interval_of_node left then insert_segment left segment else left in let right = if seginterval /\ interval_of_node right then insert_segment right segment else right in Node (left, i, right) in let tree = List.fold_left insert_segment tree segments in tree in if !debug then ( let printer ((sp, ep), segments) = sprintf "[%s-%s] " (Int63.to_string sp) (Int63.to_string ep) ^ String.concat ";" (List.map (fun (owner,_) -> string_of_owner owner) segments) in print_binary_tree printer printer tree ); (disk, tree) ) ownership in (* Return the ownership structure. *) ownership let get_owners_lookup machine ownership (disk : block_device) = (* Get the correct tree. *) let tree = List.assoc (disk :> device) ownership in fun offset -> (* Warning: This 'hot' code was carefully optimized based on * feedback from 'gprof'. Avoid fiddling with it. *) let rec query = function | Leaf (_, segments) -> segments (* Try to avoid expensive '@' operator if node segments is empty: *) | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), (_, []), right) -> let subsegments = if offset < leftend then query left else query right in subsegments (* ... or a singleton: *) | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), (_, [segment]), right) -> let subsegments = if offset < leftend then query left else query right in segment :: subsegments (* Normal recursive case: *) | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), (_, segments), right) -> let subsegments = if offset < leftend then query left else query right in segments @ subsegments in let owners = query tree in List.map ( fun (owner, owner_offset) -> (owner, offset -^ owner_offset) ) owners (* Find out if a disk offset is free. * Current algorithm just checks that at least one owner says * it is free. We could be smarter about this. *) let offset_is_free owners = List.exists ( function | `Filesystem fs, offset -> fs.fs_cb.fs_cb_offset_is_free fs offset | `Partitions parts, offset -> parts.parts_cb.parts_cb_offset_is_free parts offset | `PhysicalVolume pv, offset -> pv.pv_cb.lvm_cb_offset_is_free pv offset ) owners