From: Richard W.M. Jones <"Richard W.M. Jones "> Date: Thu, 1 May 2008 12:41:24 +0000 (+0100) Subject: Build the segment tree! Commit this quick before something bad happens ... X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=85e0fa36cd0d2ff50e4d6c5811fc9761553be1c3;p=virt-df.git Build the segment tree! Commit this quick before something bad happens ... --- diff --git a/lib/diskimage.ml b/lib/diskimage.ml index c5923cd..71f56b4 100644 --- a/lib/diskimage.ml +++ b/lib/diskimage.ml @@ -264,17 +264,67 @@ let scan_machine ({ m_disks = m_disks } as machine) = * identified by the algorithm in 'iter_over_machine'. *) -type ownership = unit +(* General binary tree type. Data 'a is stored in the leaves and 'b + * is stored in the nodes. + *) +type ('a,'b) binary_tree = + | Leaf of 'a + | Node of ('a,'b) binary_tree * 'b * ('a,'b) binary_tree + +(* This prints out the binary tree in graphviz dot format. *) +let print_binary_tree leaf_printer node_printer tree = + (* Assign a unique, fixed label to each node. *) + let label = + let i = ref 0 in + let hash = Hashtbl.create 13 in + fun node -> + try Hashtbl.find hash node + with Not_found -> + let i = incr i; !i in + let label = "n" ^ string_of_int i in + Hashtbl.add hash node label; + label + in + (* Recursively generate the graphviz file. *) + let rec print = function + | (Leaf a as leaf) -> + eprintf " %s [shape=box, label=\"%s\"];\n" + (label leaf) (leaf_printer a) + | (Node (left,b,right) as node) -> + eprintf " %s [label=\"%s\"];\n" + (label node) (node_printer b); + eprintf " %s -> %s [tailport=sw];\n" (label node) (label left); + eprintf " %s -> %s [tailport=se];\n" (label node) (label right); + print left; + print right; + in + eprintf "/* Use 'dot -Tpng foo.dot > foo.png' to convert to a png file. */\n"; + eprintf "digraph G {\n"; + print tree; + eprintf "}\n"; + +(* A segment describes the owner of a range of disk addresses. *) +type segment = + int63 * int63 * (* disk offset, size of segment *) + [ `Filesystem of filesystem + | `Partitions of partitions + | `PhysicalVolume of pv ] * (* owner *) + int63 (* owner offset *) + +type interval = int63 * int63 (* start point, end point (bytes) *) + +(* The special segment tree structure that we construct in create_ownership. *) +type segment_tree = + (interval * segment list, interval * segment list) binary_tree + +type ownership = + (device * (* block_device (disk) *) + segment_tree) list (* segment tree for this disk *) (* List of owned segments before we build the segment tree. *) type ownership_list = (device * (* block_device (disk) *) - int63 * int63 * (* disk offset, size of segment *) - [ `Filesystem of filesystem - | `Partitions of partitions - | `PhysicalVolume of pv ] * (* owner *) - int63 (* owner offset *) - ) list + segment) list (* Ownership tables. *) let create_ownership machine = @@ -361,7 +411,7 @@ let create_ownership machine = let ownership = List.fold_left ( fun ownership (disk, disk_offset) -> - let elem = disk, disk_offset, extent, owner, offset in + let elem = disk, (disk_offset, extent, owner, offset) in elem :: ownership ) ownership devs in loop ownership (offset +^ extent) @@ -420,29 +470,190 @@ let create_ownership machine = | `Partitions { parts_plugin_id = parts_plugin_id } -> parts_plugin_id in - let ownership = iter_over_machine machine in + + (* Build the list of segments. *) + let ownership : ownership_list = iter_over_machine machine in + + (* Group the segments together by disk. *) + let ownership = + let ownership = List.sort ownership in + group_by ownership in (* If debugging, print the segments that we found. *) if !debug then ( - let ownership = List.rev ownership in - eprintf "ownership segment list of %s:\n" machine.m_name; List.iter ( - fun (disk, disk_offset, size, owner, owner_offset) -> - let blocksize = disk#blocksize in - let disk_offset_in_blocks, disk_offset_in_block = - disk_offset /^ blocksize, disk_offset %^ blocksize in - let size_in_blocks, size_in_block = - size /^ blocksize, size %^ blocksize in - - eprintf "%s %s[%s:%s] %s[%s:%s] %s@%s\n" - disk#name - (Int63.to_string disk_offset) - (Int63.to_string disk_offset_in_blocks) - (Int63.to_string disk_offset_in_block) - (Int63.to_string size) - (Int63.to_string size_in_blocks) - (Int63.to_string size_in_block) - (string_of_owner owner) - (Int63.to_string owner_offset) + fun (disk, segments) -> + eprintf "ownership segment list of %s %s:\n" machine.m_name disk#name; + List.iter ( + fun (disk_offset, size, owner, owner_offset) -> + let blocksize = disk#blocksize in + let disk_offset_in_blocks, disk_offset_in_block = + disk_offset /^ blocksize, disk_offset %^ blocksize in + let size_in_blocks, size_in_block = + size /^ blocksize, size %^ blocksize in + + eprintf " %s[%s:%s] %s[%s:%s] %s@%s\n" + (Int63.to_string disk_offset) + (Int63.to_string disk_offset_in_blocks) + (Int63.to_string disk_offset_in_block) + (Int63.to_string size) + (Int63.to_string size_in_blocks) + (Int63.to_string size_in_block) + (string_of_owner owner) + (Int63.to_string owner_offset) + ) segments ) ownership - ) + ); + + (* Build the segment tree from the ownership list (of segments). + * For an explanation of this process see: + * http://en.wikipedia.org/wiki/Segment_tree + *) + let ownership = + List.map ( + fun (disk, segments) -> + (* Construct the list of distinct endpoints. *) + let eps = + List.map + (fun (start, size, _, _) -> [start; start +^ size]) + segments in + let eps = sort_uniq (List.concat eps) in + + (* Construct the elementary intervals. *) + let elints = + let elints, lastpoint = + List.fold_left ( + fun (elints, prevpoint) point -> + ((point, point) :: (prevpoint, point) :: elints), point + ) ([], Int63.min_int) eps in + let elints = (lastpoint, Int63.max_int) :: elints in + List.rev elints in + + if !debug then ( + eprintf "elementary intervals for %s (%d in total):\n" + disk#name (List.length elints); + List.iter ( + fun (startpoint, endpoint) -> + eprintf " %s %s\n" + (Int63.to_string startpoint) (Int63.to_string endpoint) + ) elints + ); + + (* Construct the binary tree of elementary intervals. *) + let tree = + (* Each elementary interval becomes a leaf. *) + let elints = List.map (fun elint -> Leaf elint) elints in + (* Recursively build this into a binary tree. *) + let rec make_layer = function + | [] -> [] + | ([_] as x) -> x + (* Turn pairs of leaves at the bottom level into nodes. *) + | (Leaf _ as a) :: (Leaf _ as b) :: xs -> + let xs = make_layer xs in + Node (a, (), b) :: xs + (* Turn pairs of nodes at higher levels into nodes. *) + | (Node _ as left) :: ((Node _|Leaf _) as right) :: xs -> + let xs = make_layer xs in + Node (left, (), right) :: xs + | Leaf _ :: _ -> assert false (* never happens??? (I think) *) + in + let rec loop = function + | [] -> assert false + | [x] -> x + | xs -> loop (make_layer xs) + in + loop elints in + + if !debug then ( + let leaf_printer (startpoint, endpoint) = + sprintf "%s-%s" + (Int63.to_string startpoint) (Int63.to_string endpoint) + in + let node_printer () = "" in + print_binary_tree leaf_printer node_printer tree + ); + + (* Insert the segments into the tree one by one. *) + let tree = + (* For each node/leaf in the tree, add its interval and an + * empty list which will be used to store the segments. + *) + let rec interval_tree = function + | Leaf elint -> Leaf (elint, []) + | Node (left, (), right) -> + let left = interval_tree left in + let right = interval_tree right in + let (leftstart, _) = interval_of_node left in + let (_, rightend) = interval_of_node right in + let interval = leftstart, rightend in + Node (left, (interval, []), right) + and interval_of_node = function + | Leaf (elint, _) -> elint + | Node (_, (interval, _), _) -> interval + in + + let tree = interval_tree tree in + (* This should always be true: *) + assert (interval_of_node tree = (Int63.min_int, Int63.max_int)); + + (* "Contained in" operator. + * 'a <-< b' iff 'a' is a subinterval of 'b'. + * |<---- a ---->| + * |<----------- b ----------->| + *) + let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in + + (* "Intersects" operator. + * 'a /\ b' iff intervals 'a' and 'b' overlap, eg: + * |<---- a ---->| + * |<----------- b ----------->| + *) + let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in + + let rec insert_segment tree segment = + let start, size, owner, owner_offset = segment in + let seginterval = start, start +^ size in + eprintf "inserting (%s,%s) ...\n" + (Int63.to_string (fst seginterval)) + (Int63.to_string (snd seginterval)); + match tree with + (* Test if we should insert into this leaf or node: *) + | Leaf (interval, segs) when interval <-< seginterval -> + Leaf (interval, segment :: segs) + | Node (left, (interval, segs), right) + when interval <-< seginterval -> + Node (left, (interval, segment :: segs), right) + + | (Leaf _) as leaf -> leaf + + (* Else, should we insert into left or right subtrees? *) + | Node (left, i, right) -> + let left = + if seginterval /\ interval_of_node left then + insert_segment left segment + else + left in + let right = + if seginterval /\ interval_of_node right then + insert_segment right segment + else + right in + Node (left, i, right) + in + let tree = List.fold_left insert_segment tree segments in + tree in + + if !debug then ( + let printer ((sp, ep), segments) = + sprintf "[%s-%s] " (Int63.to_string sp) (Int63.to_string ep) ^ + String.concat ";" + (List.map (fun (_, _, owner,_) -> string_of_owner owner) + segments) + in + print_binary_tree printer printer tree + ); + (disk, tree) + ) ownership in + + (* Return the ownership structure. *) + ownership