1 (* Diskimage library for reading disk images.
2 (C) Copyright 2007-2008 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 include Diskimage_utils
28 (* Use as the natural block size for disk images, but really we should
29 * use the 'blockdev -getbsz' command to find the real block size.
31 let disk_block_size = ~^512
33 (*----------------------------------------------------------------------*)
35 let partition_types = [
36 Diskimage_mbr.plugin_id,
37 ("MBR", Diskimage_mbr.callbacks);
40 let filesystem_types = [
41 Diskimage_ext2.plugin_id,
42 ("Linux ext2/3", Diskimage_ext2.callbacks);
43 Diskimage_linux_swap.plugin_id,
44 ("Linux swap", Diskimage_linux_swap.callbacks);
45 Diskimage_linux_swsuspend.plugin_id,
46 ("Linux s/w suspend", Diskimage_linux_swsuspend.callbacks);
50 Diskimage_lvm2.plugin_id,
51 ("Linux LVM2", Diskimage_lvm2.callbacks);
54 let name_of_parts id =
55 let name, _ = List.assoc id partition_types in
57 let name_of_filesystem id =
58 let name, _ = List.assoc id filesystem_types in
61 let name, _ = List.assoc id lvm_types in
64 (* Probe a device for partitions. Returns [Some parts] or [None]. *)
65 let probe_for_partitions dev =
66 if !debug then eprintf "probing for partitions on %s ...\n%!" dev#name;
67 let rec loop = function
69 | (parts_plugin_id, (_, cb)) :: rest ->
70 try Some (cb.parts_cb_probe dev)
71 with Not_found -> loop rest
73 let r = loop partition_types in
76 | None -> eprintf "no partitions found on %s\n%!" dev#name
77 | Some { parts_plugin_id = name; parts = parts } ->
78 eprintf "found %d %s partitions on %s\n"
79 (List.length parts) name dev#name
83 let parts_offset_is_free ({ parts_plugin_id = parts_name } as parts) offset =
84 let _, cb = List.assoc parts_name partition_types in
85 cb.parts_cb_offset_is_free parts offset
87 (* Probe a device for a filesystem. Returns [Some fs] or [None]. *)
88 let probe_for_filesystem dev =
89 if !debug then eprintf "probing for a filesystem on %s ...\n%!" dev#name;
90 let rec loop = function
92 | (fs_name, (_, cb)) :: rest ->
93 try Some (cb.fs_cb_probe dev)
94 with Not_found -> loop rest
96 let r = loop filesystem_types in
99 | None -> eprintf "no filesystem found on %s\n%!" dev#name
101 eprintf "found a filesystem on %s:\n" dev#name;
102 eprintf "\t%s\n%!" fs.fs_plugin_id
106 let fs_offset_is_free ({ fs_plugin_id = fs_name } as fs) offset =
107 let _, cb = List.assoc fs_name filesystem_types in
108 cb.fs_cb_offset_is_free fs offset
110 (* Probe a device for a PV. Returns [Some lvm_name] or [None]. *)
111 let probe_for_pv dev =
112 if !debug then eprintf "probing if %s is a PV ...\n%!" dev#name;
113 let rec loop = function
115 | (lvm_name, (_, cb)) :: rest ->
116 try Some (cb.lvm_cb_probe lvm_name dev)
117 with Not_found -> loop rest
119 let r = loop lvm_types in
122 | None -> eprintf "no PV found on %s\n%!" dev#name
123 | Some { lvm_plugin_id = name } ->
124 eprintf "%s contains a %s PV\n%!" dev#name name
128 let list_lvs lvm_name devs =
129 let _, cb = List.assoc lvm_name lvm_types in
130 cb.lvm_cb_list_lvs devs
132 let lvm_offset_is_free ({ lvm_plugin_id = lvm_name } as pv) offset =
133 let _, cb = List.assoc lvm_name lvm_types in
134 cb.lvm_cb_offset_is_free pv offset
136 (*----------------------------------------------------------------------*)
137 (* Create machine description. *)
138 let open_machine name disks =
139 let disks = List.map (
141 let dev = new block_device path disk_block_size (* XXX *) in
142 { d_name = name; d_dev = dev; d_content = `Unknown }
144 { m_name = name; m_disks = disks; m_lv_filesystems = [] }
146 let close_machine { m_disks = m_disks } =
147 (* Only close the disks, assume all other devices are derived from them. *)
148 List.iter (fun { d_dev = d_dev } -> d_dev#close ()) m_disks
150 (* Main scanning function for filesystems. *)
151 let scan_machine ({ m_disks = m_disks } as machine) =
152 let m_disks = List.map (
153 fun ({ d_dev = dev } as disk) ->
154 let dev = (dev :> device) in
155 (* See if it is partitioned first. *)
156 let parts = probe_for_partitions dev in
159 { disk with d_content = `Partitions parts }
161 (* Not partitioned. Does it contain a filesystem? *)
162 let fs = probe_for_filesystem dev in
165 { disk with d_content = `Filesystem fs }
167 (* Not partitioned, no filesystem, is it a PV? *)
168 let pv = probe_for_pv dev in
171 { disk with d_content = `PhysicalVolume lvm_name }
173 disk (* Spare/unknown. *)
176 (* Now we have either detected partitions or a filesystem on each
177 * physical device (or perhaps neither). See what is on those
180 let m_disks = List.map (
182 | ({ d_dev = dev; d_content = `Partitions parts } as disk) ->
185 if p.part_status = Bootable || p.part_status = Nonbootable then (
186 let fs = probe_for_filesystem p.part_dev in
189 { p with part_content = `Filesystem fs }
192 let pv = probe_for_pv p.part_dev in
195 { p with part_content = `PhysicalVolume lvm_name }
197 p (* Spare/unknown. *)
200 let parts = { parts with parts = ps } in
201 { disk with d_content = `Partitions parts }
205 (* LVM filesystem detection
207 * Look for all disks/partitions which have been identified as PVs
208 * and pass those back to the respective LVM plugin for LV detection.
210 * (Note - a two-stage process because an LV can be spread over
211 * several PVs, so we have to detect all PVs belonging to a
214 * XXX To deal with RAID (ie. md devices) we will need to loop
215 * around here because RAID is like LVM except that they normally
216 * present as block devices which can be used by LVM.
218 (* First: LV detection.
219 * Find all physical volumes, can be disks or partitions.
221 let pvs_on_disks = List.filter_map (
224 d_content = `PhysicalVolume pv } -> Some (pv, (d_dev :> device))
227 let pvs_on_partitions = List.map (
229 | { d_content = `Partitions { parts = parts } } ->
232 | { part_dev = part_dev;
233 part_content = `PhysicalVolume pv } ->
239 let lvs = List.concat (pvs_on_disks :: pvs_on_partitions) in
241 (* Second: filesystem on LV detection.
242 * Group the LVs by plug-in type.
244 let cmp (a,_) (b,_) = compare a b in
245 let lvs = List.sort ~cmp lvs in
246 let lvs = group_by lvs in
249 List.map (fun (pv, devs) -> list_lvs pv.lvm_plugin_id devs) lvs in
250 let lvs = List.concat lvs in
252 (* lvs is a list of potential LV devices. Now run them through the
253 * probes to see if any contain filesystems.
257 fun ({ lv_dev = dev } as lv) ->
258 match probe_for_filesystem dev with
259 | Some fs -> Some (lv, fs)
265 m_lv_filesystems = filesystems }
267 (*----------------------------------------------------------------------*)
269 (* We describe the ownership of each part of the disk using a
270 * segment tree. http://en.wikipedia.org/wiki/Segment_tree
272 * Note that each part can (and usually is) owned multiple times
273 * (eg. by a filesystem and by the partition that the filesystem
274 * lies inside). Also, the segment tree is effectively read-only.
275 * We build it up as a final step given the flat list of segments
276 * identified by the algorithm in 'iter_over_machine'.
279 (* General binary tree type. Data 'a is stored in the leaves and 'b
280 * is stored in the nodes.
282 type ('a,'b) binary_tree =
284 | Node of ('a,'b) binary_tree * 'b * ('a,'b) binary_tree
286 (* This prints out the binary tree in graphviz dot format. *)
287 let print_binary_tree leaf_printer node_printer tree =
288 (* Assign a unique, fixed label to each node. *)
291 let hash = Hashtbl.create 13 in
293 try Hashtbl.find hash node
295 let i = incr i; !i in
296 let label = "n" ^ string_of_int i in
297 Hashtbl.add hash node label;
300 (* Recursively generate the graphviz file. *)
301 let rec print = function
302 | (Leaf a as leaf) ->
303 eprintf " %s [shape=box, label=\"%s\"];\n"
304 (label leaf) (leaf_printer a)
305 | (Node (left,b,right) as node) ->
306 eprintf " %s [label=\"%s\"];\n"
307 (label node) (node_printer b);
308 eprintf " %s -> %s [tailport=sw];\n" (label node) (label left);
309 eprintf " %s -> %s [tailport=se];\n" (label node) (label right);
313 eprintf "/* Use 'dot -Tpng foo.dot > foo.png' to convert to a png file. */\n";
314 eprintf "digraph G {\n";
319 [ `Filesystem of filesystem
320 | `Partitions of partitions
321 | `PhysicalVolume of pv ]
323 (* A segment describes the owner of a range of disk addresses. *)
324 type segment = owner * int63 (* owner, owner offset *)
326 type interval = int63 * int63 (* start point, end point (bytes) *)
328 (* The special segment tree structure that we construct in create_ownership. *)
330 (interval * segment list, interval * segment list) binary_tree
333 (device * (* block_device (disk) *)
334 segment_tree) list (* segment tree for this disk *)
336 (* List of owned segments before we build the segment tree. *)
337 type ownership_list =
338 (device * (* block_device (disk) *)
339 (int63 * int63 * (* disk offset, size of segment *)
340 owner * int63 (* owner, owner offset *)
344 (* Ownership tables. *)
345 let create_ownership machine =
346 (* Iterate over all the things which can claim ownership of a
347 * disk block (filesystems, partitions, PVs).
349 let rec iter_over_machine
350 ({m_disks = disks; m_lv_filesystems = lv_filesystems} as machine) =
352 (* No segments to begin with. *)
353 let ownership = [] in
355 (* Iterate over disks. *)
360 | { d_content = (`Filesystem fs as owner) } ->
361 iter_over_filesystem machine ownership fs owner
362 | { d_content = (`Partitions parts as owner) } ->
363 iter_over_partitions machine ownership parts owner
364 | { d_content = (`PhysicalVolume pv as owner) } ->
365 iter_over_pv machine ownership pv owner
366 | { d_content = `Unknown } -> ownership
369 (* Iterate over LV filesystems. *)
372 fun ownership (lv, fs) ->
373 let owner = `Filesystem fs in
374 iter_over_filesystem machine ownership fs owner
375 ) ownership lv_filesystems in
379 (* Iterate over the blocks in a single filesystem. *)
380 and iter_over_filesystem machine ownership {fs_dev = dev} owner =
381 iter_over_device machine ownership dev owner
383 (* Iterate over the blocks in a set of partitions, then
384 * iterate over the contents of the partitions.
386 and iter_over_partitions machine ownership
387 {parts = parts; parts_dev = parts_dev} owner =
388 let ownership = iter_over_device machine ownership parts_dev owner in
394 | { part_content = (`Filesystem fs as owner) } ->
395 iter_over_filesystem machine ownership fs owner
396 | { part_content = (`PhysicalVolume pv as owner) } ->
397 iter_over_pv machine ownership pv owner
398 | { part_content = `Unknown } -> ownership
403 (* Iterate over the blocks in a PV. *)
404 and iter_over_pv machine ownership {pv_dev = dev} owner =
405 iter_over_device machine ownership dev owner
407 (* Iterate over the blocks in a device, assigning ownership to 'owner'
409 * In reality (1): There can be several owners for each block, so we
410 * incrementally add ownership to the ownership_list (which eventually
411 * will be turned into a segment tree).
412 * In reality (2): Iterating over blocks would take ages and result
413 * in a very inefficient ownership representation. Instead we look
414 * at minimum contiguous extents.
416 and iter_over_device { m_disks = disks } ownership dev owner =
417 let size = dev#size in
418 let disks = List.map (fun {d_dev = dev} -> (dev :> device)) disks in
420 let rec loop ownership offset =
421 if offset < size then (
422 let devs, extent = get_next_extent disks dev offset in
424 eprintf "warning: no device found under %s\n"
425 (string_of_owner owner);
428 fun ownership (disk, disk_offset) ->
429 let elem = disk, (disk_offset, extent, owner, offset) in
432 loop ownership (offset +^ extent)
438 (* Return the length of the next contiguous region in the device starting
439 * at the given byte offset. Also return the underlying block device(s)
442 and get_next_extent disks (dev : device) offset =
443 let this_extent = dev#contiguous offset in
445 (* If this disk is a block_device (a member of the 'disks' list)
446 * then we've hit the bottom layer of devices, so just return it.
448 if List.memq dev disks then
449 [dev, offset], this_extent
451 let blocksize = dev#blocksize in
452 let block = offset /^ blocksize in
453 let offset_in_block = offset -^ block *^ blocksize in
455 (* Map from this block to the devices one layer down. *)
456 let devs = dev#map_block block in
458 (* Get the real device offsets, adding the offset from start of block. *)
461 (fun (dev, dev_offset) -> dev, dev_offset +^ offset_in_block)
466 (fun (dev, dev_offset) ->
467 get_next_extent disks dev dev_offset)
470 (* Work out the minimum contiguous extent from this offset. *)
472 let extents = List.map snd devs in
473 let devs = List.concat (List.map fst devs) in
474 let extent = List.fold_left min this_extent extents in
480 and string_of_owner = function
481 | `Filesystem {fs_plugin_id = fs_plugin_id; fs_dev = fs_dev} ->
482 sprintf "%s(%s)" fs_dev#name fs_plugin_id
483 | `PhysicalVolume { pv_uuid = pv_uuid } ->
485 | `Partitions { parts_plugin_id = parts_plugin_id } ->
489 (* Build the list of segments. *)
490 let ownership : ownership_list = iter_over_machine machine in
492 (* Group the segments together by disk. *)
494 let ownership = List.sort ownership in
495 group_by ownership in
497 (* If debugging, print the segments that we found. *)
500 fun (disk, segments) ->
501 eprintf "ownership segment list of %s %s:\n" machine.m_name disk#name;
503 fun (disk_offset, size, owner, owner_offset) ->
504 let blocksize = disk#blocksize in
505 let disk_offset_in_blocks, disk_offset_in_block =
506 disk_offset /^ blocksize, disk_offset %^ blocksize in
507 let size_in_blocks, size_in_block =
508 size /^ blocksize, size %^ blocksize in
510 eprintf " %s[%s:%s] %s[%s:%s] %s@%s\n"
511 (Int63.to_string disk_offset)
512 (Int63.to_string disk_offset_in_blocks)
513 (Int63.to_string disk_offset_in_block)
514 (Int63.to_string size)
515 (Int63.to_string size_in_blocks)
516 (Int63.to_string size_in_block)
517 (string_of_owner owner)
518 (Int63.to_string owner_offset)
523 (* Build the segment tree from the ownership list (of segments).
524 * For an explanation of this process see:
525 * http://en.wikipedia.org/wiki/Segment_tree
529 fun (disk, segments) ->
530 (* Construct the list of distinct endpoints. *)
533 (fun (start, size, _, _) -> [start; start +^ size])
535 let eps = sort_uniq (List.concat eps) in
537 (* Construct the elementary intervals. *)
539 let elints, lastpoint =
541 fun (elints, prevpoint) point ->
542 ((point, point) :: (prevpoint, point) :: elints), point
543 ) ([], Int63.min_int) eps in
544 let elints = (lastpoint, Int63.max_int) :: elints in
548 eprintf "elementary intervals for %s (%d in total):\n"
549 disk#name (List.length elints);
551 fun (startpoint, endpoint) ->
553 (Int63.to_string startpoint) (Int63.to_string endpoint)
557 (* Construct the binary tree of elementary intervals. *)
559 (* Each elementary interval becomes a leaf. *)
560 let elints = List.map (fun elint -> Leaf elint) elints in
561 (* Recursively build this into a binary tree. *)
562 let rec make_layer = function
565 (* Turn pairs of leaves at the bottom level into nodes. *)
566 | (Leaf _ as a) :: (Leaf _ as b) :: xs ->
567 let xs = make_layer xs in
568 Node (a, (), b) :: xs
569 (* Turn pairs of nodes at higher levels into nodes. *)
570 | (Node _ as left) :: ((Node _|Leaf _) as right) :: xs ->
571 let xs = make_layer xs in
572 Node (left, (), right) :: xs
573 | Leaf _ :: _ -> assert false (* never happens??? (I think) *)
575 let rec loop = function
578 | xs -> loop (make_layer xs)
583 let leaf_printer (startpoint, endpoint) =
585 (Int63.to_string startpoint) (Int63.to_string endpoint)
587 let node_printer () = "" in
588 print_binary_tree leaf_printer node_printer tree
591 (* Insert the segments into the tree one by one. *)
593 (* For each node/leaf in the tree, add its interval and an
594 * empty list which will be used to store the segments.
596 let rec interval_tree = function
597 | Leaf elint -> Leaf (elint, [])
598 | Node (left, (), right) ->
599 let left = interval_tree left in
600 let right = interval_tree right in
601 let (leftstart, _) = interval_of_node left in
602 let (_, rightend) = interval_of_node right in
603 let interval = leftstart, rightend in
604 Node (left, (interval, []), right)
605 and interval_of_node = function
606 | Leaf (elint, _) -> elint
607 | Node (_, (interval, _), _) -> interval
610 let tree = interval_tree tree in
611 (* This should always be true: *)
612 assert (interval_of_node tree = (Int63.min_int, Int63.max_int));
614 (* "Contained in" operator.
615 * 'a <-< b' iff 'a' is a subinterval of 'b'.
617 * |<----------- b ----------->|
619 let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in
621 (* "Intersects" operator.
622 * 'a /\ b' iff intervals 'a' and 'b' overlap, eg:
624 * |<----------- b ----------->|
626 let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in
628 let rec insert_segment tree segment =
629 let start, size, owner, owner_offset = segment in
630 let seginterval = start, start +^ size in
631 let seg = owner, owner_offset in
634 (* Test if we should insert into this leaf or node: *)
635 | Leaf (interval, segs) when interval <-< seginterval ->
636 Leaf (interval, seg :: segs)
637 | Node (left, (interval, segs), right)
638 when interval <-< seginterval ->
639 Node (left, (interval, seg :: segs), right)
641 | (Leaf _) as leaf -> leaf
643 (* Else, should we insert into left or right subtrees? *)
644 | Node (left, i, right) ->
646 if seginterval /\ interval_of_node left then
647 insert_segment left segment
651 if seginterval /\ interval_of_node right then
652 insert_segment right segment
655 Node (left, i, right)
657 let tree = List.fold_left insert_segment tree segments in
661 let printer ((sp, ep), segments) =
662 sprintf "[%s-%s] " (Int63.to_string sp) (Int63.to_string ep) ^
664 (List.map (fun (owner,_) -> string_of_owner owner)
667 print_binary_tree printer printer tree
672 (* Return the ownership structure. *)
675 let get_owners_lookup machine ownership (disk : block_device) =
676 (* Get the correct tree. *)
677 let tree = List.assoc (disk :> device) ownership in
680 (* Warning: This 'hot' code was carefully optimized based on
681 * feedback from 'gprof'. Avoid fiddling with it.
683 let rec query = function
684 | Leaf (_, segments) -> segments
686 (* Try to avoid expensive '@' operator if node segments is empty: *)
687 | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left),
691 if offset < leftend then query left else query right in
694 (* ... or a singleton: *)
695 | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left),
699 if offset < leftend then query left else query right in
700 segment :: subsegments
702 (* Normal recursive case: *)
703 | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left),
707 if offset < leftend then query left else query right in
708 segments @ subsegments
710 let owners = query tree in
713 fun (owner, owner_offset) -> (owner, offset -^ owner_offset)
716 (* Find out if a disk offset is free.
717 * Current algorithm just checks that at least one owner says
718 * it is free. We could be smarter about this.
720 let offset_is_free owners =
723 | `Filesystem fs, offset -> fs_offset_is_free fs offset
724 | `Partitions parts, offset -> parts_offset_is_free parts offset
725 | `PhysicalVolume pv, offset -> lvm_offset_is_free pv offset