(* Memory info command for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. Functions for making a memory map of a virtual machine from various sources. The memory map will most certainly have holes. *) open Unix open Printf open Bigarray open Virt_mem_utils let debug = true (* An address. *) type addr = int64 (* A range of addresses (start and start+size). *) type interval = addr * addr (* A mapping. *) type mapping = { start : addr; size : addr; (* Bigarray mmap(2)'d region with byte addressing: *) arr : (char,int8_unsigned_elt,c_layout) Array1.t; (* The order that the mappings were added, 0 for the first mapping, * 1 for the second mapping, etc. *) order : int; } (* A memory map. *) type ('ws,'e,'hm) t = { (* List of mappings, kept in reverse order they were added (new * mappings are added at the head of this list). *) mappings : mapping list; (* Segment tree for fast access to a mapping at a particular address. * This is rebuilt each time a new mapping is added. * NB! If mappings = [], ignore contents of this field. (This is * enforced by the 'hm phantom type). *) tree : (interval * mapping list, interval * mapping list) binary_tree; (* Word size, endianness. * Phantom types enforce that these are set before being used. *) wordsize : wordsize; endian : Bitstring.endian; } let create () = { mappings = []; tree = Leaf ((0L,0L),[]); wordsize = W32; endian = Bitstring.LittleEndian; } let set_wordsize t ws = { t with wordsize = ws } let set_endian t e = { t with endian = e } let get_wordsize t = t.wordsize let get_endian t = t.endian (* Build the segment tree from the list of mappings. This code * is taken from virt-df. For an explanation of the process see: * http://en.wikipedia.org/wiki/Segment_tree *) let tree_of_mappings mappings = (* Construct the list of distinct endpoints. *) let eps = List.map (fun { start = start; size = size } -> [start; start +^ size]) mappings in let eps = sort_uniq (List.concat eps) in (* Construct the elementary intervals. *) let elints = let elints, lastpoint = List.fold_left ( fun (elints, prevpoint) point -> ((point, point) :: (prevpoint, point) :: elints), point ) ([], 0L) eps in let elints = (lastpoint, Int64.max_int(*XXX*)) :: elints in List.rev elints in if debug then ( eprintf "elementary intervals (%d in total):\n" (List.length elints); List.iter ( fun (startpoint, endpoint) -> eprintf " %Lx %Lx\n" startpoint endpoint ) elints ); (* Construct the binary tree of elementary intervals. *) let tree = (* Each elementary interval becomes a leaf. *) let elints = List.map (fun elint -> Leaf elint) elints in (* Recursively build this into a binary tree. *) let rec make_layer = function | [] -> [] | ([_] as x) -> x (* Turn pairs of leaves at the bottom level into nodes. *) | (Leaf _ as a) :: (Leaf _ as b) :: xs -> let xs = make_layer xs in Node (a, (), b) :: xs (* Turn pairs of nodes at higher levels into nodes. *) | (Node _ as left) :: ((Node _|Leaf _) as right) :: xs -> let xs = make_layer xs in Node (left, (), right) :: xs | Leaf _ :: _ -> assert false (* never happens??? (I think) *) in let rec loop = function | [] -> assert false | [x] -> x | xs -> loop (make_layer xs) in loop elints in if debug then ( let leaf_printer (startpoint, endpoint) = sprintf "%Lx-%Lx" startpoint endpoint in let node_printer () = "" in print_binary_tree leaf_printer node_printer tree ); (* Insert the mappings into the tree one by one. *) let tree = (* For each node/leaf in the tree, add its interval and an * empty list which will be used to store the mappings. *) let rec interval_tree = function | Leaf elint -> Leaf (elint, []) | Node (left, (), right) -> let left = interval_tree left in let right = interval_tree right in let (leftstart, _) = interval_of_node left in let (_, rightend) = interval_of_node right in let interval = leftstart, rightend in Node (left, (interval, []), right) and interval_of_node = function | Leaf (elint, _) -> elint | Node (_, (interval, _), _) -> interval in let tree = interval_tree tree in (* This should always be true: *) assert (interval_of_node tree = (0L, Int64.max_int(*XXX*))); (* "Contained in" operator. * 'a <-< b' iff 'a' is a subinterval of 'b'. * |<---- a ---->| * |<----------- b ----------->| *) let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in (* "Intersects" operator. * 'a /\ b' iff intervals 'a' and 'b' overlap, eg: * |<---- a ---->| * |<----------- b ----------->| *) let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in let rec insert_mapping tree mapping = let { start = start; size = size } = mapping in let seginterval = start, start +^ size in match tree with (* Test if we should insert into this leaf or node: *) | Leaf (interval, mappings) when interval <-< seginterval -> Leaf (interval, mapping :: mappings) | Node (left, (interval, mappings), right) when interval <-< seginterval -> Node (left, (interval, mapping :: mappings), right) | (Leaf _) as leaf -> leaf (* Else, should we insert into left or right subtrees? *) | Node (left, i, right) -> let left = if seginterval /\ interval_of_node left then insert_mapping left mapping else left in let right = if seginterval /\ interval_of_node right then insert_mapping right mapping else right in Node (left, i, right) in let tree = List.fold_left insert_mapping tree mappings in tree in if debug then ( let printer ((sp, ep), mappings) = sprintf "[%Lx-%Lx] " sp ep ^ String.concat ";" (List.map (fun { start = start; size = size } -> sprintf "%Lx+%Lx" start size) mappings) in print_binary_tree printer printer tree ); tree let add_mapping ({ mappings = mappings } as t) start size arr = let order = List.length mappings in let mapping = { start = start; size = size; arr = arr; order = order } in let mappings = mapping :: mappings in let tree = tree_of_mappings mappings in { t with mappings = mappings; tree = tree } let add_file t fd addr = let size = (fstat fd).st_size in (* mmap(2) the file using Bigarray module. *) let arr = Array1.map_file fd char c_layout false size in (* Create the mapping entry. *) add_mapping t addr (Int64.of_int size) arr let add_string ({ mappings = mappings } as t) str addr = let size = String.length str in (* Copy the string data to a Bigarray. *) let arr = Array1.create char c_layout size in for i = 0 to size-1 do Array1.set arr i (String.unsafe_get str i) done; (* Create the mapping entry. *) add_mapping t addr (Int64.of_int size) arr let of_file fd addr = let t = create () in add_file t fd addr let of_string str addr = let t = create () in add_string t str addr (* Look up an address and get the top-most mapping which contains it. * This uses the segment tree, so it's fast. The top-most mapping is * the one with the highest 'order' field. * * Warning: This 'hot' code was carefully optimized based on * feedback from 'gprof'. Avoid fiddling with it. *) let rec get_mapping addr = function | Leaf (_, []) -> None | Leaf (_, [mapping]) -> Some mapping | Leaf (_, mappings) -> Some (find_highest_order mappings) (* Try to avoid expensive search if node mappings is empty: *) | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), (_, []), right) -> let submapping = if addr < leftend then get_mapping addr left else get_mapping addr right in submapping (* ... or a singleton: *) | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), (_, [mapping]), right) -> let submapping = if addr < leftend then get_mapping addr left else get_mapping addr right in (match submapping with | None -> Some mapping | Some submapping -> Some (if mapping.order > submapping.order then mapping else submapping) ) (* Normal recursive case: *) | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), (_, mappings), right) -> let submapping = if addr < leftend then get_mapping addr left else get_mapping addr right in (match submapping with | None -> Some (find_highest_order mappings) | Some submapping -> Some (find_highest_order (submapping :: mappings)) ) and find_highest_order mappings = List.fold_left ( fun mapping1 mapping2 -> if mapping1.order > mapping2.order then mapping1 else mapping2 ) (List.hd mappings) (List.tl mappings) (* Get a single byte. *) let get_byte { tree = tree } addr = (* Get the mapping which applies to this address: *) match get_mapping addr tree with | Some { start = start; size = size; arr = arr } -> let offset = Int64.to_int (addr -^ start) in Char.code (Array1.get arr offset) | None -> invalid_arg "get_byte" (* let rec loop = function | [] -> invalid_arg "get_byte" | { start = start; size = size; arr = arr } :: _ when start <= addr && addr < start +^ size -> let offset = Int64.to_int (addr -^ start) in Char.code (Array1.get arr offset) | _ :: ms -> loop ms in loop mappings *) (* (* Find in mappings and return first predicate match. *) let _find_map { mappings = mappings } pred = let rec loop = function | [] -> None | m :: ms -> match pred m with | Some n -> Some n | None -> loop ms in loop mappings (* The following functions are actually written in C * because memmem(3) is likely to be much faster than anything * we could write in OCaml. * * Also OCaml bigarrays are specifically designed to be accessed * easily from C: * http://caml.inria.fr/pub/docs/manual-ocaml/manual043.html *) (* (* Array+offset = string? *) let string_at arr offset str strlen = let j = ref offset in let rec loop i = if i >= strlen then true else if Array1.get arr !j <> str.[i] then false else ( incr j; loop (i+1) ) in loop 0 (* Find in a single file mapping. * [start] is relative to the mapping and we return an offset relative * to the mapping. *) let _find_in start align str arr = let strlen = String.length str in if strlen > 0 then ( let j = ref start in let e = Array1.dim arr - strlen in let rec loop () = if !j <= e then ( if string_at arr !j str strlen then Some !j else ( j := !j + align; loop () ) ) else None in loop () ) else Some start *) external _find_in : int -> int -> string -> (char,int8_unsigned_elt,c_layout) Array1.t -> int option = "virt_mem_mmap_find_in" (* Generic find function. *) let _find t start align str = _find_map t ( fun { start = mstart; size = msize; arr = arr } -> if mstart >= start then ( (* Check this mapping from the beginning. *) match _find_in 0 align str arr with | Some offset -> Some (mstart +^ Int64.of_int offset) | None -> None ) else if mstart < start && start <= mstart+^msize then ( (* Check this mapping from somewhere in the middle. *) let offset = Int64.to_int (start -^ mstart) in match _find_in offset align str arr with | Some offset -> Some (mstart +^ Int64.of_int offset) | None -> None ) else None ) let find t ?(start=0L) str = _find t start 1 str let find_align t ?(start=0L) str = let align = bytes_of_wordsize (get_wordsize t) in _find t start align str let rec _find_all t start align str = match _find t start align str with | None -> [] | Some offset -> offset :: _find_all t (offset +^ Int64.of_int align) align str let find_all t ?(start=0L) str = _find_all t start 1 str let find_all_align t ?(start=0L) str = let align = bytes_of_wordsize (get_wordsize t) in _find_all t start align str (* NB: Phantom types in the interface ensure that these pointer functions * can only be called once endianness and wordsize have both been set. *) let rec find_pointer t ?start addr = find_align t ?start (string_of_addr t addr) and find_pointer_all t ?start addr = find_all_align t ?start (string_of_addr t addr) (* and string_of_addr t addr = let bits = bits_of_wordsize (get_wordsize t) in let e = get_endian t in let bs = BITSTRING { addr : bits : endian (e) } in Bitstring.string_of_bitstring bs *) (* XXX bitstring is missing 'construct_int64_le_unsigned' so we * have to force this to 32 bits for the moment. *) and string_of_addr t addr = let bits = bits_of_wordsize (get_wordsize t) in assert (bits = 32); let e = get_endian t in let bs = BITSTRING { Int64.to_int32 addr : 32 : endian (e) } in Bitstring.string_of_bitstring bs and addr_of_string t str = let bits = bits_of_wordsize (get_wordsize t) in let e = get_endian t in let bs = Bitstring.bitstring_of_string str in bitmatch bs with | { addr : bits : endian (e) } -> addr | { _ } -> invalid_arg "addr_of_string" (* Take bytes until a condition is not met. This is efficient in that * we stay within the same mapping as long as we can. *) let dowhile { mappings = mappings } addr cond = let rec get_next_mapping addr = function | [] -> invalid_arg "dowhile" | { start = start; size = size; arr = arr } :: _ when start <= addr && addr < start +^ size -> let offset = Int64.to_int (addr -^ start) in let len = Int64.to_int size - offset in arr, offset, len | _ :: ms -> get_next_mapping addr ms in let rec loop addr = let arr, offset, len = get_next_mapping addr mappings in let rec loop2 i = if i < len then ( let c = Array1.get arr (offset+i) in if cond c then loop2 (i+1) ) else loop (addr +^ Int64.of_int len) in loop2 0 in loop addr let get_bytes t addr len = let str = String.create len in let i = ref 0 in try dowhile t addr ( fun c -> str.[!i] <- c; incr i; !i < len ); str with Invalid_argument _ -> invalid_arg "get_bytes" let get_int32 t addr = let e = get_endian t in let str = get_bytes t addr 4 in let bs = Bitstring.bitstring_of_string str in bitmatch bs with | { addr : 32 : endian (e) } -> addr | { _ } -> invalid_arg "follow_pointer" let get_int64 t addr = let e = get_endian t in let str = get_bytes t addr 8 in let bs = Bitstring.bitstring_of_string str in bitmatch bs with | { addr : 64 : endian (e) } -> addr | { _ } -> invalid_arg "follow_pointer" let get_C_int = get_int32 let get_C_long t addr = let ws = get_wordsize t in match ws with | W32 -> Int64.of_int32 (get_int32 t addr) | W64 -> get_int64 t addr let get_string t addr = let chars = ref [] in try dowhile t addr ( fun c -> if c <> '\000' then ( chars := c :: !chars; true ) else false ); let chars = List.rev !chars in let len = List.length chars in let str = String.create len in let i = ref 0 in List.iter (fun c -> str.[!i] <- c; incr i) chars; str with Invalid_argument _ -> invalid_arg "get_string" let is_string t addr = try dowhile t addr (fun c -> c <> '\000'); true with Invalid_argument _ -> false let is_C_identifier t addr = let i = ref 0 in let r = ref true in try dowhile t addr ( fun c -> let b = if !i = 0 then ( c = '_' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' ) else ( if c = '\000' then false else ( if c = '_' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9' then true else ( r := false; false ) ) ) in incr i; b ); !r with Invalid_argument _ -> false let is_mapped { mappings = mappings } addr = let rec loop = function | [] -> false | { start = start; size = size; arr = arr } :: _ when start <= addr && addr < start +^ size -> true | _ :: ms -> loop ms in loop mappings let follow_pointer t addr = let ws = get_wordsize t in let e = get_endian t in let bits = bits_of_wordsize ws in let str = get_bytes t addr (bytes_of_wordsize ws) in let bs = Bitstring.bitstring_of_string str in bitmatch bs with | { addr : bits : endian (e) } -> addr | { _ } -> invalid_arg "follow_pointer" let succ_long t addr = let ws = get_wordsize t in addr +^ Int64.of_int (bytes_of_wordsize ws) let pred_long t addr = let ws = get_wordsize t in addr -^ Int64.of_int (bytes_of_wordsize ws) let align t addr = let ws = get_wordsize t in let mask = Int64.of_int (bytes_of_wordsize ws - 1) in (addr +^ mask) &^ (Int64.lognot mask) let map { mappings = mappings } f = List.map (fun { start = start; size = size } -> f start size) mappings let iter t f = ignore (map t (fun start size -> let () = f start size in ())) let nr_mappings { mappings = mappings } = List.length mappings *)