(* Memory info command for virtual domains.
(C) Copyright 2008 Richard W.M. Jones, Red Hat Inc.
http://libvirt.org/
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Functions for making a memory map of a virtual machine from
various sources. The memory map will most certainly have holes.
*)
open Unix
open Printf
open Bigarray
open Virt_mem_utils
let debug = false
(* An address. *)
type addr = int64
(* A range of addresses (start and start+size). *)
type interval = addr * addr
(* A mapping. *)
type mapping = {
start : addr;
size : addr;
(* Bigarray mmap(2)'d region with byte addressing: *)
arr : (char,int8_unsigned_elt,c_layout) Array1.t;
(* The order that the mappings were added, 0 for the first mapping,
* 1 for the second mapping, etc.
*)
order : int;
}
(* A memory map. *)
type ('ws,'e,'hm) t = {
(* List of mappings, kept in reverse order they were added (new
* mappings are added at the head of this list).
*)
mappings : mapping list;
(* Segment tree for fast access to a mapping at a particular address.
* This is rebuilt each time a new mapping is added.
* NB! If mappings = [], ignore contents of this field. (This is
* enforced by the 'hm phantom type).
*)
tree : (interval * mapping option, interval * mapping option) binary_tree;
(* Word size, endianness.
* Phantom types enforce that these are set before being used.
*)
wordsize : wordsize;
endian : Bitstring.endian;
}
let create () = {
mappings = [];
tree = Leaf ((0L,0L),None);
wordsize = W32;
endian = Bitstring.LittleEndian;
}
let set_wordsize t ws = { t with wordsize = ws }
let set_endian t e = { t with endian = e }
let get_wordsize t = t.wordsize
let get_endian t = t.endian
(* Build the segment tree from the list of mappings. This code
* is taken from virt-df. For an explanation of the process see:
* http://en.wikipedia.org/wiki/Segment_tree
*
* See also the 'get_mapping' function below which uses this tree
* to do fast lookups.
*)
let tree_of_mappings mappings =
(* Construct the list of distinct endpoints. *)
let eps =
List.map
(fun { start = start; size = size } -> [start; start +^ size])
mappings in
let eps = sort_uniq (List.concat eps) in
(* Construct the elementary intervals. *)
let elints =
let elints, lastpoint =
List.fold_left (
fun (elints, prevpoint) point ->
((point, point) :: (prevpoint, point) :: elints), point
) ([], 0L) eps in
let elints = (lastpoint, Int64.max_int(*XXX*)) :: elints in
List.rev elints in
if debug then (
eprintf "elementary intervals (%d in total):\n" (List.length elints);
List.iter (
fun (startpoint, endpoint) ->
eprintf " %Lx %Lx\n" startpoint endpoint
) elints
);
(* Construct the binary tree of elementary intervals. *)
let tree =
(* Each elementary interval becomes a leaf. *)
let elints = List.map (fun elint -> Leaf elint) elints in
(* Recursively build this into a binary tree. *)
let rec make_layer = function
| [] -> []
| ([_] as x) -> x
(* Turn pairs of leaves at the bottom level into nodes. *)
| (Leaf _ as a) :: (Leaf _ as b) :: xs ->
let xs = make_layer xs in
Node (a, (), b) :: xs
(* Turn pairs of nodes at higher levels into nodes. *)
| (Node _ as left) :: ((Node _|Leaf _) as right) :: xs ->
let xs = make_layer xs in
Node (left, (), right) :: xs
| Leaf _ :: _ -> assert false (* never happens??? (I think) *)
in
let rec loop = function
| [] -> assert false
| [x] -> x
| xs -> loop (make_layer xs)
in
loop elints in
if debug then (
let leaf_printer (startpoint, endpoint) =
sprintf "%Lx-%Lx" startpoint endpoint
in
let node_printer () = "" in
print_binary_tree leaf_printer node_printer tree
);
(* Insert the mappings into the tree one by one. *)
let tree =
(* For each node/leaf in the tree, add its interval and an
* empty list which will be used to store the mappings.
*)
let rec interval_tree = function
| Leaf elint -> Leaf (elint, None)
| Node (left, (), right) ->
let left = interval_tree left in
let right = interval_tree right in
let (leftstart, _) = interval_of_node left in
let (_, rightend) = interval_of_node right in
let interval = leftstart, rightend in
Node (left, (interval, None), right)
and interval_of_node = function
| Leaf (elint, _) -> elint
| Node (_, (interval, _), _) -> interval
in
let tree = interval_tree tree in
(* This should always be true: *)
assert (interval_of_node tree = (0L, Int64.max_int(*XXX*)));
(* "Contained in" operator.
* 'a <-< b' iff 'a' is a subinterval of 'b'.
* |<---- a ---->|
* |<----------- b ----------->|
*)
let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in
(* "Intersects" operator.
* 'a /\ b' iff intervals 'a' and 'b' overlap, eg:
* |<---- a ---->|
* |<----------- b ----------->|
*)
let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in
let rec insert_mapping tree mapping =
let { start = start; size = size } = mapping in
let seginterval = start, start +^ size in
match tree with
(* Test if we should insert into this leaf or node: *)
| Leaf (interval, None) when interval <-< seginterval ->
Leaf (interval, Some mapping)
| Leaf (interval, Some oldmapping) when interval <-< seginterval ->
let mapping =
if oldmapping.order > mapping.order then oldmapping else mapping in
Leaf (interval, Some mapping)
| Node (left, (interval, None), right) when interval <-< seginterval ->
Node (left, (interval, Some mapping), right)
| Node (left, (interval, Some oldmapping), right)
when interval <-< seginterval ->
let mapping =
if oldmapping.order > mapping.order then oldmapping else mapping in
Node (left, (interval, Some mapping), right)
| (Leaf _) as leaf -> leaf
(* Else, should we insert into left or right subtrees? *)
| Node (left, i, right) ->
let left =
if seginterval /\ interval_of_node left then
insert_mapping left mapping
else
left in
let right =
if seginterval /\ interval_of_node right then
insert_mapping right mapping
else
right in
Node (left, i, right)
in
let tree = List.fold_left insert_mapping tree mappings in
tree in
if debug then (
let printer ((sp, ep), mapping) =
sprintf "[%Lx-%Lx] " sp ep ^
match mapping with
| None -> "(none)"
| Some { start = start; size = size; order = order } ->
sprintf "%Lx..%Lx(%d)" start (start+^size-^1L) order
in
print_binary_tree printer printer tree
);
tree
let add_mapping ({ mappings = mappings } as t) start size arr =
let order = List.length mappings in
let mapping = { start = start; size = size; arr = arr; order = order } in
let mappings = mapping :: mappings in
let tree = tree_of_mappings mappings in
{ t with mappings = mappings; tree = tree }
let add_file t fd addr =
let size = (fstat fd).st_size in
(* mmap(2) the file using Bigarray module. *)
let arr = Array1.map_file fd char c_layout false size in
(* Create the mapping entry. *)
add_mapping t addr (Int64.of_int size) arr
let add_string ({ mappings = mappings } as t) str addr =
let size = String.length str in
(* Copy the string data to a Bigarray. *)
let arr = Array1.create char c_layout size in
for i = 0 to size-1 do
Array1.set arr i (String.unsafe_get str i)
done;
(* Create the mapping entry. *)
add_mapping t addr (Int64.of_int size) arr
let of_file fd addr =
let t = create () in
add_file t fd addr
let of_string str addr =
let t = create () in
add_string t str addr
(* 'get_mapping' is the crucial, fast lookup function for address -> mapping.
* It searches the tree (hence fast) to work out the topmost mapping which
* applies to an address.
*
* Returns (rightend * mapping option)
* where 'mapping option' is the mapping (or None if it's a hole)
* and 'rightend' is the next address at which there is a different
* mapping/hole. In other words, this mapping result is good for
* addresses [addr .. rightend-1].
*)
let rec get_mapping addr = function
| Leaf ((_, rightend), mapping) -> rightend, mapping
| Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left),
(_, None),
right) ->
let subrightend, submapping =
if addr < leftend then get_mapping addr left
else get_mapping addr right in
subrightend, submapping
| Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left),
(_, Some mapping),
right) ->
let subrightend, submapping =
if addr < leftend then get_mapping addr left
else get_mapping addr right in
(match submapping with
| None -> subrightend, Some mapping
| Some submapping ->
subrightend,
Some (if mapping.order > submapping.order then mapping
else submapping)
)
(* Use the tree to quickly check if an address is mapped (returns false
* if it's a hole).
*)
let is_mapped { mappings = mappings; tree = tree } addr =
(* NB: No [`HasMapping] in the type so we have to check mappings <> []. *)
match mappings with
| [] -> false
| _ ->
let _, mapping = get_mapping addr tree in
mapping <> None
(* Get a single byte. *)
let get_byte { tree = tree } addr =
(* Get the mapping which applies to this address: *)
let _, mapping = get_mapping addr tree in
match mapping with
| Some { start = start; size = size; arr = arr } ->
let offset = Int64.to_int (addr -^ start) in
Char.code (Array1.get arr offset)
| None ->
invalid_arg "get_byte"
(* Get a range of bytes, possibly across several intervals. *)
let get_bytes { tree = tree } addr len =
let str = String.create len in
let rec loop addr pos len =
if len > 0 then (
let rightend, mapping = get_mapping addr tree in
match mapping with
| Some { start = start; size = size; arr = arr } ->
(* Offset within this mapping. *)
let offset = Int64.to_int (addr -^ start) in
(* Number of bytes to read before we either get to the end
* of our 'len' or we fall off the end of this interval.
*)
let n = min len (Int64.to_int (rightend -^ addr)) in
for i = 0 to n-1 do
String.unsafe_set str (pos + i) (Array1.get arr (offset + i))
done;
let len = len - n in
loop (addr +^ Int64.of_int n) (pos + n) len
| None ->
invalid_arg "get_bytes"
)
in
loop addr 0 len;
str
let get_int32 t addr =
let e = get_endian t in
let str = get_bytes t addr 4 in
let bs = Bitstring.bitstring_of_string str in
bitmatch bs with
| { addr : 32 : endian (e) } -> addr
| { _ } -> invalid_arg "get_int32"
let get_int64 t addr =
let e = get_endian t in
let str = get_bytes t addr 8 in
let bs = Bitstring.bitstring_of_string str in
bitmatch bs with
| { addr : 64 : endian (e) } -> addr
| { _ } -> invalid_arg "get_int64"
let get_C_int = get_int32
let get_C_long t addr =
let ws = get_wordsize t in
match ws with
| W32 -> Int64.of_int32 (get_int32 t addr)
| W64 -> get_int64 t addr
(* Take bytes until a condition is not met. This is efficient
* in that we stay within the same mapping as long as we can.
*
* If we hit a hole, raises Invalid_argument "dowhile".
*)
let dowhile { tree = tree } addr cond =
let rec loop addr =
let rightend, mapping = get_mapping addr tree in
match mapping with
| Some { start = start; size = size; arr = arr } ->
(* Offset within this mapping. *)
let offset = Int64.to_int (addr -^ start) in
(* Number of bytes before we fall off the end of this interval. *)
let n = Int64.to_int (rightend -^ addr) in
let rec loop2 addr offset n =
if n > 0 then (
let c = Array1.get arr offset in
if cond addr c then
loop2 (addr +^ 1L) (offset + 1) (n - 1)
else
false (* stop now, finish outer loop too *)
)
else true (* fell off the end, so continue outer loop *)
in
if loop2 addr offset n then
loop (addr +^ Int64.of_int n)
| None ->
invalid_arg "dowhile"
in
loop addr
let is_mapped_range ({ mappings = mappings } as t) addr size =
match mappings with
(* NB: No [`HasMapping] in the type so we have to check mappings <> []. *)
| [] -> false
| _ ->
(* Quick and dirty. It's possible to make a much faster
* implementation of this which doesn't call the closure for every
* byte.
*)
let size = ref size in
try dowhile t addr (fun _ _ -> decr size; !size > 0); true
with Invalid_argument "dowhile" -> false
(* Get a string, ending at ASCII NUL character. *)
let get_string t addr =
let chars = ref [] in
try
dowhile t addr (
fun _ c ->
if c <> '\000' then (
chars := c :: !chars;
true
) else false
);
let chars = List.rev !chars in
let len = List.length chars in
let str = String.create len in
let i = ref 0 in
List.iter (fun c -> String.unsafe_set str !i c; incr i) chars;
str
with
Invalid_argument _ -> invalid_arg "get_string"
let is_string t addr =
try dowhile t addr (fun _ c -> c <> '\000'); true
with Invalid_argument _ -> false
let is_C_identifier t addr =
let i = ref 0 in
let r = ref true in
try
dowhile t addr (
fun _ c ->
let b =
if !i = 0 then (
c = '_' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z'
) else (
if c = '\000' then false
else (
if c = '_' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' ||
c >= '0' && c <= '9' then
true
else (
r := false;
false
)
)
) in
incr i;
b
);
!r
with
Invalid_argument _ -> false
(* The following functions are actually written in C
* because memmem(3) is likely to be much faster than anything
* we could write in OCaml.
*
* Also OCaml bigarrays are specifically designed to be accessed
* easily from C:
* http://caml.inria.fr/pub/docs/manual-ocaml/manual043.html
*)
(*
(* Array+offset = string? *)
let string_at arr offset str strlen =
let j = ref offset in
let rec loop i =
if i >= strlen then true
else
if Array1.get arr !j <> str.[i] then false
else (
incr j;
loop (i+1)
)
in
loop 0
(* Find in a single file mapping.
* [start] is relative to the mapping and we return an offset relative
* to the mapping.
*)
let _find_in start align str arr =
let strlen = String.length str in
if strlen > 0 then (
let j = ref start in
let e = Array1.dim arr - strlen in
let rec loop () =
if !j <= e then (
if string_at arr !j str strlen then Some !j
else (
j := !j + align;
loop ()
)
)
else None
in
loop ()
)
else Some start
*)
external _find_in :
int -> int -> string -> (char,int8_unsigned_elt,c_layout) Array1.t ->
int option = "virt_mem_mmap_find_in"
(* Generic find function. *)
let _find { tree = tree } start align str =
let rec loop addr =
let rightend, mapping = get_mapping addr tree in
match mapping with
| Some { start = start; size = size; arr = arr } ->
(* Offset within this mapping. *)
let offset = Int64.to_int (addr -^ start) in
(match _find_in offset align str arr with
| None -> None
| Some offset -> Some (start +^ Int64.of_int offset)
)
| None ->
(* Find functions all silently skip holes, so: *)
loop rightend
in
loop start
let find t ?(start=0L) str =
_find t start 1 str
let find_align t ?(start=0L) str =
let align = bytes_of_wordsize (get_wordsize t) in
_find t start align str
let rec _find_all t start align str =
match _find t start align str with
| None -> []
| Some offset ->
offset :: _find_all t (offset +^ Int64.of_int align) align str
let find_all t ?(start=0L) str =
_find_all t start 1 str
let find_all_align t ?(start=0L) str =
let align = bytes_of_wordsize (get_wordsize t) in
_find_all t start align str
(* NB: Phantom types in the interface ensure that these pointer functions
* can only be called once endianness and wordsize have both been set.
*)
let rec find_pointer t ?start addr =
find_align t ?start (string_of_addr t addr)
and find_pointer_all t ?start addr =
find_all_align t ?start (string_of_addr t addr)
(*
and string_of_addr t addr =
let bits = bits_of_wordsize (get_wordsize t) in
let e = get_endian t in
let bs = BITSTRING { addr : bits : endian (e) } in
Bitstring.string_of_bitstring bs
*)
(* XXX bitstring is missing 'construct_int64_le_unsigned' so we
* have to force this to 32 bits for the moment.
*)
and string_of_addr t addr =
let bits = bits_of_wordsize (get_wordsize t) in
assert (bits = 32);
let e = get_endian t in
let bs = BITSTRING { Int64.to_int32 addr : 32 : endian (e) } in
Bitstring.string_of_bitstring bs
let follow_pointer t addr =
let ws = get_wordsize t in
let e = get_endian t in
let bits = bits_of_wordsize ws in
let str = get_bytes t addr (bytes_of_wordsize ws) in
let bs = Bitstring.bitstring_of_string str in
bitmatch bs with
| { addr : bits : endian (e) } -> addr
| { _ } -> invalid_arg "follow_pointer"
let succ_long t addr =
let ws = get_wordsize t in
addr +^ Int64.of_int (bytes_of_wordsize ws)
let pred_long t addr =
let ws = get_wordsize t in
addr -^ Int64.of_int (bytes_of_wordsize ws)
let align t addr =
let ws = get_wordsize t in
let mask = Int64.of_int (bytes_of_wordsize ws - 1) in
(addr +^ mask) &^ (Int64.lognot mask)