From c67e17c4ca67b1a53d16394b8f3218665c19642c Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Thu, 1 Jan 1970 00:00:00 +0000 Subject: [PATCH] Revised Virt_mem_mmap handling overlapping mappings efficiently. --- lib/.depend | 6 +- lib/Makefile.in | 5 + lib/test_mmap.ml | 26 ++++ lib/virt_mem.mli | 4 +- lib/virt_mem_mmap.ml | 338 ++++++++++++++++++++++++++++++++++++++++++-------- lib/virt_mem_mmap.mli | 221 ++++++++++++++++++++++++--------- lib/virt_mem_utils.ml | 52 ++++++++ 7 files changed, 538 insertions(+), 114 deletions(-) create mode 100644 lib/test_mmap.ml diff --git a/lib/.depend b/lib/.depend index 552adb7..b5a858d 100644 --- a/lib/.depend +++ b/lib/.depend @@ -1,7 +1,9 @@ virt_mem.cmi: virt_mem_utils.cmo virt_mem_mmap.cmi virt_mem_mmap.cmi: virt_mem_utils.cmo -virt_mem_capture.cmo: virt_mem_gettext.cmo virt_mem.cmi -virt_mem_capture.cmx: virt_mem_gettext.cmx virt_mem.cmx +test_mmap.cmo: virt_mem_mmap.cmi +test_mmap.cmx: virt_mem_mmap.cmx +virt_mem_capture.cmo: virt_mem_mmap.cmi virt_mem_gettext.cmo virt_mem.cmi +virt_mem_capture.cmx: virt_mem_mmap.cmx virt_mem_gettext.cmx virt_mem.cmx virt_mem.cmo: virt_mem_version.cmo virt_mem_utils.cmo virt_mem_mmap.cmi \ virt_mem_gettext.cmo virt_mem.cmi virt_mem.cmx: virt_mem_version.cmx virt_mem_utils.cmx virt_mem_mmap.cmx \ diff --git a/lib/Makefile.in b/lib/Makefile.in index 21ccfde..20036af 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -64,6 +64,11 @@ virt_mem.cma: $(OBJS) virt_mem.cmxa: $(XOBJS) ocamlmklib -o virt_mem $^ +# Just for testing Virt_mem_mmap module: +test_mmap: virt_mem_utils.cmx virt_mem_mmap_c.o virt_mem_mmap.cmx test_mmap.cmx + ocamlfind ocamlopt \ + $(OCAMLOPTFLAGS) $(OCAMLOPTPACKAGES) $(OCAMLOPTLIBS) -o $@ $^ + install: include ../Make.rules \ No newline at end of file diff --git a/lib/test_mmap.ml b/lib/test_mmap.ml new file mode 100644 index 0000000..dae44da --- /dev/null +++ b/lib/test_mmap.ml @@ -0,0 +1,26 @@ +(* Test program for Virt_mem_mmap module. Not for general consumption. *) + +open Printf +open Virt_mem_mmap + +let () = + let mem = create () in + let data = String.make 0x1000 '\001' in + let mem = add_string mem data 0x800L in + let data = String.make 0x1000 '\002' in + let mem = add_string mem data 0x1000L in + let data = String.make 0x1800 '\003' in + let mem = add_string mem data 0L in + + List.iter ( + fun addr -> + try + printf "byte @ %Lx = %d\n" addr (get_byte mem addr) + with Invalid_argument "get_byte" -> + printf "byte @ %Lx = HOLE\n" addr + ) [ 0L; 0x1L; + 0x7ffL; 0x800L; 0x801L; + 0xfffL; 0x1000L; 0x1001L; + 0x17ffL; 0x1800L; 0x1801L; + 0x1fffL; 0x2000L; 0x2001L ] + diff --git a/lib/virt_mem.mli b/lib/virt_mem.mli index 58b879e..2f6ba86 100644 --- a/lib/virt_mem.mli +++ b/lib/virt_mem.mli @@ -24,14 +24,14 @@ type image = int option * string * Virt_mem_utils.architecture - * ([`Wordsize], [`Endian]) Virt_mem_mmap.t + * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t (** A memory image from a domain. *) type image_with_ksyms = int option * string * Virt_mem_utils.architecture - * ([`Wordsize], [`Endian]) Virt_mem_mmap.t + * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t * (ksym -> Virt_mem_mmap.addr) (** An image after it has been processed to find kernel symbols. diff --git a/lib/virt_mem_mmap.ml b/lib/virt_mem_mmap.ml index 3b84a26..b9013de 100644 --- a/lib/virt_mem_mmap.ml +++ b/lib/virt_mem_mmap.ml @@ -21,82 +21,320 @@ *) open Unix +open Printf open Bigarray open Virt_mem_utils -(* Simple implementation at the moment: Store a list of mappings, - * sorted by start address. We assume that mappings do not overlap. - * We can change the implementation later if we need to. In most cases - * there will only be a small number of mappings (probably 1). - *) -type ('a,'b) t = { - mappings : mapping list; - wordsize : wordsize option; - endian : Bitstring.endian option; -} -and mapping = { +let debug = true + +(* An address. *) +type addr = int64 + +(* A range of addresses (start and start+size). *) +type interval = addr * addr + +(* A mapping. *) +type mapping = { start : addr; size : addr; (* Bigarray mmap(2)'d region with byte addressing: *) arr : (char,int8_unsigned_elt,c_layout) Array1.t; + (* The order that the mappings were added, 0 for the first mapping, + * 1 for the second mapping, etc. + *) + order : int; } -and addr = int64 +(* A memory map. *) +type ('ws,'e,'hm) t = { + (* List of mappings, kept in reverse order they were added (new + * mappings are added at the head of this list). + *) + mappings : mapping list; + + (* Segment tree for fast access to a mapping at a particular address. + * This is rebuilt each time a new mapping is added. + * NB! If mappings = [], ignore contents of this field. (This is + * enforced by the 'hm phantom type). + *) + tree : (interval * mapping list, interval * mapping list) binary_tree; + + (* Word size, endianness. + * Phantom types enforce that these are set before being used. + *) + wordsize : wordsize; + endian : Bitstring.endian; +} let create () = { mappings = []; - wordsize = None; - endian = None + tree = Leaf ((0L,0L),[]); + wordsize = W32; + endian = Bitstring.LittleEndian; } -let set_wordsize t ws = { t with wordsize = Some ws } +let set_wordsize t ws = { t with wordsize = ws } + +let set_endian t e = { t with endian = e } + +let get_wordsize t = t.wordsize + +let get_endian t = t.endian + +(* Build the segment tree from the list of mappings. This code + * is taken from virt-df. For an explanation of the process see: + * http://en.wikipedia.org/wiki/Segment_tree + *) +let tree_of_mappings mappings = + (* Construct the list of distinct endpoints. *) + let eps = + List.map + (fun { start = start; size = size } -> [start; start +^ size]) + mappings in + let eps = sort_uniq (List.concat eps) in + + (* Construct the elementary intervals. *) + let elints = + let elints, lastpoint = + List.fold_left ( + fun (elints, prevpoint) point -> + ((point, point) :: (prevpoint, point) :: elints), point + ) ([], 0L) eps in + let elints = (lastpoint, Int64.max_int(*XXX*)) :: elints in + List.rev elints in + + if debug then ( + eprintf "elementary intervals (%d in total):\n" (List.length elints); + List.iter ( + fun (startpoint, endpoint) -> + eprintf " %Lx %Lx\n" startpoint endpoint + ) elints + ); + + (* Construct the binary tree of elementary intervals. *) + let tree = + (* Each elementary interval becomes a leaf. *) + let elints = List.map (fun elint -> Leaf elint) elints in + (* Recursively build this into a binary tree. *) + let rec make_layer = function + | [] -> [] + | ([_] as x) -> x + (* Turn pairs of leaves at the bottom level into nodes. *) + | (Leaf _ as a) :: (Leaf _ as b) :: xs -> + let xs = make_layer xs in + Node (a, (), b) :: xs + (* Turn pairs of nodes at higher levels into nodes. *) + | (Node _ as left) :: ((Node _|Leaf _) as right) :: xs -> + let xs = make_layer xs in + Node (left, (), right) :: xs + | Leaf _ :: _ -> assert false (* never happens??? (I think) *) + in + let rec loop = function + | [] -> assert false + | [x] -> x + | xs -> loop (make_layer xs) + in + loop elints in -let set_endian t e = { t with endian = Some e } + if debug then ( + let leaf_printer (startpoint, endpoint) = + sprintf "%Lx-%Lx" startpoint endpoint + in + let node_printer () = "" in + print_binary_tree leaf_printer node_printer tree + ); + + (* Insert the mappings into the tree one by one. *) + let tree = + (* For each node/leaf in the tree, add its interval and an + * empty list which will be used to store the mappings. + *) + let rec interval_tree = function + | Leaf elint -> Leaf (elint, []) + | Node (left, (), right) -> + let left = interval_tree left in + let right = interval_tree right in + let (leftstart, _) = interval_of_node left in + let (_, rightend) = interval_of_node right in + let interval = leftstart, rightend in + Node (left, (interval, []), right) + and interval_of_node = function + | Leaf (elint, _) -> elint + | Node (_, (interval, _), _) -> interval + in -let get_wordsize t = Option.get t.wordsize + let tree = interval_tree tree in + (* This should always be true: *) + assert (interval_of_node tree = (0L, Int64.max_int(*XXX*))); + + (* "Contained in" operator. + * 'a <-< b' iff 'a' is a subinterval of 'b'. + * |<---- a ---->| + * |<----------- b ----------->| + *) + let (<-<) (a1, a2) (b1, b2) = b1 <= a1 && a2 <= b2 in + + (* "Intersects" operator. + * 'a /\ b' iff intervals 'a' and 'b' overlap, eg: + * |<---- a ---->| + * |<----------- b ----------->| + *) + let ( /\ ) (a1, a2) (b1, b2) = a2 > b1 || b2 > a1 in + + let rec insert_mapping tree mapping = + let { start = start; size = size } = mapping in + let seginterval = start, start +^ size in + + match tree with + (* Test if we should insert into this leaf or node: *) + | Leaf (interval, mappings) when interval <-< seginterval -> + Leaf (interval, mapping :: mappings) + | Node (left, (interval, mappings), right) + when interval <-< seginterval -> + Node (left, (interval, mapping :: mappings), right) + + | (Leaf _) as leaf -> leaf + + (* Else, should we insert into left or right subtrees? *) + | Node (left, i, right) -> + let left = + if seginterval /\ interval_of_node left then + insert_mapping left mapping + else + left in + let right = + if seginterval /\ interval_of_node right then + insert_mapping right mapping + else + right in + Node (left, i, right) + in + let tree = List.fold_left insert_mapping tree mappings in + tree in + + if debug then ( + let printer ((sp, ep), mappings) = + sprintf "[%Lx-%Lx] " sp ep ^ + String.concat ";" + (List.map (fun { start = start; size = size } -> + sprintf "%Lx+%Lx" start size) + mappings) + in + print_binary_tree printer printer tree + ); -let get_endian t = Option.get t.endian + tree -let sort_mappings mappings = - let cmp { start = s1 } { start = s2 } = compare s1 s2 in - List.sort cmp mappings +let add_mapping ({ mappings = mappings } as t) start size arr = + let order = List.length mappings in + let mapping = { start = start; size = size; arr = arr; order = order } in + let mappings = mapping :: mappings in + let tree = tree_of_mappings mappings in + { t with mappings = mappings; tree = tree } -let add_file ({ mappings = mappings } as t) fd addr = - if addr &^ 7L <> 0L then - invalid_arg "add_file: mapping address must be aligned to 8 bytes"; +let add_file t fd addr = let size = (fstat fd).st_size in (* mmap(2) the file using Bigarray module. *) let arr = Array1.map_file fd char c_layout false size in - (* Create the mapping entry and keep the mappings sorted by start addr. *) - let mappings = - { start = addr; size = Int64.of_int size; arr = arr } :: mappings in - let mappings = sort_mappings mappings in - { t with mappings = mappings } - -let of_file fd addr = - let t = create () in - add_file t fd addr + (* Create the mapping entry. *) + add_mapping t addr (Int64.of_int size) arr let add_string ({ mappings = mappings } as t) str addr = - if addr &^ 7L <> 0L then - invalid_arg "add_file: mapping address must be aligned to 8 bytes"; let size = String.length str in (* Copy the string data to a Bigarray. *) let arr = Array1.create char c_layout size in for i = 0 to size-1 do Array1.set arr i (String.unsafe_get str i) done; - (* Create the mapping entry and keep the mappings sorted by start addr. *) - let mappings = - { start = addr; size = Int64.of_int size; arr = arr } :: mappings in - let mappings = sort_mappings mappings in - { t with mappings = mappings } + (* Create the mapping entry. *) + add_mapping t addr (Int64.of_int size) arr + +let of_file fd addr = + let t = create () in + add_file t fd addr let of_string str addr = let t = create () in add_string t str addr +(* Look up an address and get the top-most mapping which contains it. + * This uses the segment tree, so it's fast. The top-most mapping is + * the one with the highest 'order' field. + * + * Warning: This 'hot' code was carefully optimized based on + * feedback from 'gprof'. Avoid fiddling with it. + *) +let rec get_mapping addr = function + | Leaf (_, []) -> None + | Leaf (_, [mapping]) -> Some mapping + | Leaf (_, mappings) -> Some (find_highest_order mappings) + + (* Try to avoid expensive search if node mappings is empty: *) + | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), + (_, []), + right) -> + let submapping = + if addr < leftend then get_mapping addr left + else get_mapping addr right in + submapping + + (* ... or a singleton: *) + | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), + (_, [mapping]), + right) -> + let submapping = + if addr < leftend then get_mapping addr left + else get_mapping addr right in + (match submapping with + | None -> Some mapping + | Some submapping -> + Some (if mapping.order > submapping.order then mapping + else submapping) + ) + + (* Normal recursive case: *) + | Node ((Leaf ((_, leftend), _) | Node (_, ((_, leftend), _), _) as left), + (_, mappings), + right) -> + let submapping = + if addr < leftend then get_mapping addr left + else get_mapping addr right in + (match submapping with + | None -> Some (find_highest_order mappings) + | Some submapping -> Some (find_highest_order (submapping :: mappings)) + ) + +and find_highest_order mappings = + List.fold_left ( + fun mapping1 mapping2 -> + if mapping1.order > mapping2.order then mapping1 else mapping2 + ) (List.hd mappings) (List.tl mappings) + +(* Get a single byte. *) +let get_byte { tree = tree } addr = + (* Get the mapping which applies to this address: *) + match get_mapping addr tree with + | Some { start = start; size = size; arr = arr } -> + let offset = Int64.to_int (addr -^ start) in + Char.code (Array1.get arr offset) + | None -> + invalid_arg "get_byte" +(* + let rec loop = function + | [] -> invalid_arg "get_byte" + | { start = start; size = size; arr = arr } :: _ + when start <= addr && addr < start +^ size -> + let offset = Int64.to_int (addr -^ start) in + Char.code (Array1.get arr offset) + | _ :: ms -> loop ms + in + loop mappings +*) + + +(* + (* Find in mappings and return first predicate match. *) let _find_map { mappings = mappings } pred = let rec loop = function @@ -233,17 +471,6 @@ and addr_of_string t str = | { addr : bits : endian (e) } -> addr | { _ } -> invalid_arg "addr_of_string" -let get_byte { mappings = mappings } addr = - let rec loop = function - | [] -> invalid_arg "get_byte" - | { start = start; size = size; arr = arr } :: _ - when start <= addr && addr < start +^ size -> - let offset = Int64.to_int (addr -^ start) in - Char.code (Array1.get arr offset) - | _ :: ms -> loop ms - in - loop mappings - (* Take bytes until a condition is not met. This is efficient in that * we stay within the same mapping as long as we can. *) @@ -390,3 +617,12 @@ let align t addr = let ws = get_wordsize t in let mask = Int64.of_int (bytes_of_wordsize ws - 1) in (addr +^ mask) &^ (Int64.lognot mask) + +let map { mappings = mappings } f = + List.map (fun { start = start; size = size } -> f start size) mappings + +let iter t f = + ignore (map t (fun start size -> let () = f start size in ())) + +let nr_mappings { mappings = mappings } = List.length mappings +*) diff --git a/lib/virt_mem_mmap.mli b/lib/virt_mem_mmap.mli index 594f0cf..bdca7c5 100644 --- a/lib/virt_mem_mmap.mli +++ b/lib/virt_mem_mmap.mli @@ -1,3 +1,4 @@ +(** Memory map. *) (* Memory info command for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ @@ -15,128 +16,230 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Functions for making a memory map of a virtual machine from - various sources. The memory map will most certainly have holes. *) -type ('a,'b) t -(** Memory map. *) +(** {2 Memory maps} + + Memory maps represent the virtual memory of a virtual machine. + + We are mostly interested in the kernel memory and kernel data + structures. In Linux this stays at the same virtual memory + address whichever task is actually running (eg. on i386 machines, + the kernel is often found at virtual address 0xC0100000). Kernel + memory is spread out over several ranges of addresses, with gaps + of uninteresting or non-existant virtual addresses in between, and + this structure captures that. + + A memory map is a range of 64 bit addresses from [0] to [2^64-1]. + (Note that 64 bit addresses are used even for 32 bit virtual + machines - just ignore everything above 0xFFFFFFFF). + + A memory map consists of zero or more {b mappings} of data. A + mapping starts at some address and has some size, and the data for + a mapping can come from some source such as a file or OCaml + string. Use {!of_file}, {!of_string}, {!add_file}, {!add_string} + to create a memory map from mappings. + + {3 Overlapping mappings and holes} + + If mappings overlap, then the mapping which was added later + overrides/overwrites earlier mappings at any addresses which + coincide. + + Where there is no mapping for a particular address, the memory map + is said to have a hole. (Typically almost all of a memory map is + holes). In general, the searching functions such as {!find} skip + over holes, while the accessor functions such as {!get_bytes} + raise an error if you try to read a hole, but read the individual + function documentation. + + {3 Word size and endianness} + + Memory maps may (or may not) have an associated word size and + endianness for the whole map. These are used when we look at + integers and pointers in the memory. See {!get_endian}, + {!set_endian}, {!get_wordsize} and {!set_wordsize}, and accessor + functions such as {!get_int32} and {!follow_pointer}. + + {3 Efficiency} + + Mappings' data are stored in 1D Bigarrays. The advantages of + using a Bigarray are: (a) hidden from the garbage collector, (b) + easily accessible from C, (c) uses mmap(2) where possible. + + Some low level functions are written in C for speed. + + Mappings are stored in a segment tree for efficient access, but + the segment tree has to be rebuilt from scratch each time you add + a new mapping (it is not known if there is a more efficient way to + incrementally update a segment tree). +*) + +(** {2 Types} *) + +type ('ws,'e,'hm) t +(** Memory map. + + The ['ws], ['e] and ['hm] type parameters are phantom types + designed to ensure you don't try illegal operations before + initializing certain parts of the memory map. If you are not + familiar with phantom types, you can just ignore them. + + See also: + [http://camltastic.blogspot.com/2008/05/phantom-types.html] *) type addr = int64 (** Virtual memory addresses (even on 32 bit machines). *) -val create : unit -> ([`NoWordsize], [`NoEndian]) t +(** {2 Create a memory map, add mappings} *) + +val create : unit -> ([`NoWordsize], [`NoEndian], [`NoMappings]) t (** Create a new, empty memory map. *) -val set_wordsize : ([`NoWordsize], 'b) t -> Virt_mem_utils.wordsize -> - ([`Wordsize], 'b) t +val of_file : Unix.file_descr -> addr -> + ([`NoWordsize], [`NoEndian], [`HasMapping]) t +(** Create a new memory map, mapping file [fd] at address [addr]. *) + +val add_file : ('ws, 'e, 'hm) t -> Unix.file_descr -> addr -> + ('ws, 'e, [`HasMapping]) t +(** Add file [fd] at address [addr] to an existing memory map. + The new mapping can overwrite all or part of an existing mapping. *) + +val of_string : string -> addr -> ([`NoWordsize], [`NoEndian], [`HasMapping]) t +(** Create a new memory map, mapping string at address [addr]. *) + +val add_string : ('ws, 'e, 'hm) t -> string -> addr -> + ('ws, 'e, [`HasMapping]) t +(** Add string at address [addr] to an existing memory map. + The new mapping can overwrite all or part of an existing mapping. *) + +val set_wordsize : ([`NoWordsize], 'e, 'hm) t -> Virt_mem_utils.wordsize -> + ([`Wordsize], 'e, 'hm) t (** Set the natural wordsize of the memory map. This is used - for matching pointers within the map and can be set only once. *) + for matching integers and pointers within the map and can be + set only once. *) -val set_endian : ('a, [`NoEndian]) t -> Bitstring.endian -> - ('a, [`Endian]) t +val set_endian : ('ws, [`NoEndian], 'hm) t -> Bitstring.endian -> + ('ws, [`Endian], 'hm) t (** Set the natural endianness of the memory map. This is used - for matching pointers within the map and can be set only once. *) + for matching integers and pointers within the map and can be + set only once. *) -val get_wordsize : ([`Wordsize], 'b) t -> Virt_mem_utils.wordsize +val get_wordsize : ([`Wordsize], 'e, 'hm) t -> Virt_mem_utils.wordsize (** Return the wordsize previously set for this memory map. *) -val get_endian : ('a, [`Endian]) t -> Bitstring.endian +val get_endian : ('ws, [`Endian], 'hm) t -> Bitstring.endian (** Return the endianness previously set for this memory map. *) -val of_file : Unix.file_descr -> addr -> ([`NoWordsize], [`NoEndian]) t -(** Create a new memory map, mapping file [fd] at address [addr]. *) +(* +(** {2 Searching} *) -val add_file : ('a, 'b) t -> Unix.file_descr -> addr -> ('a, 'b) t -(** Add file [fd] at address [addr] to an existing memory map. - Behaviour is undefined if memory mappings overlap. *) +val find : ('ws, 'e, [`HasMapping]) t -> ?start:addr -> string -> addr option +(** Find string in a memory map and return its address (if found) + or [None] (if not found). You can pass an optional starting + address. If no start address is given, we begin searching at + the beginning of the first mapping. -val of_string : string -> addr -> ([`NoWordsize], [`NoEndian]) t -(** Create a new memory map, mapping string at address [addr]. *) + Any holes in the memory map are skipped automatically. -val add_string : ('a, 'b) t -> string -> addr -> ('a, 'b) t -(** Add string at address [addr] to an existing memory map. - Behaviour is undefined if memory mappings overlap. *) + Note that this doesn't find strings which straddle the + boundary of two adjacent or overlapping mappings. -val find : ('a, 'b) t -> ?start:addr -> string -> addr option -(** Find string in a memory map and return its address (if found). - You can pass an optional starting address. Any holes in - the memory map are skipped automatically. *) + Note that because the string being matched is an OCaml + string it may contain NULs (zero bytes) and those are matched + properly. *) -val find_align : ([`Wordsize], 'b) t -> ?start:addr -> string -> addr option -(** Find a string aligned to the wordsize in the memory map. *) +val find_align : ([`Wordsize], 'e, [`HasMapping]) t -> ?start:addr -> string -> + addr option +(** Same as {!find}, but the string must be aligned to the word size of + the memory map. *) -val find_all : ('a, 'b) t -> ?start:addr -> string -> addr list -(** Find all occurrences of a string in a memory map. *) +val find_all : ('ws, 'e, [`HasMapping]) t -> ?start:addr -> string -> addr list +(** Same as {!find}, but returns all occurrences of a string in a memory map. *) -val find_all_align : ([`Wordsize], 'b) t -> ?start:addr -> string -> addr list -(** Find all occurrences of a string in a memory map. *) +val find_all_align : ([`Wordsize], 'e, [`HasMapping]) t -> ?start:addr -> + string -> addr list +(** Same as {!find_all}, but the strings must be aligned to the word size. *) -val find_pointer : ([`Wordsize], [`Endian]) t -> ?start:addr -> addr -> - addr option +val find_pointer : ([`Wordsize], [`Endian], [`HasMapping]) t -> ?start:addr -> + addr -> addr option (** Find a pointer (address) in the memory map. The pointer must be aligned to a word. *) -val find_pointer_all : ([`Wordsize], [`Endian]) t -> ?start:addr -> addr -> - addr list +val find_pointer_all : ([`Wordsize], [`Endian], [`HasMapping]) t -> + ?start:addr -> addr -> addr list (** Find all occurrences of a pointer in the memory map. *) -val get_byte : ('a, 'b) t -> addr -> int +(** {2 Get bytes and ranges of bytes} *) +*) + +val get_byte : ('ws, 'e, [`HasMapping]) t -> addr -> int (** Return the byte at the given address. - This may raise [Invalid_argument "get_byte"] if the address is - not mapped. *) + This will raise [Invalid_argument "get_byte"] if the address is + a hole (not mapped). *) -val get_bytes : ('a, 'b) t -> addr -> int -> string +(* +val get_bytes : ('ws, 'e, [`HasMapping]) t -> addr -> int -> string (** Return the sequence of bytes starting at the given address. - This may raise [Invalid_argument "get_bytes"] if the address range - is not fully mapped. *) + This will raise [Invalid_argument "get_bytes"] if the address range + contains holes. *) -val get_int32 : ('a, [`Endian]) t -> addr -> int32 +val get_int32 : ('ws, [`Endian], [`HasMapping]) t -> addr -> int32 (** Return the 32-bit int at [addr]. *) -val get_int64 : ('a, [`Endian]) t -> addr -> int64 +val get_int64 : ('ws, [`Endian], [`HasMapping]) t -> addr -> int64 (** Return the 64-bit int at [addr]. *) -val get_C_int : ([`Wordsize], [`Endian]) t -> addr -> int32 +val get_C_int : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> int32 (** Return the C 32-bit int at [addr]. *) -val get_C_long : ([`Wordsize], [`Endian]) t -> addr -> int64 +val get_C_long : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> int64 (** Return the C 32 or 64-bit long at [addr]. *) -val get_string : ('a, 'b) t -> addr -> string +val get_string : ('ws, 'e, [`HasMapping]) t -> addr -> string (** Return the sequence of bytes starting at [addr] up to (but not including) the first ASCII NUL character. In other words, this returns a C-style string. - This may raise [Invalid_argument "get_string"] if we reach an - unmapped address before finding the end of the string. + This may raise [Invalid_argument "get_string"] if we reach a + hole (unmapped address) before finding the end of the string. - See also {!is_string} and {!is_C_identifier}. *) + See also {!get_bytes}, {!is_string} and {!is_C_identifier}. *) -val is_string : ('a, 'b) t -> addr -> bool +val is_string : ('ws, 'e, [`HasMapping]) t -> addr -> bool (** Return true or false if the address contains an ASCII NUL-terminated string. *) -val is_C_identifier : ('a, 'b) t -> addr -> bool +val is_C_identifier : ('ws, 'e, [`HasMapping]) t -> addr -> bool (** Return true or false if the address contains a NUL-terminated C identifier. *) -val is_mapped : ('a, 'b) t -> addr -> bool +val is_mapped : ('ws, 'e, [`HasMapping]) t -> addr -> bool (** Return true if the single address [addr] is mapped. *) -val follow_pointer : ([`Wordsize], [`Endian]) t -> addr -> addr +val follow_pointer : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> addr (** Follow (dereference) the pointer at [addr] and return the address pointed to. *) -val succ_long : ([`Wordsize], 'b) t -> addr -> addr +val succ_long : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr (** Add wordsize bytes to [addr] and return it. *) -val pred_long : ([`Wordsize], 'b) t -> addr -> addr +val pred_long : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr (** Subtract wordsize bytes from [addr] and return it. *) -val align : ([`Wordsize], 'b) t -> addr -> addr +val align : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr (** Align the [addr] to the next wordsize boundary. If it already aligned, this just returns [addr]. *) + +(** {2 Save and load memory maps} *) + +(*val to_channel : ('ws, 'e, [`HasMapping]) t -> out_channel -> unit*) +(** Write the memory map and data to the given output channel in + a reasonably efficient and stable binary format. *) + +(*val from_channel : in_channel -> ('?, '?, [`HasMapping]) t*) +(** Read a previously saved memory map. If the input channel does + not contain a memory map, this raises [Invalid_argument]. *) +*) diff --git a/lib/virt_mem_utils.ml b/lib/virt_mem_utils.ml index 2beb131..8eb312a 100644 --- a/lib/virt_mem_utils.ml +++ b/lib/virt_mem_utils.ml @@ -96,8 +96,60 @@ let frequency xs = let xs = loop xs in List.rev (List.sort compare xs) +let rec uniq ?(cmp = Pervasives.compare) = function + | [] -> [] + | [x] -> [x] + | x :: y :: xs when cmp x y = 0 -> + uniq (x :: xs) + | x :: y :: xs -> + x :: uniq (y :: xs) + +let sort_uniq ?cmp xs = + let xs = ExtList.List.sort ?cmp xs in + let xs = uniq ?cmp xs in + xs + (* Pad a string to a fixed width (from virt-top, but don't truncate). *) let pad width str = let n = String.length str in if n >= width then str else (* if n < width then *) str ^ String.make (width-n) ' ' + +(* General binary tree type. Data 'a is stored in the leaves and 'b + * is stored in the nodes. + *) +type ('a,'b) binary_tree = + | Leaf of 'a + | Node of ('a,'b) binary_tree * 'b * ('a,'b) binary_tree + +(* This prints out the binary tree in graphviz dot format. *) +let print_binary_tree leaf_printer node_printer tree = + (* Assign a unique, fixed label to each node. *) + let label = + let i = ref 0 in + let hash = Hashtbl.create 13 in + fun node -> + try Hashtbl.find hash node + with Not_found -> + let i = incr i; !i in + let label = "n" ^ string_of_int i in + Hashtbl.add hash node label; + label + in + (* Recursively generate the graphviz file. *) + let rec print = function + | (Leaf a as leaf) -> + eprintf " %s [shape=box, label=\"%s\"];\n" + (label leaf) (leaf_printer a) + | (Node (left,b,right) as node) -> + eprintf " %s [label=\"%s\"];\n" + (label node) (node_printer b); + eprintf " %s -> %s [tailport=sw];\n" (label node) (label left); + eprintf " %s -> %s [tailport=se];\n" (label node) (label right); + print left; + print right; + in + eprintf "/* Use 'dot -Tpng foo.dot > foo.png' to convert to a png file. */\n"; + eprintf "digraph G {\n"; + print tree; + eprintf "}\n%!" -- 1.8.3.1