+(** Memory map. *)
(* Memory info command for virtual domains.
(C) Copyright 2008 Richard W.M. Jones, Red Hat Inc.
http://libvirt.org/
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Functions for making a memory map of a virtual machine from
- various sources. The memory map will most certainly have holes.
*)
-type ('a,'b) t
-(** Memory map. *)
+(** {2 Memory maps}
+
+ Memory maps represent the virtual memory of a virtual machine.
+
+ We are mostly interested in the kernel memory and kernel data
+ structures. In Linux this stays at the same virtual memory
+ address whichever task is actually running (e.g. on i386 machines,
+ the kernel is often found at virtual address 0xC0100000). Kernel
+ memory is spread out over several ranges of addresses, with gaps
+ of uninteresting or non-existent virtual addresses in between, and
+ this structure captures that.
+
+ A memory map is a range of 64 bit addresses from [0] to [2^64-1].
+ (Note that 64 bit addresses are used even for 32 bit virtual
+ machines - just ignore everything above 0xFFFFFFFF).
+
+ A memory map consists of zero or more {b mappings} of data. A
+ mapping starts at some address and has some size, and the data for
+ a mapping can come from some source such as a file or OCaml
+ string. Use {!of_file}, {!of_string}, {!add_file}, {!add_string}
+ to create a memory map from mappings.
+
+ {3 Overlapping mappings and holes}
+
+ If mappings overlap, then the mapping which was added later
+ overrides/overwrites earlier mappings at any addresses which
+ coincide.
+
+ Where there is no mapping for a particular address, the memory map
+ is said to have a hole. (Typically almost all of a memory map is
+ holes.) In general, the searching functions such as {!find} skip
+ over holes, while the accessor functions such as {!get_bytes}
+ raise an error if you try to read a hole, but read the individual
+ function documentation.
+
+ {3 Word size and endianness}
+
+ Memory maps may (or may not) have an associated word size and
+ endianness for the whole map. These are used when we look at
+ integers and pointers in the memory. See {!get_endian},
+ {!set_endian}, {!get_wordsize} and {!set_wordsize}, and accessor
+ functions such as {!get_int32} and {!follow_pointer}.
+
+ {3 Efficiency}
+
+ Mappings' data are stored in 1D Bigarrays. The advantages of
+ using a Bigarray are: (a) hidden from the garbage collector, (b)
+ easily accessible from C, (c) uses mmap(2) where possible.
+
+ Some low level functions are written in C for speed.
+
+ Mappings are stored in a segment tree for efficient access, but
+ the segment tree has to be rebuilt from scratch each time you add
+ a new mapping. It is not known if there is a more efficient way
+ to incrementally update a segment tree. In any case, as long as
+ you are mainly doing lookups / searches / getting bytes, this is
+ very fast.
+*)
+
+(** {2 Types} *)
+
+type ('ws,'e,'hm) t
+(** Memory map.
+
+ The ['ws], ['e] and ['hm] type parameters are phantom types
+ designed to ensure you don't try illegal operations before
+ initializing certain parts of the memory map. If you are not
+ familiar with phantom types, you can just ignore them.
+
+ See also
+ {{:http://camltastic.blogspot.com/2008/05/phantom-types.html}this
+ posting about the phantom types used in virt-mem}.
+
+ The memory map structure is an example of a
+ {{:http://en.wikipedia.org/wiki/Purely_functional}persistent
+ data structure}.
+*)
type addr = int64
(** Virtual memory addresses (even on 32 bit machines). *)
-val create : unit -> ([`NoWordsize], [`NoEndian]) t
+(** {2 Create a memory map, add mappings} *)
+
+val create : unit -> ([`NoWordsize], [`NoEndian], [`NoMappings]) t
(** Create a new, empty memory map. *)
-val set_wordsize : ([`NoWordsize], 'b) t -> Virt_mem_utils.wordsize ->
- ([`Wordsize], 'b) t
+val of_file : Unix.file_descr -> addr ->
+ ([`NoWordsize], [`NoEndian], [`HasMapping]) t
+(** Create a new memory map, mapping file [fd] at address [addr]. *)
+
+val add_file : ('ws, 'e, 'hm) t -> Unix.file_descr -> addr ->
+ ('ws, 'e, [`HasMapping]) t
+(** Add file [fd] at address [addr] to an existing memory map.
+ The new mapping can overwrite all or part of an existing mapping. *)
+
+val of_string : string -> addr -> ([`NoWordsize], [`NoEndian], [`HasMapping]) t
+(** Create a new memory map, mapping string at address [addr]. *)
+
+val add_string : ('ws, 'e, 'hm) t -> string -> addr ->
+ ('ws, 'e, [`HasMapping]) t
+(** Add string at address [addr] to an existing memory map.
+ The new mapping can overwrite all or part of an existing mapping. *)
+
+val set_wordsize : ([`NoWordsize], 'e, 'hm) t -> Virt_mem_utils.wordsize ->
+ ([`Wordsize], 'e, 'hm) t
(** Set the natural wordsize of the memory map. This is used
- for matching pointers within the map and can be set only once. *)
+ for matching integers and pointers within the map and can be
+ set only once. *)
-val set_endian : ('a, [`NoEndian]) t -> Bitmatch.endian ->
- ('a, [`Endian]) t
+val set_endian : ('ws, [`NoEndian], 'hm) t -> Bitstring.endian ->
+ ('ws, [`Endian], 'hm) t
(** Set the natural endianness of the memory map. This is used
- for matching pointers within the map and can be set only once. *)
+ for matching integers and pointers within the map and can be
+ set only once. *)
-val get_wordsize : ([`Wordsize], 'b) t -> Virt_mem_utils.wordsize
+val get_wordsize : ([`Wordsize], 'e, 'hm) t -> Virt_mem_utils.wordsize
(** Return the wordsize previously set for this memory map. *)
-val get_endian : ('a, [`Endian]) t -> Bitmatch.endian
+val get_endian : ('ws, [`Endian], 'hm) t -> Bitstring.endian
(** Return the endianness previously set for this memory map. *)
-val of_file : Unix.file_descr -> addr -> ([`NoWordsize], [`NoEndian]) t
-(** Create a new memory map, mapping file [fd] at address [addr]. *)
+(** {2 Searching} *)
-val add_file : ('a, 'b) t -> Unix.file_descr -> addr -> ('a, 'b) t
-(** Add file [fd] at address [addr] to an existing memory map.
- Behaviour is undefined if memory mappings overlap. *)
-
-val find : ('a, 'b) t -> ?start:addr -> string -> addr option
-(** Find string in a memory map and return its address (if found).
- You can pass an optional starting address. Any holes in
- the memory map are skipped automatically. *)
+val find : ('ws, 'e, [`HasMapping]) t -> ?start:addr -> string -> addr option
+(** Find string in a memory map and return its address (if found)
+ or [None] (if not found). You can pass an optional starting
+ address. If no start address is given, we begin searching at
+ the beginning of the first mapping.
-val find_align : ([`Wordsize], 'b) t -> ?start:addr -> string -> addr option
-(** Find a string aligned to the wordsize in the memory map. *)
+ Any holes in the memory map are skipped automatically.
-val find_all : ('a, 'b) t -> ?start:addr -> string -> addr list
-(** Find all occurrences of a string in a memory map. *)
+ Note that this doesn't find strings which straddle the
+ boundary of two adjacent or overlapping mappings.
-val find_all_align : ([`Wordsize], 'b) t -> ?start:addr -> string -> addr list
-(** Find all occurrences of a string in a memory map. *)
+ Note that because the string being matched is an OCaml
+ string it may contain NULs (zero bytes) and those are matched
+ properly. *)
-val find_pointer : ([`Wordsize], [`Endian]) t -> ?start:addr -> addr ->
+val find_align : ([`Wordsize], 'e, [`HasMapping]) t -> ?start:addr -> string ->
addr option
+(** Same as {!find}, but the string must be aligned to the word size of
+ the memory map. *)
+
+val find_all : ('ws, 'e, [`HasMapping]) t -> ?start:addr -> string -> addr list
+(** Same as {!find}, but returns all occurrences of a string in a memory map. *)
+
+val find_all_align : ([`Wordsize], 'e, [`HasMapping]) t -> ?start:addr ->
+ string -> addr list
+(** Same as {!find_all}, but the strings must be aligned to the word size. *)
+
+val find_pointer : ([`Wordsize], [`Endian], [`HasMapping]) t -> ?start:addr ->
+ addr -> addr option
(** Find a pointer (address) in the memory map.
The pointer must be aligned to a word. *)
-val find_pointer_all : ([`Wordsize], [`Endian]) t -> ?start:addr -> addr ->
- addr list
+val find_pointer_all : ([`Wordsize], [`Endian], [`HasMapping]) t ->
+ ?start:addr -> addr -> addr list
(** Find all occurrences of a pointer in the memory map. *)
-val get_byte : ('a, 'b) t -> addr -> int
+(** {2 Get bytes and ranges of bytes} *)
+
+val get_byte : ('ws, 'e, [`HasMapping]) t -> addr -> int
(** Return the byte at the given address.
- This may raise [Invalid_argument "get_byte"] if the address is
- not mapped. *)
+ This will raise [Invalid_argument "get_byte"] if the address is
+ a hole (not mapped). *)
-val get_bytes : ('a, 'b) t -> addr -> int -> string
+val get_bytes : ('ws, 'e, [`HasMapping]) t -> addr -> int -> string
(** Return the sequence of bytes starting at the given address.
- This may raise [Invalid_argument "get_bytes"] if the address range
- is not fully mapped. *)
+ This will raise [Invalid_argument "get_bytes"] if the address range
+ contains holes. *)
+
+val get_int32 : ('ws, [`Endian], [`HasMapping]) t -> addr -> int32
+(** Return the 32-bit int at [addr]. *)
+
+val get_int64 : ('ws, [`Endian], [`HasMapping]) t -> addr -> int64
+(** Return the 64-bit int at [addr]. *)
-val get_string : ('a, 'b) t -> addr -> string
+val get_C_int : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> int32
+(** Return the C 32-bit int at [addr]. *)
+
+val get_C_long : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> int64
+(** Return the C 32 or 64-bit long at [addr]. *)
+
+val get_string : ('ws, 'e, [`HasMapping]) t -> addr -> string
(** Return the sequence of bytes starting at [addr] up to (but not
including) the first ASCII NUL character. In other words, this
returns a C-style string.
- This may raise [Invalid_argument "get_string"] if we reach an
- unmapped address before finding the end of the string.
+ This may raise [Invalid_argument "get_string"] if we reach a
+ hole (unmapped address) before finding the end of the string.
- See also {!is_string} and {!is_C_identifier}. *)
+ See also {!get_bytes}, {!is_string} and {!is_C_identifier}. *)
-val is_string : ('a, 'b) t -> addr -> bool
+val is_string : ('ws, 'e, [`HasMapping]) t -> addr -> bool
(** Return true or false if the address contains an ASCII NUL-terminated
string. *)
-val is_C_identifier : ('a, 'b) t -> addr -> bool
+val is_C_identifier : ('ws, 'e, [`HasMapping]) t -> addr -> bool
(** Return true or false if the address contains a NUL-terminated
C identifier. *)
-val follow_pointer : ([`Wordsize], [`Endian]) t -> addr -> addr
+val is_mapped : ('ws, 'e, 'hm) t -> addr -> bool
+(** Return true if the single address [addr] is mapped. *)
+
+val is_mapped_range : ('ws, 'e, 'hm) t -> addr -> int -> bool
+(** Return true if all addresses in the range [addr] to [addr+size-1]
+ are mapped. *)
+
+val follow_pointer : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> addr
(** Follow (dereference) the pointer at [addr] and return
the address pointed to. *)
-val succ_long : ([`Wordsize], 'b) t -> addr -> addr
+val succ_long : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr
(** Add wordsize bytes to [addr] and return it. *)
-val pred_long : ([`Wordsize], 'b) t -> addr -> addr
+val pred_long : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr
(** Subtract wordsize bytes from [addr] and return it. *)
+
+val align : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr
+(** Align the [addr] to the next wordsize boundary. If it is already
+ aligned, this just returns [addr]. *)
+
+(** {2 Save and load memory maps} *)
+
+(*val to_channel : ('ws, 'e, [`HasMapping]) t -> out_channel -> unit*)
+(** Write the memory map and data to the given output channel in
+ a reasonably efficient and stable binary format. *)
+
+(*val from_channel : in_channel -> ('?, '?, [`HasMapping]) t*)
+(** Read a previously saved memory map. If the input channel does
+ not contain a memory map, this raises [Invalid_argument]. *)