X-Git-Url: http://git.annexia.org/?p=virt-mem.git;a=blobdiff_plain;f=lib%2Fvirt_mem_mmap.mli;h=0294efd94865999ad6ec93cb16612c86d201c96e;hp=68c7b2f7529da4bb40f52d7810f3001a7190bfd8;hb=b8aef502747b039b20877f99f3d8986b6a9329d2;hpb=46037cd89c23b0f94dc691006ee1d9cd0fec24f0 diff --git a/lib/virt_mem_mmap.mli b/lib/virt_mem_mmap.mli index 68c7b2f..0294efd 100644 --- a/lib/virt_mem_mmap.mli +++ b/lib/virt_mem_mmap.mli @@ -1,3 +1,4 @@ +(** Memory map. *) (* Memory info command for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ @@ -15,109 +16,238 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Functions for making a memory map of a virtual machine from - various sources. The memory map will most certainly have holes. *) -type ('a,'b) t -(** Memory map. *) +(** {2 Memory maps} + + Memory maps represent the virtual memory of a virtual machine. + + We are mostly interested in the kernel memory and kernel data + structures. In Linux this stays at the same virtual memory + address whichever task is actually running (e.g. on i386 machines, + the kernel is often found at virtual address 0xC0100000). Kernel + memory is spread out over several ranges of addresses, with gaps + of uninteresting or non-existent virtual addresses in between, and + this structure captures that. + + A memory map is a range of 64 bit addresses from [0] to [2^64-1]. + (Note that 64 bit addresses are used even for 32 bit virtual + machines - just ignore everything above 0xFFFFFFFF). + + A memory map consists of zero or more {b mappings} of data. A + mapping starts at some address and has some size, and the data for + a mapping can come from some source such as a file or OCaml + string. Use {!of_file}, {!of_string}, {!add_file}, {!add_string} + to create a memory map from mappings. 
+ + {3 Overlapping mappings and holes} + + If mappings overlap, then the mapping which was added later + overrides/overwrites earlier mappings at any addresses which + coincide. + + Where there is no mapping for a particular address, the memory map + is said to have a hole. (Typically almost all of a memory map is + holes). In general, the searching functions such as {!find} skip + over holes, while the accessor functions such as {!get_bytes} + raise an error if you try to read a hole, but read the individual + function documentation. + + {3 Word size and endianness} + + Memory maps may (or may not) have an associated word size and + endianness for the whole map. These are used when we look at + integers and pointers in the memory. See {!get_endian}, + {!set_endian}, {!get_wordsize} and {!set_wordsize}, and accessor + functions such as {!get_int32} and {!follow_pointer}. + + {3 Efficiency} + + Mappings' data are stored in 1D Bigarrays. The advantages of + using a Bigarray are: (a) hidden from the garbage collector, (b) + easily accessible from C, (c) uses mmap(2) where possible. + + Some low level functions are written in C for speed. + + Mappings are stored in a segment tree for efficient access, but + the segment tree has to be rebuilt from scratch each time you add + a new mapping. It is not known if there is a more efficient way + to incrementally update a segment tree. In any case, as long as + you are mainly doing lookups / searches / getting bytes, this is + very fast. +*) + +(** {2 Types} *) + +type ('ws,'e,'hm) t +(** Memory map. + + The ['ws], ['e] and ['hm] type parameters are phantom types + designed to ensure you don't try illegal operations before + initializing certain parts of the memory map. If you are not + familiar with phantom types, you can just ignore them. + + See also + {{:http://camltastic.blogspot.com/2008/05/phantom-types.html}this + posting about the phantom types used in virt-mem}. 
+ + The memory map structure is an example of a + {{:http://en.wikipedia.org/wiki/Purely_functional}persistent + data structure}. +*) type addr = int64 (** Virtual memory addresses (even on 32 bit machines). *) -val create : unit -> ([`NoWordsize], [`NoEndian]) t +(** {2 Create a memory map, add mappings} *) + +val create : unit -> ([`NoWordsize], [`NoEndian], [`NoMappings]) t (** Create a new, empty memory map. *) -val set_wordsize : ([`NoWordsize], 'b) t -> Virt_mem_utils.wordsize -> - ([`Wordsize], 'b) t +val of_file : Unix.file_descr -> addr -> + ([`NoWordsize], [`NoEndian], [`HasMapping]) t +(** Create a new memory map, mapping file [fd] at address [addr]. *) + +val add_file : ('ws, 'e, 'hm) t -> Unix.file_descr -> addr -> + ('ws, 'e, [`HasMapping]) t +(** Add file [fd] at address [addr] to an existing memory map. + The new mapping can overwrite all or part of an existing mapping. *) + +val of_string : string -> addr -> ([`NoWordsize], [`NoEndian], [`HasMapping]) t +(** Create a new memory map, mapping string at address [addr]. *) + +val add_string : ('ws, 'e, 'hm) t -> string -> addr -> + ('ws, 'e, [`HasMapping]) t +(** Add string at address [addr] to an existing memory map. + The new mapping can overwrite all or part of an existing mapping. *) + +val set_wordsize : ([`NoWordsize], 'e, 'hm) t -> Virt_mem_utils.wordsize -> + ([`Wordsize], 'e, 'hm) t (** Set the natural wordsize of the memory map. This is used - for matching pointers within the map and can be set only once. *) + for matching integers and pointers within the map and can be + set only once. *) -val set_endian : ('a, [`NoEndian]) t -> Bitmatch.endian -> - ('a, [`Endian]) t +val set_endian : ('ws, [`NoEndian], 'hm) t -> Bitstring.endian -> + ('ws, [`Endian], 'hm) t (** Set the natural endianness of the memory map. This is used - for matching pointers within the map and can be set only once. *) + for matching integers and pointers within the map and can be + set only once. 
*) -val get_wordsize : ([`Wordsize], 'b) t -> Virt_mem_utils.wordsize +val get_wordsize : ([`Wordsize], 'e, 'hm) t -> Virt_mem_utils.wordsize (** Return the wordsize previously set for this memory map. *) -val get_endian : ('a, [`Endian]) t -> Bitmatch.endian +val get_endian : ('ws, [`Endian], 'hm) t -> Bitstring.endian (** Return the endianness previously set for this memory map. *) -val of_file : Unix.file_descr -> addr -> ([`NoWordsize], [`NoEndian]) t -(** Create a new memory map, mapping file [fd] at address [addr]. *) - -val add_file : ('a, 'b) t -> Unix.file_descr -> addr -> ('a, 'b) t -(** Add file [fd] at address [addr] to an existing memory map. - Behaviour is undefined if memory mappings overlap. *) +(** {2 Searching} *) -val find : ('a, 'b) t -> ?start:addr -> string -> addr option -(** Find string in a memory map and return its address (if found). - You can pass an optional starting address. Any holes in - the memory map are skipped automatically. *) +val find : ('ws, 'e, [`HasMapping]) t -> ?start:addr -> string -> addr option +(** Find string in a memory map and return its address (if found) + or [None] (if not found). You can pass an optional starting + address. If no start address is given, we begin searching at + the beginning of the first mapping. -val find_align : ([`Wordsize], 'b) t -> ?start:addr -> string -> addr option -(** Find a string aligned to the wordsize in the memory map. *) + Any holes in the memory map are skipped automatically. -val find_all : ('a, 'b) t -> ?start:addr -> string -> addr list -(** Find all occurrences of a string in a memory map. *) + Note that this doesn't find strings which straddle the + boundary of two adjacent or overlapping mappings. -val find_all_align : ([`Wordsize], 'b) t -> ?start:addr -> string -> addr list -(** Find all occurrences of a string in a memory map. *) + Note that because the string being matched is an OCaml + string it may contain NULs (zero bytes) and those are matched + properly. 
*) -val find_pointer : ([`Wordsize], [`Endian]) t -> ?start:addr -> addr -> +val find_align : ([`Wordsize], 'e, [`HasMapping]) t -> ?start:addr -> string -> addr option +(** Same as {!find}, but the string must be aligned to the word size of + the memory map. *) + +val find_all : ('ws, 'e, [`HasMapping]) t -> ?start:addr -> string -> addr list +(** Same as {!find}, but returns all occurrences of a string in a memory map. *) + +val find_all_align : ([`Wordsize], 'e, [`HasMapping]) t -> ?start:addr -> + string -> addr list +(** Same as {!find_all}, but the strings must be aligned to the word size. *) + +val find_pointer : ([`Wordsize], [`Endian], [`HasMapping]) t -> ?start:addr -> + addr -> addr option (** Find a pointer (address) in the memory map. The pointer must be aligned to a word. *) -val find_pointer_all : ([`Wordsize], [`Endian]) t -> ?start:addr -> addr -> - addr list +val find_pointer_all : ([`Wordsize], [`Endian], [`HasMapping]) t -> + ?start:addr -> addr -> addr list (** Find all occurrences of a pointer in the memory map. *) -val get_byte : ('a, 'b) t -> addr -> int +(** {2 Get bytes and ranges of bytes} *) + +val get_byte : ('ws, 'e, [`HasMapping]) t -> addr -> int (** Return the byte at the given address. - This may raise [Invalid_argument "get_byte"] if the address is - not mapped. *) + This will raise [Invalid_argument "get_byte"] if the address is + a hole (not mapped). *) -val get_bytes : ('a, 'b) t -> addr -> int -> string +val get_bytes : ('ws, 'e, [`HasMapping]) t -> addr -> int -> string (** Return the sequence of bytes starting at the given address. - This may raise [Invalid_argument "get_bytes"] if the address range - is not fully mapped. *) + This will raise [Invalid_argument "get_bytes"] if the address range + contains holes. *) + +val get_int32 : ('ws, [`Endian], [`HasMapping]) t -> addr -> int32 +(** Return the 32-bit int at [addr]. 
*) + +val get_int64 : ('ws, [`Endian], [`HasMapping]) t -> addr -> int64 +(** Return the 64-bit int at [addr]. *) + +val get_C_int : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> int32 +(** Return the C 32-bit int at [addr]. *) -val get_string : ('a, 'b) t -> addr -> string +val get_C_long : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> int64 +(** Return the C 32 or 64-bit long at [addr]. *) + +val get_string : ('ws, 'e, [`HasMapping]) t -> addr -> string (** Return the sequence of bytes starting at [addr] up to (but not including) the first ASCII NUL character. In other words, this returns a C-style string. - This may raise [Invalid_argument "get_string"] if we reach an - unmapped address before finding the end of the string. + This may raise [Invalid_argument "get_string"] if we reach a + hole (unmapped address) before finding the end of the string. - See also {!is_string} and {!is_C_identifier}. *) + See also {!get_bytes}, {!is_string} and {!is_C_identifier}. *) -val is_string : ('a, 'b) t -> addr -> bool +val is_string : ('ws, 'e, [`HasMapping]) t -> addr -> bool (** Return true or false if the address contains an ASCII NUL-terminated string. *) -val is_C_identifier : ('a, 'b) t -> addr -> bool +val is_C_identifier : ('ws, 'e, [`HasMapping]) t -> addr -> bool (** Return true or false if the address contains a NUL-terminated C identifier. *) -val is_mapped : ('a, 'b) t -> addr -> bool +val is_mapped : ('ws, 'e, 'hm) t -> addr -> bool (** Return true if the single address [addr] is mapped. *) -val follow_pointer : ([`Wordsize], [`Endian]) t -> addr -> addr +val is_mapped_range : ('ws, 'e, 'hm) t -> addr -> int -> bool +(** Return true if all addresses in the range [addr] to [addr+size-1] + are mapped. *) + +val follow_pointer : ([`Wordsize], [`Endian], [`HasMapping]) t -> addr -> addr (** Follow (dereference) the pointer at [addr] and return the address pointed to. 
*) -val succ_long : ([`Wordsize], 'b) t -> addr -> addr +val succ_long : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr (** Add wordsize bytes to [addr] and return it. *) -val pred_long : ([`Wordsize], 'b) t -> addr -> addr +val pred_long : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr (** Subtract wordsize bytes from [addr] and return it. *) -val align : ([`Wordsize], 'b) t -> addr -> addr +val align : ([`Wordsize], 'e, [`HasMapping]) t -> addr -> addr (** Align the [addr] to the next wordsize boundary. If it is already aligned, this just returns [addr]. *) + +(** {2 Save and load memory maps} *) + +(*val to_channel : ('ws, 'e, [`HasMapping]) t -> out_channel -> unit*) +(** Write the memory map and data to the given output channel in + a reasonably efficient and stable binary format. *) + +(*val from_channel : in_channel -> ('?, '?, [`HasMapping]) t*) +(** Read a previously saved memory map. If the input channel does + not contain a memory map, this raises [Invalid_argument]. *)