From: Richard W.M. Jones <"Richard W.M. Jones "> Date: Wed, 23 Jul 2008 14:13:22 +0000 (+0100) Subject: Split out the kernel symbol detection code. X-Git-Url: http://git.annexia.org/?p=virt-mem.git;a=commitdiff_plain;h=89d41f7614612cf991d6005e7c1e20cd502a5404 Split out the kernel symbol detection code. --- diff --git a/HACKING b/HACKING index 52b9829..12cb191 100644 --- a/HACKING +++ b/HACKING @@ -7,13 +7,14 @@ lib/ - The common core of all the tools. Library, kernel symbols, command line handling, memory images, etc. - lib/virt_mem.ml contains most of the important code. + lib/virt_mem.ml contains most of the important 'glue' code. uname/ dmesg/ etc. - - The code specific to each tool, usually rather small. + - The code specific to each tool. This is usually rather small because + the code in lib/ does the hard work. mem/ diff --git a/Makefile.in b/Makefile.in index 28e7858..62e2fed 100644 --- a/Makefile.in +++ b/Makefile.in @@ -26,7 +26,12 @@ mandir = @mandir@ OCAMLDOCFLAGS = -html -sort -package bitstring,extlib -I lib OCAMLDOC = @OCAMLDOC@ -OCAMLDOCFILES = lib/virt_mem_utils.ml lib/virt_mem_mmap.mli lib/virt_mem.mli +OCAMLDOCFILES = lib/virt_mem_utils.ml \ + lib/virt_mem_mmap.mli \ + lib/virt_mem_types.ml \ + lib/virt_mem_ksyms.mli \ + lib/virt_mem_kallsyms.mli \ + lib/virt_mem.mli HAVE_PERLDOC = @HAVE_PERLDOC@ diff --git a/lib/.depend b/lib/.depend index e0b0a96..6eb3e7f 100644 --- a/lib/.depend +++ b/lib/.depend @@ -1,12 +1,28 @@ -virt_mem.cmi: virt_mem_utils.cmo virt_mem_mmap.cmi +virt_mem_kallsyms.cmi: virt_mem_types.cmo +virt_mem_ksyms.cmi: virt_mem_types.cmo +virt_mem.cmi: virt_mem_types.cmo virt_mem_mmap.cmi: virt_mem_utils.cmo test_mmap.cmo: virt_mem_mmap.cmi test_mmap.cmx: virt_mem_mmap.cmx virt_mem_capture.cmo: virt_mem_gettext.cmo virt_mem.cmi virt_mem_capture.cmx: virt_mem_gettext.cmx virt_mem.cmx -virt_mem.cmo: virt_mem_version.cmo virt_mem_utils.cmo virt_mem_mmap.cmi \ +virt_mem_kallsyms.cmo: virt_mem_utils.cmo virt_mem_types.cmo \ + virt_mem_mmap.cmi virt_mem_ksyms.cmi virt_mem_gettext.cmo \ + virt_mem_kallsyms.cmi +virt_mem_kallsyms.cmx: virt_mem_utils.cmx virt_mem_types.cmx \ + virt_mem_mmap.cmx virt_mem_ksyms.cmx virt_mem_gettext.cmx \ + virt_mem_kallsyms.cmi +virt_mem_ksyms.cmo: virt_mem_utils.cmo virt_mem_types.cmo virt_mem_mmap.cmi \ + virt_mem_gettext.cmo virt_mem_ksyms.cmi +virt_mem_ksyms.cmx: virt_mem_utils.cmx virt_mem_types.cmx virt_mem_mmap.cmx \ + virt_mem_gettext.cmx virt_mem_ksyms.cmi +virt_mem.cmo: virt_mem_version.cmo virt_mem_utils.cmo virt_mem_types.cmo \ + virt_mem_mmap.cmi virt_mem_ksyms.cmi virt_mem_kallsyms.cmi \ virt_mem_gettext.cmo virt_mem.cmi -virt_mem.cmx: virt_mem_version.cmx virt_mem_utils.cmx virt_mem_mmap.cmx \ +virt_mem.cmx: virt_mem_version.cmx virt_mem_utils.cmx virt_mem_types.cmx \ + virt_mem_mmap.cmx virt_mem_ksyms.cmx virt_mem_kallsyms.cmx \ virt_mem_gettext.cmx virt_mem.cmi virt_mem_mmap.cmo: virt_mem_utils.cmo virt_mem_mmap.cmi virt_mem_mmap.cmx: virt_mem_utils.cmx virt_mem_mmap.cmi +virt_mem_types.cmo: virt_mem_utils.cmo virt_mem_mmap.cmi +virt_mem_types.cmx: virt_mem_utils.cmx virt_mem_mmap.cmx diff --git a/lib/Makefile.in b/lib/Makefile.in index 1b46ebe..2edc175 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -50,6 +50,9 @@ OBJS = virt_mem_gettext.cmo \ virt_mem_utils.cmo \ virt_mem_mmap_c.o \ virt_mem_mmap.cmo \ + virt_mem_types.cmo \ + virt_mem_ksyms.cmo \ + virt_mem_kallsyms.cmo \ virt_mem.cmo \ virt_mem_capture.cmo XOBJS = $(OBJS:%.cmo=%.cmx) diff --git a/lib/virt_mem.ml b/lib/virt_mem.ml index d1c4d2b..0a956f1 100644 --- a/lib/virt_mem.ml +++ b/lib/virt_mem.ml @@ -27,10 +27,7 @@ module D = Libvirt.Domain open Virt_mem_gettext.Gettext open Virt_mem_utils -module MMap = Virt_mem_mmap - -let min_kallsyms_tabsize = 1_000L -let max_kallsyms_tabsize = 250_000L +open Virt_mem_types (* Make the kernel size around 16 MB, but just a bit smaller than * maximum string length so we can still run this on a 32 bit platform. @@ -40,25 +37,6 @@ let kernel_size = else 0x100_0000 let max_memory_peek = 65536 (* XXX Use D.max_peek function *) -type ksym = string - -type image = - int option - * string - * Virt_mem_utils.architecture - * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t - -type image_with_ksyms = - int option - * string - * Virt_mem_utils.architecture - * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t - * (ksym -> MMap.addr) - -type kallsyms_compr = - | Compressed of (string * MMap.addr) list * MMap.addr - | Uncompressed of (string * MMap.addr) list - (* When tools register themselves, they are added to this list. * Later, we will alphabetize the list. *) @@ -170,7 +148,7 @@ Options:") cmd summary description in (* Now begin proper parsing of the command line arguments. *) let debug = ref false in - let images = ref [] in + let testimages = ref [] in let uri = ref "" in let anon_args = ref [] in @@ -216,9 +194,9 @@ Options:") cmd summary description in (* Handle -t option. *) let memory_image filename = - images := + testimages := (!def_wordsize, !def_endian, !def_architecture, !def_text_addr, filename) - :: !images + :: !testimages in (* Handle --version option. *) @@ -276,7 +254,7 @@ Options:") cmd summary description in (* Parse the command line. This will exit if --version or --help found. *) Arg.parse argspec anon_arg usage_msg; - let images = !images in + let testimages = !testimages in let debug = !debug in let uri = if !uri = "" then None else Some !uri in @@ -311,7 +289,7 @@ Use 'virt-mem --help' for more help or read the manual page virt-mem(1)"); (* Get the kernel images. *) let images = - if images = [] then ( + if testimages = [] then ( let conn = let name = uri in try C.connect_readonly ?name () @@ -425,13 +403,13 @@ Use 'virt-mem --help' for more help or read the manual page virt-mem(1)"); ); (* Map the virtual memory. *) - let mem = MMap.of_string str !def_text_addr in + let mem = Virt_mem_mmap.of_string str !def_text_addr in (* Force the wordsize and endianness. *) - let mem = MMap.set_wordsize mem wordsize in - let mem = MMap.set_endian mem endian in + let mem = Virt_mem_mmap.set_wordsize mem wordsize in + let mem = Virt_mem_mmap.set_endian mem endian in - ((Some id, name, arch, mem) : image) + ((Some id, name, arch, mem) : image0) ) xmls ) else ( (* One or more -t options passed. *) @@ -473,14 +451,14 @@ Use 'virt-mem --help' for more help or read the manual page virt-mem(1)"); (* Map the virtual memory. *) let fd = openfile filename [O_RDONLY] 0 in - let mem = MMap.of_file fd text_addr in + let mem = Virt_mem_mmap.of_file fd text_addr in (* Force the wordsize and endianness. *) - let mem = MMap.set_wordsize mem wordsize in - let mem = MMap.set_endian mem endian in + let mem = Virt_mem_mmap.set_wordsize mem wordsize in + let mem = Virt_mem_mmap.set_endian mem endian in - ((None, filename, arch, mem) : image) - ) images + ((None, filename, arch, mem) : image0) + ) testimages ) in (* Optional callback into the tool before we start looking for @@ -496,400 +474,22 @@ Use 'virt-mem --help' for more help or read the manual page virt-mem(1)"); *) if run = None then exit 0; - (* Now kernel symbol analysis starts ... *) + (* Do the kernel symbol analysis. *) let images = List.map ( - fun (domid, name, arch, mem) -> - (* Look for some common entries in the exported symbol table and - * from that find the symbol table itself. These are just - * supposed to be symbols which are very likely to be present - * in any Linux kernel, although we only need one of them to be - * present to find the symbol table. - * - * NB. Must not be __initdata, must be in EXPORT_SYMBOL. - *) - let common_ksyms = [ - "init_task"; (* first task_struct *) - "root_mountflags"; (* flags for mounting root fs *) - "init_uts_ns"; (* uname strings *) - "sys_open"; (* open(2) entry point *) - "sys_chdir"; (* chdir(2) entry point *) - "sys_chroot"; (* chroot(2) entry point *) - "sys_umask"; (* umask(2) entry point *) - "schedule"; (* scheduler entry point *) - ] in - (* Searching for string *) - let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in - - let start_t = gettimeofday () in - - (* Search for these strings in the memory image. *) - let ksym_strings = List.map (MMap.find_all mem) common_ksyms_nul in - let ksym_strings = List.concat ksym_strings in - (* Adjust found addresses to start of the string (skip ). *) - let ksym_strings = List.map Int64.succ ksym_strings in - - if debug then ( - let end_t = gettimeofday () in - eprintf "timing: searching for common_ksyms took %f seconds\n%!" - (end_t -. start_t) - ); - - let start_t = gettimeofday () in - - (* For any we found, try to look up the symbol table - * base addr and size. - *) - let ksymtabs = List.map ( - fun addr -> - (* Search for 'addr' appearing in the image. *) - let addrs = MMap.find_pointer_all mem addr in - - (* Now consider each of these addresses and search back - * until we reach the beginning of the (possible) symbol - * table. - * - * Kernel symbol table struct is: - * struct kernel_symbol { - * unsigned long value; - * const char *name; <-- initial pointer - * } symbols[]; - *) - let pred_long2 addr = - MMap.pred_long mem (MMap.pred_long mem addr) - in - let base_addrs = List.map ( - fun addr -> - let rec loop addr = - (* '*addr' should point to a C identifier. If it does, - * step backwards to the previous symbol table entry. - *) - let addrp = MMap.follow_pointer mem addr in - if MMap.is_C_identifier mem addrp then - loop (pred_long2 addr) - else - MMap.succ_long mem addr - in - loop addr - ) addrs in - - (* Also look for the end of the symbol table and - * calculate its size. - *) - let base_addrs_sizes = List.map ( - fun base_addr -> - let rec loop addr = - let addr2 = MMap.succ_long mem addr in - let addr2p = MMap.follow_pointer mem addr2 in - if MMap.is_C_identifier mem addr2p then - loop (MMap.succ_long mem addr2) - else - addr - in - let end_addr = loop base_addr in - base_addr, end_addr -^ base_addr - ) base_addrs in - - base_addrs_sizes - ) ksym_strings in - let ksymtabs = List.concat ksymtabs in - - (* Simply ignore any symbol table candidates which are too small. *) - let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in - - if debug then ( - eprintf "%s: candidate symbol tables at:\n" name; - List.iter ( - fun (addr, size) -> - eprintf "\t%Lx\t%Lx\t%!" addr size; - eprintf "first symbol: %s\n%!" - (MMap.get_string mem - (MMap.follow_pointer mem - (MMap.succ_long mem addr))) - ) ksymtabs - ); - - (* Vote for the most popular symbol table candidate and from this - * generate a function to look up ksyms. - *) - let lookup_ksym = - let freqs = frequency ksymtabs in - match freqs with - | [] -> - eprintf (f_"%s: cannot find start of kernel symbol table\n") name; - (fun _ -> raise Not_found) - - | (_, (ksymtab_addr, ksymtab_size)) :: _ -> - if debug then - eprintf - "%s: Kernel symbol table found at %Lx, size %Lx bytes\n%!" - name ksymtab_addr ksymtab_size; - - (* Load the whole symbol table as a bitstring. *) - let ksymtab = - Bitstring.bitstring_of_string - (MMap.get_bytes mem ksymtab_addr - (Int64.to_int ksymtab_size)) in - - (* Function to look up an address in the symbol table. *) - let lookup_ksym sym = - let bits = bits_of_wordsize (MMap.get_wordsize mem) in - let e = MMap.get_endian mem in - let rec loop bs = - bitmatch bs with - | { value : bits : endian(e); - name_ptr : bits : endian(e) } - when MMap.get_string mem name_ptr = sym -> - value - | { _ : bits : endian(e); - _ : bits : endian(e); - bs : -1 : bitstring } -> - loop bs - | { _ } -> raise Not_found - in - loop ksymtab - in - - lookup_ksym - in - - if debug then ( - let end_t = gettimeofday () in - eprintf "timing: searching for ordinary ksyms took %f seconds\n%!" - (end_t -. start_t) - ); - - let start_t = gettimeofday () in - - (* Now try to find the /proc/kallsyms table. This is in an odd - * compressed format (but not a very successful compression - * format). However if it exists we know that it will contain - * addresses of the common ksyms above, and it has some - * characteristics which make it easy to detect in the - * memory. - * - * kallsyms contains a complete list of symbols so is much - * more useful than the basic list of exports. - *) - let ksym_addrs = List.filter_map ( - fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None - ) common_ksyms in + fun image -> - (* Search for those kernel addresses in the image. We're looking - * for the table kallsyms_addresses followed by kallsyms_num_syms - * (number of symbols in the table). - *) - let ksym_addrs = List.map (MMap.find_pointer_all mem) ksym_addrs in - let ksym_addrs = List.concat ksym_addrs in + (* Look for ordinary kernel symbols: *) + let image = Virt_mem_ksyms.find_kernel_symbols debug image in + (* Look for kallsyms: *) + let image = Virt_mem_kallsyms.find_kallsyms debug image in - (* Test each one to see if it's a candidate list of kernel - * addresses followed by length of list. - *) - let kallsymtabs = List.filter_map ( - fun addr -> - (* Search upwards from address until we find the length field. - * If found, jump backwards by length and check all addresses. - *) - if debug then - eprintf "%s: testing candidate kallsyms at %Lx\n" name addr; - let rec loop addr = - let addrp = MMap.follow_pointer mem addr in - if MMap.is_mapped mem addrp then - loop (MMap.succ_long mem addr) (* continue up the table *) - else - if addrp >= min_kallsyms_tabsize && - addrp <= max_kallsyms_tabsize then ( - (* addrp might be the symbol count. Count backwards and - * check the full table. - *) - let num_entries = Int64.to_int addrp in - let entry_size = bytes_of_wordsize (MMap.get_wordsize mem) in - let start_addr = - addr -^ Int64.of_int (entry_size * num_entries) in - let end_addr = addr in - let rec loop2 addr = - if addr < end_addr then ( - let addrp = MMap.follow_pointer mem addr in - if MMap.is_mapped mem addrp then - loop2 (MMap.succ_long mem addr) - else - None (* can't verify the full address table *) - ) else - (* ok! *) - let names_addr = MMap.succ_long mem end_addr in - if debug then - eprintf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n" - name start_addr names_addr num_entries; - Some (start_addr, num_entries, names_addr) - in - loop2 start_addr - ) - else - None (* forget it *) - in - match loop addr with - | None -> None - | Some (start_addr, num_entries, names_addr) -> - (* As an additional verification, check the list of - * kallsyms_names. - *) - try - (* If the first byte is '\000' and is followed by a - * C identifier, then this is old-school list of - * symbols with prefix compression as in 2.6.9. - * Otherwise Huffman-compressed kallsyms as in - * 2.6.25. - *) - if MMap.get_byte mem names_addr = 0 && - MMap.is_C_identifier mem (names_addr+^1L) then ( - let names = ref [] in - let prev = ref "" in - let rec loop names_addr start_addr num = - if num > 0 then ( - let prefix = MMap.get_byte mem names_addr in - let prefix = String.sub !prev 0 prefix in - let name = MMap.get_string mem (names_addr+^1L) in - let len = String.length name in - let name = prefix ^ name in - prev := name; - let names_addr = names_addr +^ Int64.of_int len +^ 2L in - let sym_value = MMap.follow_pointer mem start_addr in - let start_addr = MMap.succ_long mem start_addr in - (*eprintf "%S -> %Lx\n" name sym_value;*) - names := (name, sym_value) :: !names; - loop names_addr start_addr (num-1) - ) - in - loop names_addr start_addr num_entries; - let names = List.rev !names in - - Some (start_addr, num_entries, names_addr, - Uncompressed names) - ) - else ( (* new-style "compressed" names. *) - let compressed_names = ref [] in - let rec loop names_addr start_addr num = - if num > 0 then ( - let len = MMap.get_byte mem names_addr in - let name = MMap.get_bytes mem (names_addr+^1L) len in - let names_addr = names_addr +^ Int64.of_int len +^ 1L in - let sym_value = MMap.follow_pointer mem start_addr in - let start_addr = MMap.succ_long mem start_addr in - compressed_names := - (name, sym_value) :: !compressed_names; - loop names_addr start_addr (num-1) - ) else - names_addr - in - let markers_addr = loop names_addr start_addr num_entries in - let markers_addr = MMap.align mem markers_addr in - let compressed_names = List.rev !compressed_names in - - Some (start_addr, num_entries, names_addr, - Compressed (compressed_names, markers_addr)) - ) - with - Invalid_argument _ -> None (* bad names list *) - ) ksym_addrs in - - if debug then ( - eprintf "%s: candidate kallsyms at:\n" name; - List.iter ( - function - | (start_addr, num_entries, names_addr, Uncompressed _) -> - eprintf "\t%Lx %d entries names_addr=%Lx old-style\n%!" - start_addr num_entries names_addr - | (start_addr, num_entries, names_addr, - Compressed (_, markers_addr)) -> - eprintf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!" - start_addr num_entries names_addr markers_addr - ) kallsymtabs - ); - - (* Vote for the most popular symbol table candidate and - * enhance the function for looking up ksyms. - *) - let lookup_ksym = - let freqs = frequency kallsymtabs in - match freqs with - | [] -> - (* Can't find any kallsymtabs, just return the lookup_ksym - * function generated previously from the exported symbols. - *) - lookup_ksym - - | (_, (_, _, _, Uncompressed names)) :: _ -> - let lookup_ksym name = - try (* first look it up in kallsyms table. *) - List.assoc name names - with Not_found -> (* try the old exports table instead *) - lookup_ksym name - in - lookup_ksym - - | (_, (start_addr, num_entries, names_addr, - Compressed (compressed_names, markers_addr))) :: _ -> - (* Skip the markers and look for the token table. *) - let num_markers = Int64.of_int ((num_entries + 255) / 256) in - let marker_size = - Int64.of_int (bytes_of_wordsize (MMap.get_wordsize mem)) in - let tokens_addr = markers_addr +^ marker_size *^ num_markers in - - (* Now read out the compression tokens, which are just - * 256 ASCIIZ strings that map bytes in the compression - * names to substrings. - *) - let tokens = Array.make 256 "" in - let rec loop i addr = - if i < 256 then ( - let str = MMap.get_string mem addr in - let len = String.length str in - let addr = addr +^ Int64.of_int (len+1) in - tokens.(i) <- str; - loop (i+1) addr - ) - in - loop 0 tokens_addr; - - (* Expand the compressed names using the tokens. *) - let names = List.filter_map ( - fun (name, sym_value) -> - let f c = tokens.(Char.code c) in - let name = String.replace_chars f name in - (* First character in uncompressed output is the symbol - * type, eg. 'T'/'t' for text etc. - *) - (* NOTE: Symbol names are NOT unique - * (eg. 'con_start' is both a function and data in - * some kernels). XXX We need to handle this situation - * better. - *) - (*let typ = name.[0] in*) - let name = String.sub name 1 (String.length name - 1) in - (*eprintf "%S -> %Lx\n" name sym_value;*) - Some (name, sym_value) - ) compressed_names in - - let lookup_ksym name = - try (* first look it up in kallsyms table. *) - List.assoc name names - with Not_found -> (* try the old exports table instead *) - lookup_ksym name - in - - lookup_ksym in - - if debug then ( - let end_t = gettimeofday () in - eprintf "timing: searching for kallsyms took %f seconds\n%!" - (end_t -. start_t) - ); - - (* Just wrap the lookup_ksym call in something which prints - * the query when debug is set. + (* Finally, just wrap the lookup_ksym call in something + * which prints the query when debug is set. *) - let lookup_ksym = + let image = if debug then + let (domid, name, arch, mem, lookup_ksym) = image in let lookup_ksym sym = try let value = lookup_ksym sym in @@ -899,12 +499,11 @@ Use 'virt-mem --help' for more help or read the manual page virt-mem(1)"); eprintf "lookup_ksym %S failed\n%!" sym; raise Not_found in - lookup_ksym + (domid, name, arch, mem, lookup_ksym) else - lookup_ksym - in + image in - ((domid, name, arch, mem, lookup_ksym) : image_with_ksyms) + image ) images in (* Run the tool's main function. *) diff --git a/lib/virt_mem.mli b/lib/virt_mem.mli index d0ead0d..eae488e 100644 --- a/lib/virt_mem.mli +++ b/lib/virt_mem.mli @@ -18,40 +18,12 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *) -type ksym = string - (** A kernel symbol name. *) - -type image = - int option - * string - * Virt_mem_utils.architecture - * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t - (** A memory image from a domain. *) - -type image_with_ksyms = - int option - * string - * Virt_mem_utils.architecture - * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t - * (ksym -> Virt_mem_mmap.addr) - (** An image after it has been processed to find kernel symbols. - - The tuple fields are: - - domain ID (if known) - - name, usually the domain name - - architecture (eg. I386) - - kernel memory map (wordsize & endianness already determined) - - a function to look up kernel symbols. It raises [Not_found] - if a kernel symbol could not be found or if the kernel symbol - table could not be found at all. - *) - val register : ?external_cmd:bool -> ?extra_args:(Arg.key * Arg.spec * Arg.doc) list -> ?argcheck:(bool -> unit) -> - ?beforeksyms:(bool -> image list -> unit) -> - ?run:(bool -> image_with_ksyms list -> unit) -> + ?beforeksyms:(bool -> Virt_mem_types.image0 list -> unit) -> + ?run:(bool -> Virt_mem_types.image1 list -> unit) -> string -> string -> Arg.usage_msg -> unit (** Tools register themselves with this call. diff --git a/lib/virt_mem_kallsyms.ml b/lib/virt_mem_kallsyms.ml new file mode 100644 index 0000000..87035e8 --- /dev/null +++ b/lib/virt_mem_kallsyms.ml @@ -0,0 +1,276 @@ +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Find kallsyms in a kernel image. + *) + +open Unix +open Printf + +open ExtList +open ExtString + +open Virt_mem_gettext.Gettext +open Virt_mem_utils +open Virt_mem_types + +let min_kallsyms_tabsize = 1_000L +let max_kallsyms_tabsize = 250_000L + +type kallsyms_compr = + | Compressed of (string * Virt_mem_mmap.addr) list * Virt_mem_mmap.addr + | Uncompressed of (string * Virt_mem_mmap.addr) list + +let find_kallsyms debug (domid, name, arch, mem, lookup_ksym) = + let start_t = gettimeofday () in + + (* Now try to find the /proc/kallsyms table. This is in an odd + * compressed format (but not a very successful compression + * format). However if it exists we know that it will contain + * addresses of the ordinary ksyms, and it has some + * characteristics which make it easy to detect in the + * memory. + * + * kallsyms contains a complete list of symbols so is much + * more useful than the basic list of exports. + *) + let ksym_addrs = List.filter_map ( + fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None + ) Virt_mem_ksyms.common_ksyms in + + (* Search for those kernel addresses in the image. We're looking + * for the table kallsyms_addresses followed by kallsyms_num_syms + * (number of symbols in the table). + *) + let ksym_addrs = + List.map (Virt_mem_mmap.find_pointer_all mem) ksym_addrs in + let ksym_addrs = List.concat ksym_addrs in + + (* Test each one to see if it's a candidate list of kernel + * addresses followed by length of list. + *) + let kallsymtabs = List.filter_map ( + fun addr -> + (* Search upwards from address until we find the length field. + * If found, jump backwards by length and check all addresses. + *) + if debug then + eprintf "%s: testing candidate kallsyms at %Lx\n" name addr; + let rec loop addr = + let addrp = Virt_mem_mmap.follow_pointer mem addr in + if Virt_mem_mmap.is_mapped mem addrp then + (* continue up the table *) + loop (Virt_mem_mmap.succ_long mem addr) + else + if addrp >= min_kallsyms_tabsize && + addrp <= max_kallsyms_tabsize then ( + (* addrp might be the symbol count. Count backwards and + * check the full table. + *) + let num_entries = Int64.to_int addrp in + let entry_size = + bytes_of_wordsize (Virt_mem_mmap.get_wordsize mem) in + let start_addr = + addr -^ Int64.of_int (entry_size * num_entries) in + let end_addr = addr in + let rec loop2 addr = + if addr < end_addr then ( + let addrp = Virt_mem_mmap.follow_pointer mem addr in + if Virt_mem_mmap.is_mapped mem addrp then + loop2 (Virt_mem_mmap.succ_long mem addr) + else + None (* can't verify the full address table *) + ) else + (* ok! *) + let names_addr = Virt_mem_mmap.succ_long mem end_addr in + if debug then + eprintf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n" + name start_addr names_addr num_entries; + Some (start_addr, num_entries, names_addr) + in + loop2 start_addr + ) + else + None (* forget it *) + in + match loop addr with + | None -> None + | Some (start_addr, num_entries, names_addr) -> + (* As an additional verification, check the list of + * kallsyms_names. + *) + try + (* If the first byte is '\000' and is followed by a + * C identifier, then this is old-school list of + * symbols with prefix compression as in 2.6.9. + * Otherwise Huffman-compressed kallsyms as in + * 2.6.25. + *) + if Virt_mem_mmap.get_byte mem names_addr = 0 && + Virt_mem_mmap.is_C_identifier mem (names_addr+^1L) then ( + let names = ref [] in + let prev = ref "" in + let rec loop names_addr start_addr num = + if num > 0 then ( + let prefix = Virt_mem_mmap.get_byte mem names_addr in + let prefix = String.sub !prev 0 prefix in + let name = + Virt_mem_mmap.get_string mem (names_addr+^1L) in + let len = String.length name in + let name = prefix ^ name in + prev := name; + let names_addr = names_addr +^ Int64.of_int len +^ 2L in + let sym_value = + Virt_mem_mmap.follow_pointer mem start_addr in + let start_addr = + Virt_mem_mmap.succ_long mem start_addr in + (*eprintf "%S -> %Lx\n" name sym_value;*) + names := (name, sym_value) :: !names; + loop names_addr start_addr (num-1) + ) + in + loop names_addr start_addr num_entries; + let names = List.rev !names in + + Some (start_addr, num_entries, names_addr, + Uncompressed names) + ) + else ( (* new-style "compressed" names. *) + let compressed_names = ref [] in + let rec loop names_addr start_addr num = + if num > 0 then ( + let len = Virt_mem_mmap.get_byte mem names_addr in + let name = + Virt_mem_mmap.get_bytes mem (names_addr+^1L) len in + let names_addr = names_addr +^ Int64.of_int len +^ 1L in + let sym_value = + Virt_mem_mmap.follow_pointer mem start_addr in + let start_addr = + Virt_mem_mmap.succ_long mem start_addr in + compressed_names := + (name, sym_value) :: !compressed_names; + loop names_addr start_addr (num-1) + ) else + names_addr + in + let markers_addr = loop names_addr start_addr num_entries in + let markers_addr = Virt_mem_mmap.align mem markers_addr in + let compressed_names = List.rev !compressed_names in + + Some (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr)) + ) + with + Invalid_argument _ -> None (* bad names list *) + ) ksym_addrs in + + if debug then ( + eprintf "%s: candidate kallsyms at:\n" name; + List.iter ( + function + | (start_addr, num_entries, names_addr, Uncompressed _) -> + eprintf "\t%Lx %d entries names_addr=%Lx old-style\n%!" + start_addr num_entries names_addr + | (start_addr, num_entries, names_addr, + Compressed (_, markers_addr)) -> + eprintf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!" + start_addr num_entries names_addr markers_addr + ) kallsymtabs + ); + + (* Vote for the most popular symbol table candidate and + * enhance the function for looking up ksyms. + *) + let lookup_ksym = + let freqs = frequency kallsymtabs in + match freqs with + | [] -> + (* Can't find any kallsymtabs, just return the lookup_ksym + * function generated previously from the exported symbols. + *) + lookup_ksym + + | (_, (_, _, _, Uncompressed names)) :: _ -> + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + lookup_ksym + + | (_, (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr))) :: _ -> + (* Skip the markers and look for the token table. *) + let num_markers = Int64.of_int ((num_entries + 255) / 256) in + let marker_size = + Int64.of_int (bytes_of_wordsize + (Virt_mem_mmap.get_wordsize mem)) in + let tokens_addr = markers_addr +^ marker_size *^ num_markers in + + (* Now read out the compression tokens, which are just + * 256 ASCIIZ strings that map bytes in the compression + * names to substrings. + *) + let tokens = Array.make 256 "" in + let rec loop i addr = + if i < 256 then ( + let str = Virt_mem_mmap.get_string mem addr in + let len = String.length str in + let addr = addr +^ Int64.of_int (len+1) in + tokens.(i) <- str; + loop (i+1) addr + ) + in + loop 0 tokens_addr; + + (* Expand the compressed names using the tokens. *) + let names = List.filter_map ( + fun (name, sym_value) -> + let f c = tokens.(Char.code c) in + let name = String.replace_chars f name in + (* First character in uncompressed output is the symbol + * type, eg. 'T'/'t' for text etc. + *) + (* NOTE: Symbol names are NOT unique + * (eg. 'con_start' is both a function and data in + * some kernels). XXX We need to handle this situation + * better. + *) + (*let typ = name.[0] in*) + let name = String.sub name 1 (String.length name - 1) in + (*eprintf "%S -> %Lx\n" name sym_value;*) + Some (name, sym_value) + ) compressed_names in + + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + + lookup_ksym in + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: searching for kallsyms took %f seconds\n%!" + (end_t -. start_t) + ); + + ((domid, name, arch, mem, lookup_ksym) : image1) diff --git a/lib/virt_mem_kallsyms.mli b/lib/virt_mem_kallsyms.mli new file mode 100644 index 0000000..7f51831 --- /dev/null +++ b/lib/virt_mem_kallsyms.mli @@ -0,0 +1,24 @@ +(** Find kallsyms in a kernel image. *) +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Find kallsyms in a kernel image. + *) + +val find_kallsyms : bool -> Virt_mem_types.image1 -> Virt_mem_types.image1 +(** Find kallsyms in a kernel image. *) diff --git a/lib/virt_mem_ksyms.ml b/lib/virt_mem_ksyms.ml new file mode 100644 index 0000000..d70ace1 --- /dev/null +++ b/lib/virt_mem_ksyms.ml @@ -0,0 +1,191 @@ +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Ordinary kernel symbol lookups. + *) + +open Unix +open Printf + +open Virt_mem_gettext.Gettext +open Virt_mem_utils +open Virt_mem_types + +(* Look for some common entries in the exported symbol table and + * from that find the symbol table itself. These are just + * supposed to be symbols which are very likely to be present + * in any Linux kernel, although we only need one of them to be + * present to find the symbol table. + * + * NB. Must not be __initdata, must be in EXPORT_SYMBOL. + *) +let common_ksyms = [ + "init_task"; (* first task_struct *) + "root_mountflags"; (* flags for mounting root fs *) + "init_uts_ns"; (* uname strings *) + "sys_open"; (* open(2) entry point *) + "sys_chdir"; (* chdir(2) entry point *) + "sys_chroot"; (* chroot(2) entry point *) + "sys_umask"; (* umask(2) entry point *) + "schedule"; (* scheduler entry point *) +] + +let find_kernel_symbols debug (domid, name, arch, mem) = + (* Searching for string *) + let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in + + let start_t = gettimeofday () in + + (* Search for these strings in the memory image. *) + let ksym_strings = + List.map (Virt_mem_mmap.find_all mem) common_ksyms_nul in + let ksym_strings = List.concat ksym_strings in + (* Adjust found addresses to start of the string (skip ). *) + let ksym_strings = List.map Int64.succ ksym_strings in + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: searching for common_ksyms took %f seconds\n%!" + (end_t -. start_t) + ); + + let start_t = gettimeofday () in + + (* For any we found, try to look up the symbol table + * base addr and size. + *) + let ksymtabs = List.map ( + fun addr -> + (* Search for 'addr' appearing in the image. *) + let addrs = Virt_mem_mmap.find_pointer_all mem addr in + + (* Now consider each of these addresses and search back + * until we reach the beginning of the (possible) symbol + * table. + * + * Kernel symbol table struct is: + * struct kernel_symbol { + * unsigned long value; + * const char *name; <-- initial pointer + * } symbols[]; + *) + let pred_long2 addr = + Virt_mem_mmap.pred_long mem (Virt_mem_mmap.pred_long mem addr) + in + let base_addrs = List.map ( + fun addr -> + let rec loop addr = + (* '*addr' should point to a C identifier. If it does, + * step backwards to the previous symbol table entry. + *) + let addrp = Virt_mem_mmap.follow_pointer mem addr in + if Virt_mem_mmap.is_C_identifier mem addrp then + loop (pred_long2 addr) + else + Virt_mem_mmap.succ_long mem addr + in + loop addr + ) addrs in + + (* Also look for the end of the symbol table and + * calculate its size. + *) + let base_addrs_sizes = List.map ( + fun base_addr -> + let rec loop addr = + let addr2 = Virt_mem_mmap.succ_long mem addr in + let addr2p = Virt_mem_mmap.follow_pointer mem addr2 in + if Virt_mem_mmap.is_C_identifier mem addr2p then + loop (Virt_mem_mmap.succ_long mem addr2) + else + addr + in + let end_addr = loop base_addr in + base_addr, end_addr -^ base_addr + ) base_addrs in + + base_addrs_sizes + ) ksym_strings in + let ksymtabs = List.concat ksymtabs in + + (* Simply ignore any symbol table candidates which are too small. *) + let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in + + if debug then ( + eprintf "%s: candidate symbol tables at:\n" name; + List.iter ( + fun (addr, size) -> + eprintf "\t%Lx\t%Lx\t%!" addr size; + eprintf "first symbol: %s\n%!" + (Virt_mem_mmap.get_string mem + (Virt_mem_mmap.follow_pointer mem + (Virt_mem_mmap.succ_long mem addr))) + ) ksymtabs + ); + + (* Vote for the most popular symbol table candidate and from this + * generate a function to look up ksyms. + *) + let lookup_ksym = + let freqs = frequency ksymtabs in + match freqs with + | [] -> + eprintf (f_"%s: cannot find start of kernel symbol table\n") name; + (fun _ -> raise Not_found) + + | (_, (ksymtab_addr, ksymtab_size)) :: _ -> + if debug then + eprintf + "%s: Kernel symbol table found at %Lx, size %Lx bytes\n%!" + name ksymtab_addr ksymtab_size; + + (* Load the whole symbol table as a bitstring. *) + let ksymtab = + Bitstring.bitstring_of_string + (Virt_mem_mmap.get_bytes mem ksymtab_addr + (Int64.to_int ksymtab_size)) in + + (* Function to look up an address in the symbol table. *) + let lookup_ksym sym = + let bits = bits_of_wordsize (Virt_mem_mmap.get_wordsize mem) in + let e = Virt_mem_mmap.get_endian mem in + let rec loop bs = + bitmatch bs with + | { value : bits : endian(e); + name_ptr : bits : endian(e) } + when Virt_mem_mmap.get_string mem name_ptr = sym -> + value + | { _ : bits : endian(e); + _ : bits : endian(e); + bs : -1 : bitstring } -> + loop bs + | { _ } -> raise Not_found + in + loop ksymtab + in + + lookup_ksym + in + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: searching for ordinary ksyms took %f seconds\n%!" + (end_t -. start_t) + ); + + ((domid, name, arch, mem, lookup_ksym) : image1) diff --git a/lib/virt_mem_ksyms.mli b/lib/virt_mem_ksyms.mli new file mode 100644 index 0000000..fb00bb3 --- /dev/null +++ b/lib/virt_mem_ksyms.mli @@ -0,0 +1,28 @@ +(** Ordinary kernel symbol lookups. *) +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Ordinary kernel symbol lookups. + *) + +val common_ksyms : Virt_mem_types.ksym list +(** The list of "common" kernel symbols which we expect to be present + in almost any Linux kernel. *) + +val find_kernel_symbols : bool -> Virt_mem_types.image0 -> Virt_mem_types.image1 +(** Find ordinary kernel symbols in a kernel image. *) diff --git a/lib/virt_mem_types.ml b/lib/virt_mem_types.ml new file mode 100644 index 0000000..2f8b329 --- /dev/null +++ b/lib/virt_mem_types.ml @@ -0,0 +1,39 @@ +(** Common types. *) +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Common types. + *) + +(** A kernel image. *) +type image0 = + int option (* Domain ID, if known. *) + * string (* Domain name. *) + * Virt_mem_utils.architecture (* Architecture, eg. i386. *) + * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t (* Memory map. *) + +(** A kernel symbol. *) +type ksym = string + +(** A kernel image, after finding kernel symbols. *) +type image1 = + int option (* Domain ID, if known. *) + * string (* Domain name. *) + * Virt_mem_utils.architecture (* Architecture, eg. i386. *) + * ([`Wordsize], [`Endian], [`HasMapping]) Virt_mem_mmap.t (* Memory map. *) + * (ksym -> Virt_mem_mmap.addr) (* Kernel symbol lookup function. *) diff --git a/lib/virt_mem_utils.ml b/lib/virt_mem_utils.ml index 8eb312a..0a3b687 100644 --- a/lib/virt_mem_utils.ml +++ b/lib/virt_mem_utils.ml @@ -1,3 +1,4 @@ +(** Common and utility functions. *) (* Memory info command for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ @@ -78,9 +79,8 @@ let bits_of_wordsize = function let bytes_of_wordsize = function | W32 -> 4 | W64 -> 8 -(* Returns (count, value) in order of highest frequency occurring in the - * list. - *) +(** Returns (count, value) in order of highest frequency occurring in the + list. *) let frequency xs = let xs = List.sort compare xs in let rec loop = function @@ -96,6 +96,7 @@ let frequency xs = let xs = loop xs in List.rev (List.sort compare xs) +(** Like the Unix uniq(1) command. *) let rec uniq ?(cmp = Pervasives.compare) = function | [] -> [] | [x] -> [x] @@ -104,25 +105,25 @@ let rec uniq ?(cmp = Pervasives.compare) = function | x :: y :: xs -> x :: uniq (y :: xs) +(** Like the Unix pipeline 'sort|uniq'. *) let sort_uniq ?cmp xs = let xs = ExtList.List.sort ?cmp xs in let xs = uniq ?cmp xs in xs -(* Pad a string to a fixed width (from virt-top, but don't truncate). *) +(** Pad a string to a fixed width (from virt-top, but don't truncate). *) let pad width str = let n = String.length str in if n >= width then str else (* if n < width then *) str ^ String.make (width-n) ' ' -(* General binary tree type. Data 'a is stored in the leaves and 'b - * is stored in the nodes. - *) +(** General binary tree type. Data 'a is stored in the leaves and 'b + is stored in the nodes. *) type ('a,'b) binary_tree = | Leaf of 'a | Node of ('a,'b) binary_tree * 'b * ('a,'b) binary_tree -(* This prints out the binary tree in graphviz dot format. *) +(** Print out the binary tree in graphviz dot format. *) let print_binary_tree leaf_printer node_printer tree = (* Assign a unique, fixed label to each node. *) let label =