X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=lib%2Fvirt_mem.ml;h=9a00ead4cd4bfe4a9f377245c37231fa7d84cbde;hb=c2e9b17c873166f733fda12da1f1a44d3d64755e;hp=8a835ec2427a525f446664f8716e6a81109bd509;hpb=cb0cb33ae593436fd984c105cf5682155704e39f;p=virt-mem.git diff --git a/lib/virt_mem.ml b/lib/virt_mem.ml index 8a835ec..9a00ead 100644 --- a/lib/virt_mem.ml +++ b/lib/virt_mem.ml @@ -20,22 +20,36 @@ open Unix open Printf open ExtList +open ExtString + +module C = Libvirt.Connect +module D = Libvirt.Domain open Virt_mem_gettext.Gettext open Virt_mem_utils module MMap = Virt_mem_mmap +let min_kallsyms_tabsize = 1_000L +let max_kallsyms_tabsize = 250_000L + +let kernel_size = 0x100_0000 +let max_memory_peek = 0x1_000 + type ksym = string type image = string * Virt_mem_utils.architecture * ([`Wordsize], [`Endian]) Virt_mem_mmap.t - * (ksym -> Virt_mem_mmap.addr) + * (ksym -> MMap.addr) + +type kallsyms_compr = + | Compressed of (string * MMap.addr) list * MMap.addr + | Uncompressed of (string * MMap.addr) list let start usage_msg = - (* Verbose messages. *) - let verbose = ref false in + (* Debug messages. *) + let debug = ref false in (* Default wordsize. *) let def_wordsize = ref None in @@ -79,6 +93,7 @@ let start usage_msg = (* List of kernel images. *) let images = ref [] in + let uri = ref "" in let memory_image filename = images := @@ -86,19 +101,35 @@ let start usage_msg = :: !images in + let version () = + printf "virt-mem %s\n" Virt_mem_version.version; + + let major, minor, release = + let v, _ = Libvirt.get_version () in + v / 1_000_000, (v / 1_000) mod 1_000, v mod 1_000 in + printf "libvirt %d.%d.%d\n" major minor release; + exit 0 + in + let argspec = Arg.align [ "-A", Arg.String set_architecture, - "arch " ^ s_"Set kernel architecture, endianness and word size"; + "arch " ^ s_"Set kernel architecture, endianness and word size"; "-E", Arg.String set_endian, - "endian " ^ s_"Set kernel endianness"; + "endian " ^ s_"Set kernel endianness"; "-T", Arg.String set_text_addr, - "addr " ^ s_"Set kernel text address"; + "addr " ^ s_"Set kernel text address"; "-W", Arg.String set_wordsize, - "addr " ^ s_"Set kernel word size"; + "addr " ^ s_"Set kernel word size"; + "-c", Arg.Set_string uri, + "uri " ^ s_ "Connect to URI"; + "--connect", Arg.Set_string uri, + "uri " ^ s_ "Connect to URI"; + "--debug", Arg.Set debug, + " " ^ s_"Debug mode (default: false)"; "-t", Arg.String memory_image, - "image " ^ s_"Use saved kernel memory image"; - "-verbose", Arg.Set verbose, - " " ^ s_"Verbose messages"; + "image " ^ s_"Use saved kernel memory image"; + "--version", Arg.Unit version, + " " ^ s_"Display version and exit"; ] in let anon_fun str = @@ -107,14 +138,104 @@ let start usage_msg = Arg.parse argspec anon_fun usage_msg; let images = !images in - let verbose = !verbose in + let debug = !debug in + let uri = if !uri = "" then None else Some !uri in (* Get the kernel images. *) let images = - if images = [] then - (* XXX use libvirt to get images *) - failwith "libvirt: not yet implemented" - else + if images = [] then ( + let conn = + let name = uri in + try C.connect_readonly ?name () + with Libvirt.Virterror err -> + prerr_endline (Libvirt.Virterror.to_string err); + (* If non-root and no explicit connection URI, print a warning. *) + if Unix.geteuid () <> 0 && name = None then ( + print_endline (s_ "NB: If you want to monitor a local Xen hypervisor, you usually need to be root"); + ); + exit 1 in + + (* List of active domains. *) + let doms = + let nr_active_doms = C.num_of_domains conn in + let active_doms = + Array.to_list (C.list_domains conn nr_active_doms) in + let active_doms = + List.map (D.lookup_by_id conn) active_doms in + active_doms in + + (* Get their XML. *) + let xmls = List.map (fun dom -> dom, D.get_xml_desc dom) doms in + + (* Parse the XML. *) + let xmls = List.map (fun (dom, xml) -> + dom, Xml.parse_string xml) xmls in + + (* XXX Do something with the XML XXX + * such as detecting arch, wordsize, endianness. + * XXXXXXXXXXXXXX + * + * + * + *) + + + List.map ( + fun (dom, _) -> + let name = D.get_name dom in + + let wordsize = + match !def_wordsize with + | None -> + failwith + (sprintf (f_"%s: use -W to define word size for this image") + name); + | Some ws -> ws in + let endian = + match !def_endian with + | None -> + failwith + (sprintf (f_"%s: use -E to define endianness for this image") + name); + | Some e -> e in + + let arch = + match !def_architecture with + | Some I386 -> I386 | Some X86_64 -> X86_64 + | _ -> + failwith + (sprintf (f_"%s: use -A to define architecture (i386/x86-64 only) for this image") name) in + + if !def_text_addr = 0L then + failwith + (sprintf (f_"%s: use -T to define kernel load address for this image") + name); + + (* Read the kernel memory. + * Maximum 64K can be read over remote connections. + *) + let str = String.create kernel_size in + let rec loop i = + let remaining = kernel_size - i in + if remaining > 0 then ( + let size = min remaining max_memory_peek in + D.memory_peek dom [D.Virtual] + (!def_text_addr +^ Int64.of_int i) size str i; + loop (i + size) + ) + in + loop 0; + + (* Map the virtual memory. *) + let mem = MMap.of_string str !def_text_addr in + + (* Force the wordsize and endianness. *) + let mem = MMap.set_wordsize mem wordsize in + let mem = MMap.set_endian mem endian in + + (name, arch, mem) + ) xmls + ) else List.map ( fun (wordsize, endian, arch, text_addr, filename) -> (* Quite a lot of limitations on the kernel images we can @@ -162,13 +283,13 @@ let start usage_msg = let images = List.map ( fun (name, arch, mem) -> - (* Look for some common entries in the symbol table and from - * that find the symbol table itself. These are just supposed to - * be symbols which are very likely to be present in any Linux - * kernel, although we only need one of them to be present to - * find the symbol table. + (* Look for some common entries in the exported symbol table and + * from that find the symbol table itself. These are just + * supposed to be symbols which are very likely to be present + * in any Linux kernel, although we only need one of them to be + * present to find the symbol table. * - * NB. Must not be __initdata. + * NB. Must not be __initdata, must be in EXPORT_SYMBOL. *) let common_ksyms = [ "init_task"; (* first task_struct *) @@ -181,10 +302,10 @@ let start usage_msg = "schedule"; (* scheduler entry point *) ] in (* Searching for string *) - let common_ksyms = List.map (sprintf "\000%s\000") common_ksyms in + let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in (* Search for these strings in the memory image. *) - let ksym_strings = List.map (MMap.find_all mem) common_ksyms in + let ksym_strings = List.map (MMap.find_all mem) common_ksyms_nul in let ksym_strings = List.concat ksym_strings in (* Adjust found addresses to start of the string (skip ). *) let ksym_strings = List.map Int64.succ ksym_strings in @@ -249,8 +370,8 @@ let start usage_msg = (* Simply ignore any symbol table candidates which are too small. *) let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in - if verbose then ( - printf "name %s:\n" name; + if debug then ( + printf "%s: candidate symbol tables at:\n" name; List.iter ( fun (addr, size) -> printf "\t%Lx\t%Lx\t%!" addr size; @@ -272,7 +393,7 @@ let start usage_msg = (fun _ -> raise Not_found) | (_, (ksymtab_addr, ksymtab_size)) :: _ -> - if verbose then + if debug then printf "%s: Kernel symbol table found at %Lx, size %Lx bytes\n%!" name ksymtab_addr ksymtab_size; @@ -304,7 +425,245 @@ let start usage_msg = lookup_ksym in + + (* Now try to find the /proc/kallsyms table. This is in an odd + * compressed format (but not a very successful compression + * format). However if it exists we know that it will contain + * addresses of the common ksyms above, and it has some + * characteristics which make it easy to detect in the + * memory. + * + * kallsyms contains a complete list of symbols so is much + * more useful than the basic list of exports. + *) + let ksym_addrs = List.filter_map ( + fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None + ) common_ksyms in + + (* Search for those kernel addresses in the image. We're looking + * for the table kallsyms_addresses followed by kallsyms_num_syms + * (number of symbols in the table). + *) + let ksym_addrs = List.map (MMap.find_pointer_all mem) ksym_addrs in + let ksym_addrs = List.concat ksym_addrs in + + (* Test each one to see if it's a candidate list of kernel + * addresses followed by length of list. + *) + let kallsymtabs = List.filter_map ( + fun addr -> + (* Search upwards from address until we find the length field. + * If found, jump backwards by length and check all addresses. + *) + if debug then + printf "%s: testing candidate kallsyms at %Lx\n" name addr; + let rec loop addr = + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop (MMap.succ_long mem addr) (* continue up the table *) + else + if addrp >= min_kallsyms_tabsize && + addrp <= max_kallsyms_tabsize then ( + (* addrp might be the symbol count. Count backwards and + * check the full table. + *) + let num_entries = Int64.to_int addrp in + let entry_size = bytes_of_wordsize (MMap.get_wordsize mem) in + let start_addr = + addr -^ Int64.of_int (entry_size * num_entries) in + let end_addr = addr in + let rec loop2 addr = + if addr < end_addr then ( + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop2 (MMap.succ_long mem addr) + else + None (* can't verify the full address table *) + ) else + (* ok! *) + let names_addr = MMap.succ_long mem end_addr in + if debug then + printf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n" + name start_addr names_addr num_entries; + Some (start_addr, num_entries, names_addr) + in + loop2 start_addr + ) + else + None (* forget it *) + in + match loop addr with + | None -> None + | Some (start_addr, num_entries, names_addr) -> + (* As an additional verification, check the list of + * kallsyms_names. + *) + try + (* If the first byte is '\000' and is followed by a + * C identifier, then this is old-school list of + * symbols with prefix compression as in 2.6.9. + * Otherwise Huffman-compressed kallsyms as in + * 2.6.25. + *) + if MMap.get_byte mem names_addr = 0 && + MMap.is_C_identifier mem (names_addr+^1L) then ( + let names = ref [] in + let prev = ref "" in + let rec loop names_addr start_addr num = + if num > 0 then ( + let prefix = MMap.get_byte mem names_addr in + let prefix = String.sub !prev 0 prefix in + let name = MMap.get_string mem (names_addr+^1L) in + let len = String.length name in + let name = prefix ^ name in + prev := name; + let names_addr = names_addr +^ Int64.of_int len +^ 2L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + (*printf "%S -> %Lx\n" name sym_value;*) + names := (name, sym_value) :: !names; + loop names_addr start_addr (num-1) + ) + in + loop names_addr start_addr num_entries; + let names = List.rev !names in + + Some (start_addr, num_entries, names_addr, + Uncompressed names) + ) + else ( (* new-style "compressed" names. *) + let compressed_names = ref [] in + let rec loop names_addr start_addr num = + if num > 0 then ( + let len = MMap.get_byte mem names_addr in + let name = MMap.get_bytes mem (names_addr+^1L) len in + let names_addr = names_addr +^ Int64.of_int len +^ 1L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + compressed_names := + (name, sym_value) :: !compressed_names; + loop names_addr start_addr (num-1) + ) else + names_addr + in + let markers_addr = loop names_addr start_addr num_entries in + let markers_addr = MMap.align mem markers_addr in + let compressed_names = List.rev !compressed_names in + + Some (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr)) + ) + with + Invalid_argument _ -> None (* bad names list *) + ) ksym_addrs in + + if debug then ( + printf "%s: candidate kallsyms at:\n" name; + List.iter ( + function + | (start_addr, num_entries, names_addr, Uncompressed _) -> + printf "\t%Lx %d entries names_addr=%Lx old-style\n%!" + start_addr num_entries names_addr + | (start_addr, num_entries, names_addr, + Compressed (_, markers_addr)) -> + printf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!" + start_addr num_entries names_addr markers_addr + ) kallsymtabs + ); + + (* Vote for the most popular symbol table candidate and + * enhance the function for looking up ksyms. + *) + let lookup_ksym = + let freqs = frequency kallsymtabs in + match freqs with + | [] -> + (* Can't find any kallsymtabs, just return the lookup_ksym + * function generated previously from the exported symbols. + *) + lookup_ksym + + | (_, (_, _, _, Uncompressed names)) :: _ -> + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + lookup_ksym + + | (_, (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr))) :: _ -> + (* Skip the markers and look for the token table. *) + let num_markers = Int64.of_int ((num_entries + 255) / 256) in + let marker_size = + Int64.of_int (bytes_of_wordsize (MMap.get_wordsize mem)) in + let tokens_addr = markers_addr +^ marker_size *^ num_markers in + + (* Now read out the compression tokens, which are just + * 256 ASCIIZ strings that map bytes in the compression + * names to substrings. + *) + let tokens = Array.make 256 "" in + let rec loop i addr = + if i < 256 then ( + let str = MMap.get_string mem addr in + let len = String.length str in + let addr = addr +^ Int64.of_int (len+1) in + tokens.(i) <- str; + loop (i+1) addr + ) + in + loop 0 tokens_addr; + + (* Expand the compressed names using the tokens. *) + let names = List.filter_map ( + fun (name, sym_value) -> + let f c = tokens.(Char.code c) in + let name = String.replace_chars f name in + (* First character in uncompressed output is the symbol + * type, eg. 'T'/'t' for text etc. + *) + (* NOTE: Symbol names are NOT unique + * (eg. 'con_start' is both a function and data in + * some kernels). XXX We need to handle this situation + * better. + *) + (*let typ = name.[0] in*) + let name = String.sub name 1 (String.length name - 1) in + (*printf "%S -> %Lx\n" name sym_value;*) + Some (name, sym_value) + ) compressed_names in + + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + + lookup_ksym in + + (* Just wrap the lookup_ksym call in something which prints + * the query when debug is set. + *) + let lookup_ksym = + if debug then + let lookup_ksym sym = + try + let value = lookup_ksym sym in + printf "lookup_ksym %S = %Lx\n%!" sym value; + value + with Not_found -> + printf "lookup_ksym %S failed\n%!" sym; + raise Not_found + in + lookup_ksym + else + lookup_ksym + in + ((name, arch, mem, lookup_ksym) : image) ) images in - verbose, images + debug, images