X-Git-Url: http://git.annexia.org/?p=virt-mem.git;a=blobdiff_plain;f=lib%2Fvirt_mem.ml;h=bcee6696b68f8da3300a498e1ff89eb393123e8d;hp=8a835ec2427a525f446664f8716e6a81109bd509;hb=8e753ba58844f0188306fd0a468fab899bd4308d;hpb=cb0cb33ae593436fd984c105cf5682155704e39f diff --git a/lib/virt_mem.ml b/lib/virt_mem.ml index 8a835ec..bcee669 100644 --- a/lib/virt_mem.ml +++ b/lib/virt_mem.ml @@ -20,18 +20,26 @@ open Unix open Printf open ExtList +open ExtString open Virt_mem_gettext.Gettext open Virt_mem_utils module MMap = Virt_mem_mmap +let min_kallsyms_tabsize = 1_000L +let max_kallsyms_tabsize = 250_000L + type ksym = string type image = string * Virt_mem_utils.architecture * ([`Wordsize], [`Endian]) Virt_mem_mmap.t - * (ksym -> Virt_mem_mmap.addr) + * (ksym -> MMap.addr) + +type kallsyms_compr = + | Compressed of (string * MMap.addr) list * MMap.addr + | Uncompressed of (string * MMap.addr) list let start usage_msg = (* Verbose messages. *) @@ -162,13 +170,13 @@ let start usage_msg = let images = List.map ( fun (name, arch, mem) -> - (* Look for some common entries in the symbol table and from - * that find the symbol table itself. These are just supposed to - * be symbols which are very likely to be present in any Linux - * kernel, although we only need one of them to be present to - * find the symbol table. + (* Look for some common entries in the exported symbol table and + * from that find the symbol table itself. These are just + * supposed to be symbols which are very likely to be present + * in any Linux kernel, although we only need one of them to be + * present to find the symbol table. * - * NB. Must not be __initdata. + * NB. Must not be __initdata, must be in EXPORT_SYMBOL. *) let common_ksyms = [ "init_task"; (* first task_struct *) @@ -181,10 +189,10 @@ let start usage_msg = "schedule"; (* scheduler entry point *) ] in (* Searching for string *) - let common_ksyms = List.map (sprintf "\000%s\000") common_ksyms in + let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in (* Search for these strings in the memory image. *) - let ksym_strings = List.map (MMap.find_all mem) common_ksyms in + let ksym_strings = List.map (MMap.find_all mem) common_ksyms_nul in let ksym_strings = List.concat ksym_strings in (* Adjust found addresses to start of the string (skip ). *) let ksym_strings = List.map Int64.succ ksym_strings in @@ -250,7 +258,7 @@ let start usage_msg = let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in if verbose then ( - printf "name %s:\n" name; + printf "%s: candidate symbol tables at:\n" name; List.iter ( fun (addr, size) -> printf "\t%Lx\t%Lx\t%!" addr size; @@ -304,6 +312,244 @@ let start usage_msg = lookup_ksym in + + (* Now try to find the /proc/kallsyms table. This is in an odd + * compressed format (but not a very successful compression + * format). However if it exists we know that it will contain + * addresses of the common ksyms above, and it has some + * characteristics which make it easy to detect in the + * memory. + * + * kallsyms contains a complete list of symbols so is much + * more useful than the basic list of exports. + *) + let ksym_addrs = List.filter_map ( + fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None + ) common_ksyms in + + (* Search for those kernel addresses in the image. We're looking + * for the table kallsyms_addresses followed by kallsyms_num_syms + * (number of symbols in the table). + *) + let ksym_addrs = List.map (MMap.find_pointer_all mem) ksym_addrs in + let ksym_addrs = List.concat ksym_addrs in + + (* Test each one to see if it's a candidate list of kernel + * addresses followed by length of list. + *) + let kallsymtabs = List.filter_map ( + fun addr -> + (* Search upwards from address until we find the length field. + * If found, jump backwards by length and check all addresses. + *) + if verbose then + printf "%s: testing candidate kallsyms at %Lx\n" name addr; + let rec loop addr = + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop (MMap.succ_long mem addr) (* continue up the table *) + else + if addrp >= min_kallsyms_tabsize && + addrp <= max_kallsyms_tabsize then ( + (* addrp might be the symbol count. Count backwards and + * check the full table. + *) + let num_entries = Int64.to_int addrp in + let entry_size = bytes_of_wordsize (MMap.get_wordsize mem) in + let start_addr = + addr -^ Int64.of_int (entry_size * num_entries) in + let end_addr = addr in + let rec loop2 addr = + if addr < end_addr then ( + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop2 (MMap.succ_long mem addr) + else + None (* can't verify the full address table *) + ) else + (* ok! *) + let names_addr = MMap.succ_long mem end_addr in + if verbose then + printf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n" + name start_addr names_addr num_entries; + Some (start_addr, num_entries, names_addr) + in + loop2 start_addr + ) + else + None (* forget it *) + in + match loop addr with + | None -> None + | Some (start_addr, num_entries, names_addr) -> + (* As an additional verification, check the list of + * kallsyms_names. + *) + try + (* If the first byte is '\000' and is followed by a + * C identifier, then this is old-school list of + * symbols with prefix compression as in 2.6.9. + * Otherwise Huffman-compressed kallsyms as in + * 2.6.25. + *) + if MMap.get_byte mem names_addr = 0 && + MMap.is_C_identifier mem (names_addr+^1L) then ( + let names = ref [] in + let prev = ref "" in + let rec loop names_addr start_addr num = + if num > 0 then ( + let prefix = MMap.get_byte mem names_addr in + let prefix = String.sub !prev 0 prefix in + let name = MMap.get_string mem (names_addr+^1L) in + let len = String.length name in + let name = prefix ^ name in + prev := name; + let names_addr = names_addr +^ Int64.of_int len +^ 2L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + (*printf "%S -> %Lx\n" name sym_value;*) + names := (name, sym_value) :: !names; + loop names_addr start_addr (num-1) + ) + in + loop names_addr start_addr num_entries; + let names = List.rev !names in + + Some (start_addr, num_entries, names_addr, + Uncompressed names) + ) + else ( (* new-style "compressed" names. *) + let compressed_names = ref [] in + let rec loop names_addr start_addr num = + if num > 0 then ( + let len = MMap.get_byte mem names_addr in + let name = MMap.get_bytes mem (names_addr+^1L) len in + let names_addr = names_addr +^ Int64.of_int len +^ 1L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + compressed_names := + (name, sym_value) :: !compressed_names; + loop names_addr start_addr (num-1) + ) else + names_addr + in + let markers_addr = loop names_addr start_addr num_entries in + let markers_addr = MMap.align mem markers_addr in + let compressed_names = List.rev !compressed_names in + + Some (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr)) + ) + with + Invalid_argument _ -> None (* bad names list *) + ) ksym_addrs in + + if verbose then ( + printf "%s: candidate kallsyms at:\n" name; + List.iter ( + function + | (start_addr, num_entries, names_addr, Uncompressed _) -> + printf "\t%Lx %d entries names_addr=%Lx old-style\n%!" + start_addr num_entries names_addr + | (start_addr, num_entries, names_addr, + Compressed (_, markers_addr)) -> + printf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!" + start_addr num_entries names_addr markers_addr + ) kallsymtabs + ); + + (* Vote for the most popular symbol table candidate and + * enhance the function for looking up ksyms. + *) + let lookup_ksym = + let freqs = frequency kallsymtabs in + match freqs with + | [] -> + (* Can't find any kallsymtabs, just return the lookup_ksym + * function generated previously from the exported symbols. + *) + lookup_ksym + + | (_, (_, _, _, Uncompressed names)) :: _ -> + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + lookup_ksym + + | (_, (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr))) :: _ -> + (* Skip the markers and look for the token table. *) + let num_markers = Int64.of_int ((num_entries + 255) / 256) in + let marker_size = + Int64.of_int (bytes_of_wordsize (MMap.get_wordsize mem)) in + let tokens_addr = markers_addr +^ marker_size *^ num_markers in + + (* Now read out the compression tokens, which are just + * 256 ASCIIZ strings that map bytes in the compression + * names to substrings. + *) + let tokens = Array.make 256 "" in + let rec loop i addr = + if i < 256 then ( + let str = MMap.get_string mem addr in + let len = String.length str in + let addr = addr +^ Int64.of_int (len+1) in + tokens.(i) <- str; + loop (i+1) addr + ) + in + loop 0 tokens_addr; + + (* Expand the compressed names using the tokens. *) + let names = List.filter_map ( + fun (name, sym_value) -> + let f c = tokens.(Char.code c) in + let name' = String.replace_chars f name in + (*printf "%S -> %S\n" name name';*) + (* First character in uncompressed output is the symbol + * type, eg. 'T'/'t' for text etc. Since we will never + * be using functions, and since some functions (eg. + * con_start) overlap with data symbols, drop functions. + *) + let typ = name'.[0] in + if typ = 't' || typ = 'T' then None + else ( + let name' = String.sub name' 1 (String.length name' - 1) in + Some (name', sym_value) + ) + ) compressed_names in + + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + + lookup_ksym in + + (* Just wrap the lookup_ksym call in something which prints + * the query when verbose is set. + *) + let lookup_ksym = + if verbose then + let lookup_ksym sym = + try + let value = lookup_ksym sym in + printf "lookup_ksym %S = %Lx\n%!" sym value; + value + with Not_found -> + printf "lookup_ksym %S failed\n%!" sym; + raise Not_found + in + lookup_ksym + else + lookup_ksym + in + ((name, arch, mem, lookup_ksym) : image) ) images in