From 46037cd89c23b0f94dc691006ee1d9cd0fec24f0 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Thu, 1 Jan 1970 00:00:00 +0000 Subject: [PATCH] Add support for finding/parsing kallsyms. --- lib/virt_mem.ml | 266 ++++++++++++++++++++++++++++++++++++++++++++++++-- lib/virt_mem_mmap.ml | 16 ++- lib/virt_mem_mmap.mli | 7 ++ 3 files changed, 278 insertions(+), 11 deletions(-) diff --git a/lib/virt_mem.ml b/lib/virt_mem.ml index 8a835ec..bcee669 100644 --- a/lib/virt_mem.ml +++ b/lib/virt_mem.ml @@ -20,18 +20,26 @@ open Unix open Printf open ExtList +open ExtString open Virt_mem_gettext.Gettext open Virt_mem_utils module MMap = Virt_mem_mmap +let min_kallsyms_tabsize = 1_000L +let max_kallsyms_tabsize = 250_000L + type ksym = string type image = string * Virt_mem_utils.architecture * ([`Wordsize], [`Endian]) Virt_mem_mmap.t - * (ksym -> Virt_mem_mmap.addr) + * (ksym -> MMap.addr) + +type kallsyms_compr = + | Compressed of (string * MMap.addr) list * MMap.addr + | Uncompressed of (string * MMap.addr) list let start usage_msg = (* Verbose messages. *) @@ -162,13 +170,13 @@ let start usage_msg = let images = List.map ( fun (name, arch, mem) -> - (* Look for some common entries in the symbol table and from - * that find the symbol table itself. These are just supposed to - * be symbols which are very likely to be present in any Linux - * kernel, although we only need one of them to be present to - * find the symbol table. + (* Look for some common entries in the exported symbol table and + * from that find the symbol table itself. These are just + * supposed to be symbols which are very likely to be present + * in any Linux kernel, although we only need one of them to be + * present to find the symbol table. * - * NB. Must not be __initdata. + * NB. Must not be __initdata, must be in EXPORT_SYMBOL. *) let common_ksyms = [ "init_task"; (* first task_struct *) @@ -181,10 +189,10 @@ let start usage_msg = "schedule"; (* scheduler entry point *) ] in (* Searching for string *) - let common_ksyms = List.map (sprintf "\000%s\000") common_ksyms in + let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in (* Search for these strings in the memory image. *) - let ksym_strings = List.map (MMap.find_all mem) common_ksyms in + let ksym_strings = List.map (MMap.find_all mem) common_ksyms_nul in let ksym_strings = List.concat ksym_strings in (* Adjust found addresses to start of the string (skip ). *) let ksym_strings = List.map Int64.succ ksym_strings in @@ -250,7 +258,7 @@ let start usage_msg = let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in if verbose then ( - printf "name %s:\n" name; + printf "%s: candidate symbol tables at:\n" name; List.iter ( fun (addr, size) -> printf "\t%Lx\t%Lx\t%!" addr size; @@ -304,6 +312,244 @@ let start usage_msg = lookup_ksym in + + (* Now try to find the /proc/kallsyms table. This is in an odd + * compressed format (but not a very successful compression + * format). However if it exists we know that it will contain + * addresses of the common ksyms above, and it has some + * characteristics which make it easy to detect in the + * memory. + * + * kallsyms contains a complete list of symbols so is much + * more useful than the basic list of exports. + *) + let ksym_addrs = List.filter_map ( + fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None + ) common_ksyms in + + (* Search for those kernel addresses in the image. We're looking + * for the table kallsyms_addresses followed by kallsyms_num_syms + * (number of symbols in the table). + *) + let ksym_addrs = List.map (MMap.find_pointer_all mem) ksym_addrs in + let ksym_addrs = List.concat ksym_addrs in + + (* Test each one to see if it's a candidate list of kernel + * addresses followed by length of list. + *) + let kallsymtabs = List.filter_map ( + fun addr -> + (* Search upwards from address until we find the length field. + * If found, jump backwards by length and check all addresses. + *) + if verbose then + printf "%s: testing candidate kallsyms at %Lx\n" name addr; + let rec loop addr = + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop (MMap.succ_long mem addr) (* continue up the table *) + else + if addrp >= min_kallsyms_tabsize && + addrp <= max_kallsyms_tabsize then ( + (* addrp might be the symbol count. Count backwards and + * check the full table. + *) + let num_entries = Int64.to_int addrp in + let entry_size = bytes_of_wordsize (MMap.get_wordsize mem) in + let start_addr = + addr -^ Int64.of_int (entry_size * num_entries) in + let end_addr = addr in + let rec loop2 addr = + if addr < end_addr then ( + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop2 (MMap.succ_long mem addr) + else + None (* can't verify the full address table *) + ) else + (* ok! *) + let names_addr = MMap.succ_long mem end_addr in + if verbose then + printf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n" + name start_addr names_addr num_entries; + Some (start_addr, num_entries, names_addr) + in + loop2 start_addr + ) + else + None (* forget it *) + in + match loop addr with + | None -> None + | Some (start_addr, num_entries, names_addr) -> + (* As an additional verification, check the list of + * kallsyms_names. + *) + try + (* If the first byte is '\000' and is followed by a + * C identifier, then this is old-school list of + * symbols with prefix compression as in 2.6.9. + * Otherwise Huffman-compressed kallsyms as in + * 2.6.25. + *) + if MMap.get_byte mem names_addr = 0 && + MMap.is_C_identifier mem (names_addr+^1L) then ( + let names = ref [] in + let prev = ref "" in + let rec loop names_addr start_addr num = + if num > 0 then ( + let prefix = MMap.get_byte mem names_addr in + let prefix = String.sub !prev 0 prefix in + let name = MMap.get_string mem (names_addr+^1L) in + let len = String.length name in + let name = prefix ^ name in + prev := name; + let names_addr = names_addr +^ Int64.of_int len +^ 2L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + (*printf "%S -> %Lx\n" name sym_value;*) + names := (name, sym_value) :: !names; + loop names_addr start_addr (num-1) + ) + in + loop names_addr start_addr num_entries; + let names = List.rev !names in + + Some (start_addr, num_entries, names_addr, + Uncompressed names) + ) + else ( (* new-style "compressed" names. *) + let compressed_names = ref [] in + let rec loop names_addr start_addr num = + if num > 0 then ( + let len = MMap.get_byte mem names_addr in + let name = MMap.get_bytes mem (names_addr+^1L) len in + let names_addr = names_addr +^ Int64.of_int len +^ 1L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + compressed_names := + (name, sym_value) :: !compressed_names; + loop names_addr start_addr (num-1) + ) else + names_addr + in + let markers_addr = loop names_addr start_addr num_entries in + let markers_addr = MMap.align mem markers_addr in + let compressed_names = List.rev !compressed_names in + + Some (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr)) + ) + with + Invalid_argument _ -> None (* bad names list *) + ) ksym_addrs in + + if verbose then ( + printf "%s: candidate kallsyms at:\n" name; + List.iter ( + function + | (start_addr, num_entries, names_addr, Uncompressed _) -> + printf "\t%Lx %d entries names_addr=%Lx old-style\n%!" + start_addr num_entries names_addr + | (start_addr, num_entries, names_addr, + Compressed (_, markers_addr)) -> + printf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!" + start_addr num_entries names_addr markers_addr + ) kallsymtabs + ); + + (* Vote for the most popular symbol table candidate and + * enhance the function for looking up ksyms. + *) + let lookup_ksym = + let freqs = frequency kallsymtabs in + match freqs with + | [] -> + (* Can't find any kallsymtabs, just return the lookup_ksym + * function generated previously from the exported symbols. + *) + lookup_ksym + + | (_, (_, _, _, Uncompressed names)) :: _ -> + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + lookup_ksym + + | (_, (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr))) :: _ -> + (* Skip the markers and look for the token table. *) + let num_markers = Int64.of_int ((num_entries + 255) / 256) in + let marker_size = + Int64.of_int (bytes_of_wordsize (MMap.get_wordsize mem)) in + let tokens_addr = markers_addr +^ marker_size *^ num_markers in + + (* Now read out the compression tokens, which are just + * 256 ASCIIZ strings that map bytes in the compression + * names to substrings. + *) + let tokens = Array.make 256 "" in + let rec loop i addr = + if i < 256 then ( + let str = MMap.get_string mem addr in + let len = String.length str in + let addr = addr +^ Int64.of_int (len+1) in + tokens.(i) <- str; + loop (i+1) addr + ) + in + loop 0 tokens_addr; + + (* Expand the compressed names using the tokens. *) + let names = List.filter_map ( + fun (name, sym_value) -> + let f c = tokens.(Char.code c) in + let name' = String.replace_chars f name in + (*printf "%S -> %S\n" name name';*) + (* First character in uncompressed output is the symbol + * type, eg. 'T'/'t' for text etc. Since we will never + * be using functions, and since some functions (eg. + * con_start) overlap with data symbols, drop functions. + *) + let typ = name'.[0] in + if typ = 't' || typ = 'T' then None + else ( + let name' = String.sub name' 1 (String.length name' - 1) in + Some (name', sym_value) + ) + ) compressed_names in + + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + + lookup_ksym in + + (* Just wrap the lookup_ksym call in something which prints + * the query when verbose is set. + *) + let lookup_ksym = + if verbose then + let lookup_ksym sym = + try + let value = lookup_ksym sym in + printf "lookup_ksym %S = %Lx\n%!" sym value; + value + with Not_found -> + printf "lookup_ksym %S failed\n%!" sym; + raise Not_found + in + lookup_ksym + else + lookup_ksym + in + ((name, arch, mem, lookup_ksym) : image) ) images in diff --git a/lib/virt_mem_mmap.ml b/lib/virt_mem_mmap.ml index 7401e17..f6edee8 100644 --- a/lib/virt_mem_mmap.ml +++ b/lib/virt_mem_mmap.ml @@ -205,7 +205,7 @@ let get_byte { mappings = mappings } addr = let rec loop = function | [] -> invalid_arg "get_byte" | { start = start; size = size; arr = arr } :: _ - when start <= addr && addr < size -> + when start <= addr && addr < start +^ size -> let offset = Int64.to_int (addr -^ start) in Char.code (Array1.get arr offset) | _ :: ms -> loop ms @@ -303,6 +303,15 @@ let is_C_identifier t addr = with Invalid_argument _ -> false +let is_mapped { mappings = mappings } addr = + let rec loop = function + | [] -> false + | { start = start; size = size; arr = arr } :: _ + when start <= addr && addr < start +^ size -> true + | _ :: ms -> loop ms + in + loop mappings + let follow_pointer t addr = let ws = get_wordsize t in let e = get_endian t in @@ -320,3 +329,8 @@ let succ_long t addr = let pred_long t addr = let ws = get_wordsize t in addr -^ Int64.of_int (bytes_of_wordsize ws) + +let align t addr = + let ws = get_wordsize t in + let mask = Int64.of_int (bytes_of_wordsize ws - 1) in + (addr +^ mask) &^ (Int64.lognot mask) diff --git a/lib/virt_mem_mmap.mli b/lib/virt_mem_mmap.mli index 06de86a..68c7b2f 100644 --- a/lib/virt_mem_mmap.mli +++ b/lib/virt_mem_mmap.mli @@ -105,6 +105,9 @@ val is_C_identifier : ('a, 'b) t -> addr -> bool (** Return true or false if the address contains a NUL-terminated C identifier. *) +val is_mapped : ('a, 'b) t -> addr -> bool +(** Return true if the single address [addr] is mapped. *) + val follow_pointer : ([`Wordsize], [`Endian]) t -> addr -> addr (** Follow (dereference) the pointer at [addr] and return the address pointed to. *) @@ -114,3 +117,7 @@ val succ_long : ([`Wordsize], 'b) t -> addr -> addr val pred_long : ([`Wordsize], 'b) t -> addr -> addr (** Subtract wordsize bytes from [addr] and return it. *) + +val align : ([`Wordsize], 'b) t -> addr -> addr +(** Align the [addr] to the next wordsize boundary. If it already + aligned, this just returns [addr]. *) -- 1.8.3.1