open Unix
open Printf
open ExtList
+open ExtString
open Virt_mem_gettext.Gettext
open Virt_mem_utils
module MMap = Virt_mem_mmap
+let min_kallsyms_tabsize = 1_000L
+let max_kallsyms_tabsize = 250_000L
+
type ksym = string
type image =
string
* Virt_mem_utils.architecture
* ([`Wordsize], [`Endian]) Virt_mem_mmap.t
- * (ksym -> Virt_mem_mmap.addr)
+ * (ksym -> MMap.addr)
+
+type kallsyms_compr =
+ | Compressed of (string * MMap.addr) list * MMap.addr
+ | Uncompressed of (string * MMap.addr) list
let start usage_msg =
(* Verbose messages. *)
let images =
List.map (
fun (name, arch, mem) ->
- (* Look for some common entries in the symbol table and from
- * that find the symbol table itself. These are just supposed to
- * be symbols which are very likely to be present in any Linux
- * kernel, although we only need one of them to be present to
- * find the symbol table.
+ (* Look for some common entries in the exported symbol table and
+ * from that find the symbol table itself. These are just
+ * supposed to be symbols which are very likely to be present
+ * in any Linux kernel, although we only need one of them to be
+ * present to find the symbol table.
*
- * NB. Must not be __initdata.
+ * NB. Must not be __initdata, must be in EXPORT_SYMBOL.
*)
let common_ksyms = [
"init_task"; (* first task_struct *)
"schedule"; (* scheduler entry point *)
] in
(* Searching for <NUL>string<NUL> *)
- let common_ksyms = List.map (sprintf "\000%s\000") common_ksyms in
+ let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in
(* Search for these strings in the memory image. *)
- let ksym_strings = List.map (MMap.find_all mem) common_ksyms in
+ let ksym_strings = List.map (MMap.find_all mem) common_ksyms_nul in
let ksym_strings = List.concat ksym_strings in
(* Adjust found addresses to start of the string (skip <NUL>). *)
let ksym_strings = List.map Int64.succ ksym_strings in
let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in
if verbose then (
- printf "name %s:\n" name;
+ printf "%s: candidate symbol tables at:\n" name;
List.iter (
fun (addr, size) ->
printf "\t%Lx\t%Lx\t%!" addr size;
lookup_ksym
in
+
+ (* Now try to find the /proc/kallsyms table. This is in an odd
+ * compressed format (but not a very successful compression
+ * format). However if it exists we know that it will contain
+ * addresses of the common ksyms above, and it has some
+ * characteristics which make it easy to detect in the
+ * memory.
+ *
+ * kallsyms contains a complete list of symbols so is much
+ * more useful than the basic list of exports.
+ *)
+ let ksym_addrs = List.filter_map (
+ fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None
+ ) common_ksyms in
+
+ (* Search for those kernel addresses in the image. We're looking
+ * for the table kallsyms_addresses followed by kallsyms_num_syms
+ * (number of symbols in the table).
+ *)
+ let ksym_addrs = List.map (MMap.find_pointer_all mem) ksym_addrs in
+ let ksym_addrs = List.concat ksym_addrs in
+
+ (* Test each one to see if it's a candidate list of kernel
+ * addresses followed by length of list.
+ *)
+ let kallsymtabs = List.filter_map (
+ fun addr ->
+ (* Search upwards from address until we find the length field.
+ * If found, jump backwards by length and check all addresses.
+ *)
+ if verbose then
+ printf "%s: testing candidate kallsyms at %Lx\n" name addr;
+ let rec loop addr =
+ let addrp = MMap.follow_pointer mem addr in
+ if MMap.is_mapped mem addrp then
+ loop (MMap.succ_long mem addr) (* continue up the table *)
+ else
+ if addrp >= min_kallsyms_tabsize &&
+ addrp <= max_kallsyms_tabsize then (
+ (* addrp might be the symbol count. Count backwards and
+ * check the full table.
+ *)
+ let num_entries = Int64.to_int addrp in
+ let entry_size = bytes_of_wordsize (MMap.get_wordsize mem) in
+ let start_addr =
+ addr -^ Int64.of_int (entry_size * num_entries) in
+ let end_addr = addr in
+ let rec loop2 addr =
+ if addr < end_addr then (
+ let addrp = MMap.follow_pointer mem addr in
+ if MMap.is_mapped mem addrp then
+ loop2 (MMap.succ_long mem addr)
+ else
+ None (* can't verify the full address table *)
+ ) else
+ (* ok! *)
+ let names_addr = MMap.succ_long mem end_addr in
+ if verbose then
+ printf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n"
+ name start_addr names_addr num_entries;
+ Some (start_addr, num_entries, names_addr)
+ in
+ loop2 start_addr
+ )
+ else
+ None (* forget it *)
+ in
+ match loop addr with
+ | None -> None
+ | Some (start_addr, num_entries, names_addr) ->
+ (* As an additional verification, check the list of
+ * kallsyms_names.
+ *)
+ try
+ (* If the first byte is '\000' and is followed by a
+ * C identifier, then this is old-school list of
+ * symbols with prefix compression as in 2.6.9.
+ * Otherwise Huffman-compressed kallsyms as in
+ * 2.6.25.
+ *)
+ if MMap.get_byte mem names_addr = 0 &&
+ MMap.is_C_identifier mem (names_addr+^1L) then (
+ let names = ref [] in
+ let prev = ref "" in
+ let rec loop names_addr start_addr num =
+ if num > 0 then (
+ let prefix = MMap.get_byte mem names_addr in
+ let prefix = String.sub !prev 0 prefix in
+ let name = MMap.get_string mem (names_addr+^1L) in
+ let len = String.length name in
+ let name = prefix ^ name in
+ prev := name;
+ let names_addr = names_addr +^ Int64.of_int len +^ 2L in
+ let sym_value = MMap.follow_pointer mem start_addr in
+ let start_addr = MMap.succ_long mem start_addr in
+ (*printf "%S -> %Lx\n" name sym_value;*)
+ names := (name, sym_value) :: !names;
+ loop names_addr start_addr (num-1)
+ )
+ in
+ loop names_addr start_addr num_entries;
+ let names = List.rev !names in
+
+ Some (start_addr, num_entries, names_addr,
+ Uncompressed names)
+ )
+ else ( (* new-style "compressed" names. *)
+ let compressed_names = ref [] in
+ let rec loop names_addr start_addr num =
+ if num > 0 then (
+ let len = MMap.get_byte mem names_addr in
+ let name = MMap.get_bytes mem (names_addr+^1L) len in
+ let names_addr = names_addr +^ Int64.of_int len +^ 1L in
+ let sym_value = MMap.follow_pointer mem start_addr in
+ let start_addr = MMap.succ_long mem start_addr in
+ compressed_names :=
+ (name, sym_value) :: !compressed_names;
+ loop names_addr start_addr (num-1)
+ ) else
+ names_addr
+ in
+ let markers_addr = loop names_addr start_addr num_entries in
+ let markers_addr = MMap.align mem markers_addr in
+ let compressed_names = List.rev !compressed_names in
+
+ Some (start_addr, num_entries, names_addr,
+ Compressed (compressed_names, markers_addr))
+ )
+ with
+ Invalid_argument _ -> None (* bad names list *)
+ ) ksym_addrs in
+
+ if verbose then (
+ printf "%s: candidate kallsyms at:\n" name;
+ List.iter (
+ function
+ | (start_addr, num_entries, names_addr, Uncompressed _) ->
+ printf "\t%Lx %d entries names_addr=%Lx old-style\n%!"
+ start_addr num_entries names_addr
+ | (start_addr, num_entries, names_addr,
+ Compressed (_, markers_addr)) ->
+ printf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!"
+ start_addr num_entries names_addr markers_addr
+ ) kallsymtabs
+ );
+
+ (* Vote for the most popular symbol table candidate and
+ * enhance the function for looking up ksyms.
+ *)
+ let lookup_ksym =
+ let freqs = frequency kallsymtabs in
+ match freqs with
+ | [] ->
+ (* Can't find any kallsymtabs, just return the lookup_ksym
+ * function generated previously from the exported symbols.
+ *)
+ lookup_ksym
+
+ | (_, (_, _, _, Uncompressed names)) :: _ ->
+ let lookup_ksym name =
+ try (* first look it up in kallsyms table. *)
+ List.assoc name names
+ with Not_found -> (* try the old exports table instead *)
+ lookup_ksym name
+ in
+ lookup_ksym
+
+ | (_, (start_addr, num_entries, names_addr,
+ Compressed (compressed_names, markers_addr))) :: _ ->
+ (* Skip the markers and look for the token table. *)
+ let num_markers = Int64.of_int ((num_entries + 255) / 256) in
+ let marker_size =
+ Int64.of_int (bytes_of_wordsize (MMap.get_wordsize mem)) in
+ let tokens_addr = markers_addr +^ marker_size *^ num_markers in
+
+ (* Now read out the compression tokens, which are just
+ * 256 ASCIIZ strings that map bytes in the compression
+ * names to substrings.
+ *)
+ let tokens = Array.make 256 "" in
+ let rec loop i addr =
+ if i < 256 then (
+ let str = MMap.get_string mem addr in
+ let len = String.length str in
+ let addr = addr +^ Int64.of_int (len+1) in
+ tokens.(i) <- str;
+ loop (i+1) addr
+ )
+ in
+ loop 0 tokens_addr;
+
+ (* Expand the compressed names using the tokens. *)
+ let names = List.filter_map (
+ fun (name, sym_value) ->
+ let f c = tokens.(Char.code c) in
+ let name' = String.replace_chars f name in
+ (*printf "%S -> %S\n" name name';*)
+ (* First character in uncompressed output is the symbol
+ * type, eg. 'T'/'t' for text etc. Since we will never
+ * be using functions, and since some functions (eg.
+ * con_start) overlap with data symbols, drop functions.
+ *)
+ let typ = name'.[0] in
+ if typ = 't' || typ = 'T' then None
+ else (
+ let name' = String.sub name' 1 (String.length name' - 1) in
+ Some (name', sym_value)
+ )
+ ) compressed_names in
+
+ let lookup_ksym name =
+ try (* first look it up in kallsyms table. *)
+ List.assoc name names
+ with Not_found -> (* try the old exports table instead *)
+ lookup_ksym name
+ in
+
+ lookup_ksym in
+
+ (* Just wrap the lookup_ksym call in something which prints
+ * the query when verbose is set.
+ *)
+ let lookup_ksym =
+ if verbose then
+ let lookup_ksym sym =
+ try
+ let value = lookup_ksym sym in
+ printf "lookup_ksym %S = %Lx\n%!" sym value;
+ value
+ with Not_found ->
+ printf "lookup_ksym %S failed\n%!" sym;
+ raise Not_found
+ in
+ lookup_ksym
+ else
+ lookup_ksym
+ in
+
((name, arch, mem, lookup_ksym) : image)
) images in