X-Git-Url: http://git.annexia.org/?p=virt-mem.git;a=blobdiff_plain;f=lib%2Fvirt_mem.ml;h=61d596847048fe11ccf0e65c46d20302ab14c37f;hp=82e0b37e820e7f1a7f389685d0fb92cf46201cf8;hb=97131d94f5513b732f8f8d310984e71d8201cadf;hpb=5ce06c3326a2672e82dc656b35eb7a3e6616539a diff --git a/lib/virt_mem.ml b/lib/virt_mem.ml index 82e0b37..61d5968 100644 --- a/lib/virt_mem.ml +++ b/lib/virt_mem.ml @@ -1,4 +1,4 @@ -(* Memory info command for virtual domains. +(* Memory info for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ @@ -20,14 +20,159 @@ open Unix open Printf open ExtList +open ExtString +module C = Libvirt.Connect +module D = Libvirt.Domain + +open Virt_mem_gettext.Gettext open Virt_mem_utils module MMap = Virt_mem_mmap -(* Main program. *) -let () = - (* Verbose messages. *) - let verbose = ref false in +let min_kallsyms_tabsize = 1_000L +let max_kallsyms_tabsize = 250_000L + +(* Make the kernel size around 16 MB, but just a bit smaller than + * maximum string length so we can still run this on a 32 bit platform. + *) +let kernel_size = + if Sys.word_size = 32 then Sys.max_string_length + else 0x100_0000 +let max_memory_peek = 65536 (* XXX Use D.max_peek function *) + +type ksym = string + +type image = + int option + * string + * Virt_mem_utils.architecture + * ([`Wordsize], [`Endian]) Virt_mem_mmap.t + +type image_with_ksyms = + int option + * string + * Virt_mem_utils.architecture + * ([`Wordsize], [`Endian]) Virt_mem_mmap.t + * (ksym -> MMap.addr) + +type kallsyms_compr = + | Compressed of (string * MMap.addr) list * MMap.addr + | Uncompressed of (string * MMap.addr) list + +(* When tools register themselves, they are added to this list. + * Later, we will alphabetize the list. + *) +let tools = ref [] + +(* Registration function used by the tools. *) +let register ?(external_cmd = true) ?(extra_args = []) + ?argcheck ?beforeksyms ?run + name summary description = + tools := + (name, (name, summary, description, external_cmd, extra_args, + argcheck, beforeksyms, run)) + :: !tools + +(* Main program, called from mem/virt_mem_main.ml when all the + * tools have had a chance to register themselves. + *) +let main () = + (* Get the registered tools, alphabetically. *) + let tools = !tools in + let tools = List.sort ~cmp:(fun (a,_) (b,_) -> compare a b) tools in + + (* Which tool did the user want to run? Look at the executable + * name (eg. 'virt-dmesg' => tool == dmesg). If we don't recognise + * the executable name then we must look for the first parameter + * which doesn't begin with a '-' character. + * + * Note that we must do all of this before using the OCaml Arg + * module to properly parse the command line (below), so that + * we can have a usage message ready. + *) + let tool, ignore_first_anon_arg = + let prog = Sys.executable_name in (* eg. "/usr/bin/virt-dmesg.opt" *) + let prog = Filename.basename prog in(* eg. "virt-dmesg.opt" *) + let prog = (* eg. "virt-dmesg" *) + try Filename.chop_extension prog with Invalid_argument _ -> prog in + let prog = (* eg. "dmesg" *) + if String.starts_with prog "virt-" then + String.sub prog 5 (String.length prog - 5) + else prog in + try Some (List.assoc prog tools), false + with Not_found -> + let arg1 = (* First non-option argument. *) + match Array.to_list Sys.argv with + | [] -> None + | _::args -> + let rec loop = function + | [] -> None + | a::args when String.length a > 0 && a.[0] = '-' -> loop args + | a::_ -> Some a + in + loop args in + match arg1 with + | None -> None, false + | Some prog -> (* Recognisable first argument? *) + let prog = + try Filename.chop_extension prog with Invalid_argument _ -> prog in + let prog = + if String.starts_with prog "virt-" then + String.sub prog 5 (String.length prog - 5) + else prog in + (try Some (List.assoc prog tools), true + with Not_found -> None, false) in + + (* Make a usage message. *) + let usage_msg = + match tool with + | None -> (* Generic usage message. *) + let tools = List.map ( + fun (name, (_, summary, _, external_cmd, _, _, _, _)) -> + if external_cmd then "virt-"^name, summary + else "virt-mem "^name, summary + ) tools in + (* Maximum width of field in the left hand column. *) + let max_width = + List.fold_left max 0 (List.map String.length (List.map fst tools)) in + let tools = List.map (fun (l,r) -> pad max_width l, r) tools in + let tools = List.map (fun (l,r) -> " " ^ l ^ " - " ^ r) tools in + let tools = String.concat "\n" tools in + + sprintf (f_"\ + +virt-mem: Tools for providing information about virtual machines + +Currently available tools include: +%s + +General usage is: + [-options] [domains...] + +To display extra help for a single tool, do: + virt-mem help + +Options:") tools + + (* Tool-specific usage message. *) + | Some (name, summary, description, external_cmd, _, _, _, _) -> + let cmd = + if external_cmd then "virt-" ^ name else "virt-mem " ^ name in + + sprintf (f_"\ + +%s: %s + +Description: +%s + +Options:") cmd summary description in + + (* Now begin proper parsing of the command line arguments. *) + let debug = ref false in + let images = ref [] in + let uri = ref "" in + let anon_args = ref [] in (* Default wordsize. *) let def_wordsize = ref None in @@ -35,7 +180,7 @@ let () = | "32" -> def_wordsize := Some W32 | "64" -> def_wordsize := Some W64 | "auto" -> def_wordsize := None - | str -> failwith (sprintf "set_wordsize: %s: unknown wordsize" str) + | str -> failwith (sprintf (f_"set_wordsize: %s: unknown wordsize") str) in (* Default endianness. *) @@ -46,7 +191,7 @@ let () = def_endian := Some Bitmatch.LittleEndian | "be" | "big" | "bigendian" | "motorola" -> def_endian := Some Bitmatch.BigEndian - | str -> failwith (sprintf "set_endian: %s: unknown endianness" str) + | str -> failwith (sprintf (f_"set_endian: %s: unknown endianness") str) in (* Default architecture. *) @@ -69,50 +214,230 @@ let () = | str -> def_text_addr := Int64.of_string str in - (* List of kernel images. *) - let images = ref [] in - + (* Handle -t option. *) let memory_image filename = images := (!def_wordsize, !def_endian, !def_architecture, !def_text_addr, filename) :: !images in - let argspec = Arg.align [ - "-A", Arg.String set_architecture, - "arch " ^ "Set kernel architecture, endianness and word size"; - "-E", Arg.String set_endian, - "endian " ^ "Set kernel endianness"; - "-T", Arg.String set_text_addr, - "addr " ^ "Set kernel text address"; - "-W", Arg.String set_wordsize, - "addr " ^ "Set kernel word size"; - "-t", Arg.String memory_image, - "image " ^ "Use saved kernel memory image"; - "-verbose", Arg.Set verbose, - " " ^ "Verbose messages"; - ] in - - let anon_fun str = - raise (Arg.Bad (sprintf "%s: unknown parameter" str)) in - let usage_msg = "virt-mem: shows memory information for guests + (* Handle --version option. *) + let version () = + printf "virt-mem %s\n" Virt_mem_version.version; -SUMMARY - virt-mem [-options] - -OPTIONS" in + let major, minor, release = + let v, _ = Libvirt.get_version () in + v / 1_000_000, (v / 1_000) mod 1_000, v mod 1_000 in + printf "libvirt %d.%d.%d\n" major minor release; + exit 0 + in - Arg.parse argspec anon_fun usage_msg; + (* Function to collect up any anonymous args (domain names/IDs). *) + let anon_arg str = anon_args := str :: !anon_args in + + (* Construct the argspec. + * May include extra arguments specified by the tool. + *) + let argspec = + let extra_args = match tool with + | None -> [] + | Some (_, _, _, _, extra_args, _, _, _) -> extra_args in + let argspec = [ + "-A", Arg.String set_architecture, + "arch " ^ s_"Set kernel architecture, endianness and word size"; + "-E", Arg.String set_endian, + "endian " ^ s_"Set kernel endianness"; + "-T", Arg.String set_text_addr, + "addr " ^ s_"Set kernel text address"; + "-W", Arg.String set_wordsize, + "addr " ^ s_"Set kernel word size"; + "-c", Arg.Set_string uri, + "uri " ^ s_ "Connect to URI"; + "--connect", Arg.Set_string uri, + "uri " ^ s_ "Connect to URI"; + "--debug", Arg.Set debug, + " " ^ s_"Debug mode (default: false)"; + "-t", Arg.String memory_image, + "image " ^ s_"Use saved kernel memory image"; + "--version", Arg.Unit version, + " " ^ s_"Display version and exit"; + ] @ extra_args in + + (* Sort options alphabetically on first alpha character. *) + let cmp (a,_,_) (b,_,_) = + let chars = "-" in + let a = String.strip ~chars a and b = String.strip ~chars b in + compare a b + in + let argspec = List.sort ~cmp argspec in + (* Make the options line up nicely. *) + Arg.align argspec in + + (* Parse the command line. This will exit if --version or --help found. *) + Arg.parse argspec anon_arg usage_msg; let images = !images in - let verbose = !verbose in + let debug = !debug in + let uri = if !uri = "" then None else Some !uri in + + (* Discard the first anonymous argument if, above, we previously + * found it contained the tool name. + *) + let anon_args = List.rev !anon_args in + let anon_args = + if ignore_first_anon_arg then List.tl anon_args else anon_args in + + (* At this point, either --help was specified on the command line + * (and so the program has exited) or we must have determined tool, + * or the user didn't give us a valid tool (eg. "virt-mem foobar"). + * Detect that final case now and give an error. + *) + let name, _, _, _, _, argcheck, beforeksyms, run = + match tool with + | Some t -> t + | None -> + prerr_endline (s_"\ +virt-mem: I could not work out which tool you are trying to run. +Use 'virt-mem --help' for more help or read the manual page virt-mem(1)"); + exit 1 + in + if debug then eprintf "tool = %s\n%!" name; + + (* Optional argument checking in the tool. *) + (match argcheck with + | None -> () + | Some argcheck -> argcheck debug + ); (* Get the kernel images. *) let images = - if images = [] then - (* XXX use libvirt to get images *) - failwith "libvirt: not yet implemented" - else + if images = [] then ( + let conn = + let name = uri in + try C.connect_readonly ?name () + with Libvirt.Virterror err -> + prerr_endline (Libvirt.Virterror.to_string err); + (* If non-root and no explicit connection URI, print a warning. *) + if Unix.geteuid () <> 0 && name = None then ( + print_endline (s_ "NB: If you want to monitor a local Xen hypervisor, you usually need to be root"); + ); + exit 1 in + + (* If we have a list of parameters, then it is the domain names / UUIDs / + * IDs ONLY that we wish to display. Otherwise, display all active. + *) + let doms = + if anon_args = [] then ( + (* List of active domains. *) + let nr_active_doms = C.num_of_domains conn in + let active_doms = + Array.to_list (C.list_domains conn nr_active_doms) in + List.map (D.lookup_by_id conn) active_doms + ) else ( + List.map ( + fun arg -> + let dom = + try D.lookup_by_uuid_string conn arg + with _ -> + try D.lookup_by_name conn arg + with _ -> + try D.lookup_by_id conn (int_of_string arg) + with _ -> + failwith (sprintf (f_"%s: unknown domain (not a UUID, name or ID of any active domain)") arg) in + + (* XXX Primitive test to see if the domain is active. *) + let is_active = try D.get_id dom >= 0 with _ -> false in + if not is_active then + failwith (sprintf (f_"%s: domain is not running") arg); + + dom + ) anon_args + ) in + + (* Get their XML. *) + let xmls = List.map (fun dom -> dom, D.get_xml_desc dom) doms in + + (* Parse the XML. *) + let xmls = List.map (fun (dom, xml) -> + dom, Xml.parse_string xml) xmls in + + (* XXX Do something with the XML XXX + * such as detecting arch, wordsize, endianness. + * XXXXXXXXXXXXXX + * + * + * + *) + + + List.map ( + fun (dom, _) -> + let id = D.get_id dom in + let name = D.get_name dom in + + let wordsize = + match !def_wordsize with + | None -> + failwith + (sprintf (f_"%s: use -W to define word size for this image") + name); + | Some ws -> ws in + let endian = + match !def_endian with + | None -> + failwith + (sprintf (f_"%s: use -E to define endianness for this image") + name); + | Some e -> e in + + let arch = + match !def_architecture with + | Some I386 -> I386 | Some X86_64 -> X86_64 + | _ -> + failwith + (sprintf (f_"%s: use -A to define architecture (i386/x86-64 only) for this image") name) in + + if !def_text_addr = 0L then + failwith + (sprintf (f_"%s: use -T to define kernel load address for this image") name); + + let start_t = gettimeofday () in + + (* Read the kernel memory. + * Maximum 64K can be read over remote connections. + *) + let str = String.create kernel_size in + let rec loop i = + let remaining = kernel_size - i in + if remaining > 0 then ( + let size = min remaining max_memory_peek in + D.memory_peek dom [D.Virtual] + (!def_text_addr +^ Int64.of_int i) size str i; + loop (i + size) + ) + in + loop 0; + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: downloading kernel took %f seconds\n%!" + (end_t -. start_t) + ); + + (* Map the virtual memory. *) + let mem = MMap.of_string str !def_text_addr in + + (* Force the wordsize and endianness. *) + let mem = MMap.set_wordsize mem wordsize in + let mem = MMap.set_endian mem endian in + + ((Some id, name, arch, mem) : image) + ) xmls + ) else ( + (* One or more -t options passed. *) + if anon_args <> [] then + failwith (s_"virt-mem: if -t given on command line, then no domain arguments should be listed"); + List.map ( fun (wordsize, endian, arch, text_addr, filename) -> (* Quite a lot of limitations on the kernel images we can @@ -123,14 +448,14 @@ OPTIONS" in match wordsize with | None -> failwith - (sprintf "%s: use -W to define word size for this image" + (sprintf (f_"%s: use -W to define word size for this image") filename); | Some ws -> ws in let endian = match endian with | None -> failwith - (sprintf "%s: use -E to define endianness for this image" + (sprintf (f_"%s: use -E to define endianness for this image") filename); | Some e -> e in @@ -139,11 +464,11 @@ OPTIONS" in | Some I386 -> I386 | Some X86_64 -> X86_64 | _ -> failwith - (sprintf "%s: use -A to define architecture (i386/x86-64 only) for this image" filename) in + (sprintf (f_"%s: use -A to define architecture (i386/x86-64 only) for this image") filename) in if text_addr = 0L then failwith - (sprintf "%s: use -T to define kernel load address for this image" + (sprintf (f_"%s: use -T to define kernel load address for this image") filename); (* Map the virtual memory. *) @@ -154,159 +479,437 @@ OPTIONS" in let mem = MMap.set_wordsize mem wordsize in let mem = MMap.set_endian mem endian in - (filename, (arch, mem)) - ) images in + ((None, filename, arch, mem) : image) + ) images + ) in - List.iter ( - fun (name, (arch, mem)) -> - (* Look for some common entries in the symbol table and from - * that find the symbol table itself. These are just supposed to - * be symbols which are very likely to be present in any Linux - * kernel, although we only need one of them to be present to - * find the symbol table. - * - * NB. Must not be __initdata. - *) - let common_ksyms = [ - "init_task"; (* first task_struct *) - "root_mountflags"; (* flags for mounting root fs *) - "init_uts_ns"; (* uname strings *) - "sys_open"; (* open(2) entry point *) - "sys_chdir"; (* chdir(2) entry point *) - "sys_chroot"; (* chroot(2) entry point *) - "sys_umask"; (* umask(2) entry point *) - "schedule"; (* scheduler entry point *) - ] in - (* Searching for string *) - let common_ksyms = List.map (sprintf "\000%s\000") common_ksyms in - - (* Search for these strings in the memory image. *) - let ksym_strings = List.map (MMap.find_all mem) common_ksyms in - let ksym_strings = List.concat ksym_strings in - (* Adjust found addresses to start of the string (skip ). *) - let ksym_strings = List.map Int64.succ ksym_strings in - - (* For any we found, try to look up the symbol table - * base addr and size. - *) - let ksymtabs = List.map ( - fun addr -> - (* Search for 'addr' appearing in the image. *) - let addrs = MMap.find_pointer_all mem addr in - - (* Now consider each of these addresses and search back - * until we reach the beginning of the (possible) symbol - * table. - * - * Kernel symbol table struct is: - * struct kernel_symbol { - * unsigned long value; - * const char *name; <-- initial pointer - * } symbols[]; - *) - let pred_long2 addr = MMap.pred_long mem (MMap.pred_long mem addr) in - let base_addrs = List.map ( - fun addr -> - let rec loop addr = - (* '*addr' should point to a C identifier. If it does, - * step backwards to the previous symbol table entry. - *) - let addrp = MMap.follow_pointer mem addr in - if MMap.is_C_identifier mem addrp then - loop (pred_long2 addr) - else - MMap.succ_long mem addr + (* Optional callback into the tool before we start looking for + * kernel symbols. + *) + (match beforeksyms with + | None -> () + | Some beforeksyms -> beforeksyms debug images + ); + + (* If there is no run function, then there is no point continuing + * with the rest of the program (kernel symbol analysis) ... + *) + if run = None then exit 0; + + (* Now kernel symbol analysis starts ... *) + let images = + List.map ( + fun (domid, name, arch, mem) -> + (* Look for some common entries in the exported symbol table and + * from that find the symbol table itself. These are just + * supposed to be symbols which are very likely to be present + * in any Linux kernel, although we only need one of them to be + * present to find the symbol table. + * + * NB. Must not be __initdata, must be in EXPORT_SYMBOL. + *) + let common_ksyms = [ + "init_task"; (* first task_struct *) + "root_mountflags"; (* flags for mounting root fs *) + "init_uts_ns"; (* uname strings *) + "sys_open"; (* open(2) entry point *) + "sys_chdir"; (* chdir(2) entry point *) + "sys_chroot"; (* chroot(2) entry point *) + "sys_umask"; (* umask(2) entry point *) + "schedule"; (* scheduler entry point *) + ] in + (* Searching for string *) + let common_ksyms_nul = List.map (sprintf "\000%s\000") common_ksyms in + + let start_t = gettimeofday () in + + (* Search for these strings in the memory image. *) + let ksym_strings = List.map (MMap.find_all mem) common_ksyms_nul in + let ksym_strings = List.concat ksym_strings in + (* Adjust found addresses to start of the string (skip ). *) + let ksym_strings = List.map Int64.succ ksym_strings in + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: searching for common_ksyms took %f seconds\n%!" + (end_t -. start_t) + ); + + let start_t = gettimeofday () in + + (* For any we found, try to look up the symbol table + * base addr and size. + *) + let ksymtabs = List.map ( + fun addr -> + (* Search for 'addr' appearing in the image. *) + let addrs = MMap.find_pointer_all mem addr in + + (* Now consider each of these addresses and search back + * until we reach the beginning of the (possible) symbol + * table. + * + * Kernel symbol table struct is: + * struct kernel_symbol { + * unsigned long value; + * const char *name; <-- initial pointer + * } symbols[]; + *) + let pred_long2 addr = + MMap.pred_long mem (MMap.pred_long mem addr) + in + let base_addrs = List.map ( + fun addr -> + let rec loop addr = + (* '*addr' should point to a C identifier. If it does, + * step backwards to the previous symbol table entry. + *) + let addrp = MMap.follow_pointer mem addr in + if MMap.is_C_identifier mem addrp then + loop (pred_long2 addr) + else + MMap.succ_long mem addr + in + loop addr + ) addrs in + + (* Also look for the end of the symbol table and + * calculate its size. + *) + let base_addrs_sizes = List.map ( + fun base_addr -> + let rec loop addr = + let addr2 = MMap.succ_long mem addr in + let addr2p = MMap.follow_pointer mem addr2 in + if MMap.is_C_identifier mem addr2p then + loop (MMap.succ_long mem addr2) + else + addr + in + let end_addr = loop base_addr in + base_addr, end_addr -^ base_addr + ) base_addrs in + + base_addrs_sizes + ) ksym_strings in + let ksymtabs = List.concat ksymtabs in + + (* Simply ignore any symbol table candidates which are too small. *) + let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in + + if debug then ( + eprintf "%s: candidate symbol tables at:\n" name; + List.iter ( + fun (addr, size) -> + eprintf "\t%Lx\t%Lx\t%!" addr size; + eprintf "first symbol: %s\n%!" + (MMap.get_string mem + (MMap.follow_pointer mem + (MMap.succ_long mem addr))) + ) ksymtabs + ); + + (* Vote for the most popular symbol table candidate and from this + * generate a function to look up ksyms. + *) + let lookup_ksym = + let freqs = frequency ksymtabs in + match freqs with + | [] -> + eprintf (f_"%s: cannot find start of kernel symbol table\n") name; + (fun _ -> raise Not_found) + + | (_, (ksymtab_addr, ksymtab_size)) :: _ -> + if debug then + eprintf + "%s: Kernel symbol table found at %Lx, size %Lx bytes\n%!" + name ksymtab_addr ksymtab_size; + + (* Load the whole symbol table as a bitstring. *) + let ksymtab = + Bitmatch.bitstring_of_string + (MMap.get_bytes mem ksymtab_addr + (Int64.to_int ksymtab_size)) in + + (* Function to look up an address in the symbol table. *) + let lookup_ksym sym = + let bits = bits_of_wordsize (MMap.get_wordsize mem) in + let e = MMap.get_endian mem in + let rec loop bs = + bitmatch bs with + | { value : bits : endian(e); + name_ptr : bits : endian(e) } + when MMap.get_string mem name_ptr = sym -> + value + | { _ : bits : endian(e); + _ : bits : endian(e); + bs : -1 : bitstring } -> + loop bs + | { _ } -> raise Not_found + in + loop ksymtab in - loop addr - ) addrs in - (* Also look for the end of the symbol table and - * calculate its size. - *) - let base_addrs_sizes = List.map ( - fun base_addr -> - let rec loop addr = - let addr2 = MMap.succ_long mem addr in - let addr2p = MMap.follow_pointer mem addr2 in - if MMap.is_C_identifier mem addr2p then - loop (MMap.succ_long mem addr2) + lookup_ksym + in + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: searching for ordinary ksyms took %f seconds\n%!" + (end_t -. start_t) + ); + + let start_t = gettimeofday () in + + (* Now try to find the /proc/kallsyms table. This is in an odd + * compressed format (but not a very successful compression + * format). However if it exists we know that it will contain + * addresses of the common ksyms above, and it has some + * characteristics which make it easy to detect in the + * memory. + * + * kallsyms contains a complete list of symbols so is much + * more useful than the basic list of exports. + *) + let ksym_addrs = List.filter_map ( + fun ksym -> try Some (lookup_ksym ksym) with Not_found -> None + ) common_ksyms in + + (* Search for those kernel addresses in the image. We're looking + * for the table kallsyms_addresses followed by kallsyms_num_syms + * (number of symbols in the table). + *) + let ksym_addrs = List.map (MMap.find_pointer_all mem) ksym_addrs in + let ksym_addrs = List.concat ksym_addrs in + + (* Test each one to see if it's a candidate list of kernel + * addresses followed by length of list. + *) + let kallsymtabs = List.filter_map ( + fun addr -> + (* Search upwards from address until we find the length field. + * If found, jump backwards by length and check all addresses. + *) + if debug then + eprintf "%s: testing candidate kallsyms at %Lx\n" name addr; + let rec loop addr = + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop (MMap.succ_long mem addr) (* continue up the table *) + else + if addrp >= min_kallsyms_tabsize && + addrp <= max_kallsyms_tabsize then ( + (* addrp might be the symbol count. Count backwards and + * check the full table. + *) + let num_entries = Int64.to_int addrp in + let entry_size = bytes_of_wordsize (MMap.get_wordsize mem) in + let start_addr = + addr -^ Int64.of_int (entry_size * num_entries) in + let end_addr = addr in + let rec loop2 addr = + if addr < end_addr then ( + let addrp = MMap.follow_pointer mem addr in + if MMap.is_mapped mem addrp then + loop2 (MMap.succ_long mem addr) + else + None (* can't verify the full address table *) + ) else + (* ok! *) + let names_addr = MMap.succ_long mem end_addr in + if debug then + eprintf "%s: candidate kallsyms found at %Lx (names_addr at %Lx, num_entries %d)\n" + name start_addr names_addr num_entries; + Some (start_addr, num_entries, names_addr) + in + loop2 start_addr + ) else - addr - in - let end_addr = loop base_addr in - base_addr, end_addr -^ base_addr - ) base_addrs in - - base_addrs_sizes - ) ksym_strings in - let ksymtabs = List.concat ksymtabs in - - (* Simply ignore any symbol table candidates which are too small. *) - let ksymtabs = List.filter (fun (_, size) -> size > 64L) ksymtabs in - - if verbose then ( - printf "name %s:\n" name; - List.iter ( - fun (addr, size) -> - printf "\t%Lx\t%Lx\t%!" addr size; - printf "first symbol: %s\n%!" - (MMap.get_string mem - (MMap.follow_pointer mem - (MMap.succ_long mem addr))) - ) ksymtabs - ); - - (* Vote for the most popular symbol table candidate. *) - let freqs = frequency ksymtabs in - match freqs with - | [] -> - eprintf "%s: cannot find start of kernel symbol table\n" name - | (_, (ksymtab_addr, ksymtab_size)) :: _ -> - if verbose then - printf "%s: Kernel symbol table found at %Lx, size %Lx bytes\n%!" - name ksymtab_addr ksymtab_size; - - (* Load the whole symbol table as a bitstring. *) - let ksymtab = - Bitmatch.bitstring_of_string - (MMap.get_bytes mem ksymtab_addr (Int64.to_int ksymtab_size)) in - - (* Function to look up an address in the symbol table. *) - let lookup_ksym sym = - let bits = bits_of_wordsize (MMap.get_wordsize mem) in - let e = MMap.get_endian mem in - let rec loop bs = - bitmatch bs with - | { value : bits : endian(e); - name_ptr : bits : endian(e) } - when MMap.get_string mem name_ptr = sym -> - value - | { _ : bits : endian(e); - _ : bits : endian(e); - bs : -1 : bitstring } -> - loop bs - | { _ } -> raise Not_found + None (* forget it *) in - loop ksymtab - in - - if verbose then ( - (* This just tests looking up kernel symbols. *) - printf "init_task = %Lx\n" (lookup_ksym "init_task"); - printf "schedule = %Lx\n" (lookup_ksym "schedule"); - printf "system_utsname = %s\n" - (try - let addr = lookup_ksym "system_utsname" in - sprintf "%Lx" addr - with Not_found -> "not found"); - printf "init_uts_ns = %s\n" - (try - let addr = lookup_ksym "init_uts_ns" in - sprintf "%Lx" addr - with Not_found -> "not found"); - ); - - - + match loop addr with + | None -> None + | Some (start_addr, num_entries, names_addr) -> + (* As an additional verification, check the list of + * kallsyms_names. + *) + try + (* If the first byte is '\000' and is followed by a + * C identifier, then this is old-school list of + * symbols with prefix compression as in 2.6.9. + * Otherwise Huffman-compressed kallsyms as in + * 2.6.25. + *) + if MMap.get_byte mem names_addr = 0 && + MMap.is_C_identifier mem (names_addr+^1L) then ( + let names = ref [] in + let prev = ref "" in + let rec loop names_addr start_addr num = + if num > 0 then ( + let prefix = MMap.get_byte mem names_addr in + let prefix = String.sub !prev 0 prefix in + let name = MMap.get_string mem (names_addr+^1L) in + let len = String.length name in + let name = prefix ^ name in + prev := name; + let names_addr = names_addr +^ Int64.of_int len +^ 2L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + (*eprintf "%S -> %Lx\n" name sym_value;*) + names := (name, sym_value) :: !names; + loop names_addr start_addr (num-1) + ) + in + loop names_addr start_addr num_entries; + let names = List.rev !names in + + Some (start_addr, num_entries, names_addr, + Uncompressed names) + ) + else ( (* new-style "compressed" names. *) + let compressed_names = ref [] in + let rec loop names_addr start_addr num = + if num > 0 then ( + let len = MMap.get_byte mem names_addr in + let name = MMap.get_bytes mem (names_addr+^1L) len in + let names_addr = names_addr +^ Int64.of_int len +^ 1L in + let sym_value = MMap.follow_pointer mem start_addr in + let start_addr = MMap.succ_long mem start_addr in + compressed_names := + (name, sym_value) :: !compressed_names; + loop names_addr start_addr (num-1) + ) else + names_addr + in + let markers_addr = loop names_addr start_addr num_entries in + let markers_addr = MMap.align mem markers_addr in + let compressed_names = List.rev !compressed_names in + + Some (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr)) + ) + with + Invalid_argument _ -> None (* bad names list *) + ) ksym_addrs in + + if debug then ( + eprintf "%s: candidate kallsyms at:\n" name; + List.iter ( + function + | (start_addr, num_entries, names_addr, Uncompressed _) -> + eprintf "\t%Lx %d entries names_addr=%Lx old-style\n%!" + start_addr num_entries names_addr + | (start_addr, num_entries, names_addr, + Compressed (_, markers_addr)) -> + eprintf "\t%Lx %d entries names_addr=%Lx markers_addr=%Lx\n%!" + start_addr num_entries names_addr markers_addr + ) kallsymtabs + ); + + (* Vote for the most popular symbol table candidate and + * enhance the function for looking up ksyms. + *) + let lookup_ksym = + let freqs = frequency kallsymtabs in + match freqs with + | [] -> + (* Can't find any kallsymtabs, just return the lookup_ksym + * function generated previously from the exported symbols. + *) + lookup_ksym + + | (_, (_, _, _, Uncompressed names)) :: _ -> + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in + lookup_ksym + + | (_, (start_addr, num_entries, names_addr, + Compressed (compressed_names, markers_addr))) :: _ -> + (* Skip the markers and look for the token table. *) + let num_markers = Int64.of_int ((num_entries + 255) / 256) in + let marker_size = + Int64.of_int (bytes_of_wordsize (MMap.get_wordsize mem)) in + let tokens_addr = markers_addr +^ marker_size *^ num_markers in + + (* Now read out the compression tokens, which are just + * 256 ASCIIZ strings that map bytes in the compression + * names to substrings. + *) + let tokens = Array.make 256 "" in + let rec loop i addr = + if i < 256 then ( + let str = MMap.get_string mem addr in + let len = String.length str in + let addr = addr +^ Int64.of_int (len+1) in + tokens.(i) <- str; + loop (i+1) addr + ) + in + loop 0 tokens_addr; + + (* Expand the compressed names using the tokens. *) + let names = List.filter_map ( + fun (name, sym_value) -> + let f c = tokens.(Char.code c) in + let name = String.replace_chars f name in + (* First character in uncompressed output is the symbol + * type, eg. 'T'/'t' for text etc. + *) + (* NOTE: Symbol names are NOT unique + * (eg. 'con_start' is both a function and data in + * some kernels). XXX We need to handle this situation + * better. + *) + (*let typ = name.[0] in*) + let name = String.sub name 1 (String.length name - 1) in + (*eprintf "%S -> %Lx\n" name sym_value;*) + Some (name, sym_value) + ) compressed_names in + + let lookup_ksym name = + try (* first look it up in kallsyms table. *) + List.assoc name names + with Not_found -> (* try the old exports table instead *) + lookup_ksym name + in - ) images + lookup_ksym in + + if debug then ( + let end_t = gettimeofday () in + eprintf "timing: searching for kallsyms took %f seconds\n%!" + (end_t -. start_t) + ); + + (* Just wrap the lookup_ksym call in something which prints + * the query when debug is set. + *) + let lookup_ksym = + if debug then + let lookup_ksym sym = + try + let value = lookup_ksym sym in + eprintf "lookup_ksym %S = %Lx\n%!" sym value; + value + with Not_found -> + eprintf "lookup_ksym %S failed\n%!" sym; + raise Not_found + in + lookup_ksym + else + lookup_ksym + in + + ((domid, name, arch, mem, lookup_ksym) : image_with_ksyms) + ) images in + + (* Run the tool's main function. *) + (match run with + | None -> () + | Some run -> + run debug images + )