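More kerneldb: virt-ps now reads task_structs through Kernel_task_struct (selected by kernel version) instead of searching for the struct layout.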

[virt-mem.git] / ps / virt_ps.ml
diff --git a/ps/virt_ps.ml b/ps/virt_ps.ml

index df876ba..a9bcf1f 100644 (file)
--- a/ps/virt_ps.ml
+++ b/ps/virt_ps.ml
@@ -21,209 +21,77 @@ open Printf
  
  open Virt_mem_gettext.Gettext
  open Virt_mem_utils
-open Virt_mem_mmap
-
-(* The implementation of 'ps' has gone through a number of complete
- * rewrites.  This explains the "leading theories" of how to implement
- * this.  Probably some sort of hybrid method is the way to go.
- *
- * General comments: The init_task ksym points to the initial
- * task_struct (PID 1, init).  Out from this init task_struct
- * we find linked lists (struct list_head) which cover all
- * tasks and subsets of the tasks (eg. parent/children).
- * So if we have init_task and we know what the task_struct
- * looks like, then we can follow these chains of pointers
- * to find all processes in the kernel.
- *
- * task_struct varies greatly depending on: word size, kernel
- * version, CONFIG_* settings, and vendor/additional patches.
- *
- * (Theory 1) Precompiled task_struct.  We can easily and reliably
- * determine the Linux kernel version (see virt-uname).  In
- * theory we could compile a list of known kernel versions,
- * check out their sources beforehand, and find the absolute
- * layout of the task_struct (eg. using CIL).  This method would
- * only work for known kernel versions, but has the advantage
- * that all fields in the task_struct would be known.
- *
- * (Theory 2) Fuzzy matched task_struct.  The task_struct has
- * a certain internal structure which is stable even over many
- * kernel revisions.  For example, groups of pointers always
- * occur together.  We search through init_task looking for
- * these characteristic features and where a pointer is found
- * to another task_struct we search that (recursively) on the
- * assumption that those contain the same features at the same
- * location.  This works well for pointers, but not so well for
- * finding other fields (eg. uids, process name, etc).  We can
- * defray the cost of searches by caching the results between
- * runs.
- *)
-
-(* This accumulator stores a map of task_struct address to
- * task_struct content.  It can also store the address of a
- * task_struct which isn't fully parsed yet.
- *   key:   address (int64)
- *   value: task_struct option (None if not parsed yet)
- *)
-type task_struct = {
-  ts_addr : int64;
-}
-
-module Accum = struct
-  include Map.Make (Int64)
-
-  let contains_addr accum addr = mem addr accum
-  let add_address accum addr = add addr None accum
-  let add_task_struct accum addr ts = add addr (Some ts) accum
-end
-
-exception ShapeError of int
-
-(* Parse (recursively) all task_structs, starting at init_task.
- *
- * Either returns an accumulator of task_structs, or raises an
- * exception.
- *
- * On failure, the caller can try again with a different shape.
- * The exception gives some information back to the caller about
- * where the match failed, allowing faster, directed searches.
- *)
-let get_task_struct debug mem ((ws,e) as wse) ((n1,n2) as shape)
-    addr accum =
-
-  (* The 'struct list_head', the standard double-linked list used
-   * by Linux, contains next and prev pointers.  However these point
-   * not to the beginning of the struct, but to the offset of the
-   * list_head within the struct.  This function adjusts the pointer
-   * to get back to the beginning of the struct.
-   *)
-  let container_of addr bitoffset =
-    if addr <> 0L then (
-      let offset = Int64.of_int (bitoffset lsr 3) in
-      addr -^ offset
-    ) else 0L
-  in
-
-  (* Do get_task_struct as a recursive subfunction so we don't have
-   * to pass around all the fixed arguments on the stack.
-   *)
-  let rec get_task_struct ~i addr accum =
-    if Accum.contains_addr accum addr then accum
-    else (
-      (* NOTE: The order of the following three statements is crucial
-       * to avoid cycles and malicious guests.  Do not change it!
-       *)
-      let bits =
-       try Bitstring.bitstring_of_string (get_bytes mem addr (4096*8))
-       with Invalid_argument "get_bytes" -> raise (ShapeError i) in
-      let accum = Accum.add_address accum addr in
-      let accum = ref accum in
-
-      if debug then
-       eprintf "trying to match task_struct, i=%d %Lx\n%!" i addr;
-
-      bitmatch bits with
-      | { _ : n1*ws : bitstring;    (* Ignore start of task_struct. *)
-
-         (* struct list_head tasks *)
-         tasks_next : ws : endian(e),
-           save_offset_to (offset),
-            bind (let addr = container_of tasks_next offset in
-             eprintf "offset = %d, tasks_next = %Lx, addr = %Lx\n"
-               offset tasks_next addr;
-                 accum := get_task_struct ~i:1 addr !accum;
-                 addr);
-         tasks_prev : ws : endian(e),
-            bind (let addr = container_of tasks_prev offset in
-             eprintf "offset = %d, tasks_prev = %Lx, addr = %Lx\n"
-               offset tasks_prev addr;
-                 accum := get_task_struct ~i:1 addr !accum;
-                 addr);
-
-         _ : n2*ws : bitstring;
-
-         (* struct task_struct *parent *)
-         parent : ws : endian(e),
-            bind (let addr = parent in
-                 accum := get_task_struct ~i:2 addr !accum;
-                 addr);
-
-         (* struct list_head children *)
-         children_next : ws : endian(e),
-           save_offset_to (offset),
-            bind (let addr = container_of children_next offset in
-                 accum := get_task_struct ~i:2 addr !accum;
-                 addr);
-
-         children_prev : ws : endian(e),
-            bind (let addr = container_of children_prev offset in
-                 accum := get_task_struct ~i:2 addr !accum;
-                 addr);
-
-         (* struct list_head sibling *)
-         sibling_next : ws : endian(e),
-           save_offset_to (offset),
-            bind (let addr = container_of sibling_next offset in
-                 accum := get_task_struct ~i:2 addr !accum;
-                 addr);
-
-         sibling_prev : ws : endian(e),
-            bind (let addr = container_of sibling_prev offset in
-                 accum := get_task_struct ~i:2 addr !accum;
-                 addr);
-
-         (* struct task_struct *group_leader *)
-         group_leader : ws : endian(e),
-            bind (let addr = group_leader in
-                 accum := get_task_struct ~i:2 addr !accum;
-                 addr)
-       } ->
-         (* Successful match, so return the updated accumulator. *)
-         let accum = !accum in
-         let accum = Accum.add_task_struct accum addr { ts_addr = addr } in
-         accum
-
-      | { _ } ->
-         (* Unsuccessful match, throw an exception. *)
-         raise (ShapeError (-1))
-    )
-  in
-  get_task_struct ~i:0 addr accum
-
-(* This is the directed search function. *)
-let search debug mem lookup_ksym =
-  let ws = get_wordsize mem in
-  let ws = match ws with W32 -> 32 | W64 -> 64 in
-  let e = get_endian mem in
-  let wse = ws, e in
-
-  let init_task =
-    try lookup_ksym "init_task"
-    with Not_found ->
-      eprintf "virt-ps: lookup_ksym of init_task failed\n";
-      exit 1 in
-
-  let accum = Accum.empty in
-
-  let rec loop n1 n2 =
-    try
-      if debug then eprintf "search: trying (%d, %d)\n" n1 n2;
-      let ts = get_task_struct debug mem wse (n1, n2) init_task accum in
-      if debug then eprintf "search: success (%d, %d)\n" n1 n2;
-      ts
-    with
-    | ShapeError ((-1|0|1) as i) ->
-       if debug then eprintf "search: ShapeError %d\n" i;
-       loop (n1+1) n2
-    | ShapeError 2 ->
-       if debug then eprintf "search: ShapeError 2\n";
-       loop n1 (n2+1)
-  in
-  let ts = loop 0 0 in
-  ()
-
-let run debug (_, _, _, mem, lookup_ksym, _) =
-  search debug mem lookup_ksym
+open Virt_mem_types
+
+open Kernel_task_struct
+(** [run debug (image, ksymmap, utsname)] prints the guest's tasks, sorted by PID, on stdout. *)
+let run debug (image, ksymmap, utsname) =  (* [debug] is not referenced in this body *)
+  try  (* each failure path prints to stderr, then raises [Exit], caught at the end *)
+    let { domname = domname } = image in
+
+    let kernel_version =
+      match utsname with
+      | None ->
+         eprintf (f_"%s: could not guess kernel version\n") domname;
+         raise Exit
+      | Some { kernel_release = v } -> v in
+
+    if not (task_struct_known kernel_version) then (
+      eprintf (f_"%s: %s: unknown kernel version
+Try a newer version of virt-mem, or if the guest is not from a
+supported Linux distribution, see this page about adding support:
+  http://et.redhat.com/~rjones/virt-mem/faq.html\n") domname kernel_version;
+      raise Exit
+    );
+
+    let task_struct_size = task_struct_size kernel_version in  (* shadows the function of the same name *)
+
+    let init_task, init_task_addr =
+      let init_task_addr =  (* address bound to the "init_task" symbol in [ksymmap] *)
+       try Ksymmap.find "init_task" ksymmap
+       with Not_found ->
+         eprintf (f_"%s: could not find init_task in kernel image\n") domname;
+         raise Exit in
+      let init_task =
+       get_task_struct kernel_version image.mem init_task_addr in
+      init_task, init_task_addr in
+
+    (* Starting at init_task, follow tasks.next from one task_struct to the
+     * next, collecting each one, until the pointer equals init_task_addr
+     * again.  init_task itself is not added to the list. *)
+    let tasks, image =  (* the [image] rebound here is not used again below *)
+      let rec loop image acc task =
+       let next = task.task_struct_tasks'next in  (* NOTE(review): used directly as a task_struct address; confirm Kernel_task_struct already adjusts for the list_head offset *)
+       if next <> init_task_addr then (  (* NOTE(review): the only stop condition; nothing here guards against a chain that never returns to init_task *)
+         let mapped =
+           Virt_mem_mmap.is_mapped_range image.mem next task_struct_size in
+         let image =
+           if not mapped then load_memory image next task_struct_size  (* presumably maps the missing range into a new image -- confirm *)
+           else image in
+         let task = get_task_struct kernel_version image.mem next in
+         let task = {
+           task with
+             task_struct_comm = truncate_c_string task.task_struct_comm  (* presumably cuts at the first NUL -- confirm *)
+         } in
+         let acc = task :: acc in  (* newest first; final order comes from the sort below *)
+         loop image acc task
+       ) else
+         acc, image
+      in
+      loop image [] init_task in
+
+    (* Sort tasks by PID. *)
+    let cmp { task_struct_pid = p1 } { task_struct_pid = p2 } = compare p1 p2 in
+    let tasks = List.sort cmp tasks in
+
+    printf "  PID STAT COMMAND\n";  (* STAT is headed here but the rows below leave it blank *)
+
+    List.iter (
+      fun task ->
+       printf "%5Ld      %s\n" task.task_struct_pid task.task_struct_comm
+    ) tasks
+
+  with Exit -> ()  (* the error was already reported where [Exit] was raised; return unit *)
  
  let summary = s_"list processes in virtual machine"
  let description = s_"\