open Virt_mem_utils
open Virt_mem_mmap
-let run debug images = ()
+(* The implementation of 'ps' has gone through a number of complete
+ * rewrites. This explains the "leading theories" of how to implement
+ * this. Probably some sort of hybrid method is the way to go.
+ *
+ * General comments: The init_task ksym points to the initial
+ * task_struct (PID 1, init). Out from this init task_struct
+ * we find linked lists (struct list_head) which cover all
+ * tasks and subsets of the tasks (eg. parent/children).
+ * So if we have init_task and we know what the task_struct
+ * looks like, then we can follow these chains of pointers
+ * to find all processes in the kernel.
+ *
+ * task_struct varies greatly depending on: word size, kernel
+ * version, CONFIG_* settings, and vendor/additional patches.
+ *
+ * (Theory 1) Precompiled task_struct. We can easily and reliably
+ * determine the Linux kernel version (see virt-uname). In
+ * theory we could compile a list of known kernel versions,
+ * check out their sources beforehand, and find the absolute
+ * layout of the task_struct (eg. using CIL). This method would
+ * only work for known kernel versions, but has the advantage
+ * that all fields in the task_struct would be known.
+ *
+ * (Theory 2) Fuzzy matched task_struct. The task_struct has
+ * a certain internal structure which is stable even over many
+ * kernel revisions. For example, groups of pointers always
+ * occur together. We search through init_task looking for
+ * these characteristic features and where a pointer is found
+ * to another task_struct we search that (recursively) on the
+ * assumption that those contain the same features at the same
+ * location. This works well for pointers, but not so well for
+ * finding other fields (eg. uids, process name, etc). We can
+ * defray the cost of searches by caching the results between
+ * runs.
+ *)
+
+(* This accumulator stores a map of task_struct address to
+ * task_struct content. It can also store the address of a
+ * task_struct which isn't fully parsed yet.
+ * key: address (int64)
+ * value: task_struct option (None if not parsed yet)
+ *)
+type task_struct = {
+ ts_addr : int64;
+}
+
+module Accum = struct
+ include Map.Make (Int64)
+
+ let contains_addr accum addr = mem addr accum
+ let add_address accum addr = add addr None accum
+ let add_task_struct accum addr ts = add addr (Some ts) accum
+end
+
+exception ShapeError of int
+
+(* Parse (recursively) all task_structs, starting at init_task.
+ *
+ * Either returns an accumulator of task_structs, or raises an
+ * exception.
+ *
+ * On failure, the caller can try again with a different shape.
+ * The exception gives some information back to the caller about
+ * where the match failed, allowing faster, directed searches.
+ *)
+let get_task_struct debug mem ((ws,e) as wse) ((n1,n2) as shape)
+ addr accum =
+
+ (* The 'struct list_head', the standard double-linked list used
+ * by Linux, contains next and prev pointers. However these point
+ * not to the beginning of the struct, but to the offset of the
+ * list_head within the struct. This function adjusts the pointer
+ * to get back to the beginning of the struct.
+ *)
+ let container_of addr bitoffset =
+ if addr <> 0L then (
+ let offset = Int64.of_int (bitoffset lsr 3) in
+ addr -^ offset
+ ) else 0L
+ in
+
+ (* Do get_task_struct as a recursive subfunction so we don't have
+ * to pass around all the fixed arguments on the stack.
+ *)
+ let rec get_task_struct ~i addr accum =
+ if Accum.contains_addr accum addr then accum
+ else (
+ (* NOTE: The order of the following three statements is crucial
+ * to avoid cycles and malicious guests. Do not change it!
+ *)
+ let bits =
+ try Bitstring.bitstring_of_string (get_bytes mem addr (4096*8))
+ with Invalid_argument "get_bytes" -> raise (ShapeError i) in
+ let accum = Accum.add_address accum addr in
+ let accum = ref accum in
+
+ if debug then
+ eprintf "trying to match task_struct, i=%d %Lx\n%!" i addr;
+
+ bitmatch bits with
+ | { _ : n1*ws : bitstring; (* Ignore start of task_struct. *)
+
+ (* struct list_head tasks *)
+ tasks_next : ws : endian(e),
+ save_offset_to (offset),
+ bind (let addr = container_of tasks_next offset in
+ eprintf "offset = %d, tasks_next = %Lx, addr = %Lx\n"
+ offset tasks_next addr;
+ accum := get_task_struct ~i:1 addr !accum;
+ addr);
+ tasks_prev : ws : endian(e),
+ bind (let addr = container_of tasks_prev offset in
+ eprintf "offset = %d, tasks_prev = %Lx, addr = %Lx\n"
+ offset tasks_prev addr;
+ accum := get_task_struct ~i:1 addr !accum;
+ addr);
+
+ _ : n2*ws : bitstring;
+
+ (* struct task_struct *parent *)
+ parent : ws : endian(e),
+ bind (let addr = parent in
+ accum := get_task_struct ~i:2 addr !accum;
+ addr);
+
+ (* struct list_head children *)
+ children_next : ws : endian(e),
+ save_offset_to (offset),
+ bind (let addr = container_of children_next offset in
+ accum := get_task_struct ~i:2 addr !accum;
+ addr);
+
+ children_prev : ws : endian(e),
+ bind (let addr = container_of children_prev offset in
+ accum := get_task_struct ~i:2 addr !accum;
+ addr);
+
+ (* struct list_head sibling *)
+ sibling_next : ws : endian(e),
+ save_offset_to (offset),
+ bind (let addr = container_of sibling_next offset in
+ accum := get_task_struct ~i:2 addr !accum;
+ addr);
+
+ sibling_prev : ws : endian(e),
+ bind (let addr = container_of sibling_prev offset in
+ accum := get_task_struct ~i:2 addr !accum;
+ addr);
+
+ (* struct task_struct *group_leader *)
+ group_leader : ws : endian(e),
+ bind (let addr = group_leader in
+ accum := get_task_struct ~i:2 addr !accum;
+ addr)
+ } ->
+ (* Successful match, so return the updated accumulator. *)
+ let accum = !accum in
+ let accum = Accum.add_task_struct accum addr { ts_addr = addr } in
+ accum
+
+ | { _ } ->
+ (* Unsuccessful match, throw an exception. *)
+ raise (ShapeError (-1))
+ )
+ in
+ get_task_struct ~i:0 addr accum
+
+(* This is the directed search function. *)
+let search debug mem lookup_ksym =
+ let ws = get_wordsize mem in
+ let ws = match ws with W32 -> 32 | W64 -> 64 in
+ let e = get_endian mem in
+ let wse = ws, e in
+
+ let init_task =
+ try lookup_ksym "init_task"
+ with Not_found ->
+ eprintf "virt-ps: lookup_ksym of init_task failed\n";
+ exit 1 in
+
+ let accum = Accum.empty in
+
+ let rec loop n1 n2 =
+ try
+ if debug then eprintf "search: trying (%d, %d)\n" n1 n2;
+ let ts = get_task_struct debug mem wse (n1, n2) init_task accum in
+ if debug then eprintf "search: success (%d, %d)\n" n1 n2;
+ ts
+ with
+ | ShapeError ((-1|0|1) as i) ->
+ if debug then eprintf "search: ShapeError %d\n" i;
+ loop (n1+1) n2
+ | ShapeError 2 ->
+ if debug then eprintf "search: ShapeError 2\n";
+ loop n1 (n2+1)
+ in
+ let ts = loop 0 0 in
+ ()
+
+let run debug images =
+ List.iter (
+ fun (domid, domname, arch, mem, lookup_ksym) ->
+ search debug mem lookup_ksym
+ ) images
let summary = s_"list processes in virtual machine"
let description = s_"\