(* Memory info for virtual domains.
   (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc.
   http://libvirt.org/

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *)

open Printf

open Virt_mem_gettext.Gettext
open Virt_mem_utils
open Virt_mem_mmap

(* The implementation of 'ps' has gone through a number of complete
 * rewrites.  This explains the "leading theories" of how to implement
 * this.  Probably some sort of hybrid method is the way to go.
 *
 * General comments: The init_task ksym points to the initial
 * task_struct (PID 1, init).  Out from this init task_struct
 * we find linked lists (struct list_head) which cover all
 * tasks and subsets of the tasks (eg. parent/children).
 * So if we have init_task and we know what the task_struct
 * looks like, then we can follow these chains of pointers
 * to find all processes in the kernel.
 *
 * task_struct varies greatly depending on: word size, kernel
 * version, CONFIG_* settings, and vendor/additional patches.
 *
 * (Theory 1) Precompiled task_struct.  We can easily and reliably
 * determine the Linux kernel version (see virt-uname).  In
 * theory we could compile a list of known kernel versions,
 * check out their sources beforehand, and find the absolute
 * layout of the task_struct (eg. using CIL).  This method would
 * only work for known kernel versions, but has the advantage
 * that all fields in the task_struct would be known.
 *
 * (Theory 2) Fuzzy matched task_struct.  The task_struct has
 * a certain internal structure which is stable even over many
 * kernel revisions.  For example, groups of pointers always
 * occur together.  We search through init_task looking for
 * these characteristic features and where a pointer is found
 * to another task_struct we search that (recursively) on the
 * assumption that those contain the same features at the same
 * location.  This works well for pointers, but not so well for
 * finding other fields (eg. uids, process name, etc).  We can
 * defray the cost of searches by caching the results between
 * runs.
 *)

(* This accumulator stores a map of task_struct address to
 * task_struct content.  It can also store the address of a
 * task_struct which isn't fully parsed yet.
 *   key:   address (int64)
 *   value: task_struct option (None if not parsed yet)
 *)
type task_struct = {
  ts_addr : int64;
}

module Accum = struct
  include Map.Make (Int64)

  let contains_addr accum addr = mem addr accum
  let add_address accum addr = add addr None accum
  let add_task_struct accum addr ts = add addr (Some ts) accum
end

exception ShapeError of int

(* Parse (recursively) all task_structs, starting at init_task.
 *
 * Either returns an accumulator of task_structs, or raises an
 * exception.
 *
 * On failure, the caller can try again with a different shape.
 * The exception gives some information back to the caller about
 * where the match failed, allowing faster, directed searches.
 *)
let get_task_struct debug mem ((ws,e) as wse) ((n1,n2) as shape)
    addr accum =

  (* The 'struct list_head', the standard double-linked list used
   * by Linux, contains next and prev pointers.  However these point
   * not to the beginning of the struct, but to the offset of the
   * list_head within the struct.  This function adjusts the pointer
   * to get back to the beginning of the struct.
   *)
  let container_of addr bitoffset =
    if addr <> 0L then (
      let offset = Int64.of_int (bitoffset lsr 3) in
      addr -^ offset
    ) else 0L
  in

  (* Do get_task_struct as a recursive subfunction so we don't have
   * to pass around all the fixed arguments on the stack.
   *)
  let rec get_task_struct ~i addr accum =
    if Accum.contains_addr accum addr then accum
    else (
      (* NOTE: The order of the following three statements is crucial
       * to avoid cycles and malicious guests.  Do not change it!
       *)
      let bits =
	try Bitstring.bitstring_of_string (get_bytes mem addr (4096*8))
	with Invalid_argument "get_bytes" -> raise (ShapeError i) in
      let accum = Accum.add_address accum addr in
      let accum = ref accum in

      if debug then
	eprintf "trying to match task_struct, i=%d %Lx\n%!" i addr;

      bitmatch bits with
      | { _ : n1*ws : bitstring;    (* Ignore start of task_struct. *)

	  (* struct list_head tasks *)
	  tasks_next : ws : endian(e),
	    save_offset_to (offset),
            bind (let addr = container_of tasks_next offset in
	      eprintf "offset = %d, tasks_next = %Lx, addr = %Lx\n"
		offset tasks_next addr;
		  accum := get_task_struct ~i:1 addr !accum;
		  addr);
	  tasks_prev : ws : endian(e),
            bind (let addr = container_of tasks_prev offset in
	      eprintf "offset = %d, tasks_prev = %Lx, addr = %Lx\n"
		offset tasks_prev addr;
		  accum := get_task_struct ~i:1 addr !accum;
		  addr);

	  _ : n2*ws : bitstring;

	  (* struct task_struct *parent *)
	  parent : ws : endian(e),
            bind (let addr = parent in
		  accum := get_task_struct ~i:2 addr !accum;
		  addr);

	  (* struct list_head children *)
	  children_next : ws : endian(e),
	    save_offset_to (offset),
            bind (let addr = container_of children_next offset in
		  accum := get_task_struct ~i:2 addr !accum;
		  addr);

	  children_prev : ws : endian(e),
            bind (let addr = container_of children_prev offset in
		  accum := get_task_struct ~i:2 addr !accum;
		  addr);

	  (* struct list_head sibling *)
	  sibling_next : ws : endian(e),
	    save_offset_to (offset),
            bind (let addr = container_of sibling_next offset in
		  accum := get_task_struct ~i:2 addr !accum;
		  addr);

	  sibling_prev : ws : endian(e),
            bind (let addr = container_of sibling_prev offset in
		  accum := get_task_struct ~i:2 addr !accum;
		  addr);

	  (* struct task_struct *group_leader *)
	  group_leader : ws : endian(e),
            bind (let addr = group_leader in
		  accum := get_task_struct ~i:2 addr !accum;
		  addr)
	} ->
	  (* Successful match, so return the updated accumulator. *)
	  let accum = !accum in
	  let accum = Accum.add_task_struct accum addr { ts_addr = addr } in
	  accum

      | { _ } ->
	  (* Unsuccessful match, throw an exception. *)
	  raise (ShapeError (-1))
    )
  in
  get_task_struct ~i:0 addr accum

(* This is the directed search function. *)
let search debug mem lookup_ksym =
  let ws = get_wordsize mem in
  let ws = match ws with W32 -> 32 | W64 -> 64 in
  let e = get_endian mem in
  let wse = ws, e in

  let init_task =
    try lookup_ksym "init_task"
    with Not_found ->
      eprintf "virt-ps: lookup_ksym of init_task failed\n";
      exit 1 in

  let accum = Accum.empty in

  let rec loop n1 n2 =
    try
      if debug then eprintf "search: trying (%d, %d)\n" n1 n2;
      let ts = get_task_struct debug mem wse (n1, n2) init_task accum in
      if debug then eprintf "search: success (%d, %d)\n" n1 n2;
      ts
    with
    | ShapeError ((-1|0|1) as i) ->
	if debug then eprintf "search: ShapeError %d\n" i;
	loop (n1+1) n2
    | ShapeError 2 ->
	if debug then eprintf "search: ShapeError 2\n";
	loop n1 (n2+1)
  in
  let ts = loop 0 0 in
  ()

let run debug images =
  List.iter (
    fun (domid, domname, arch, mem, lookup_ksym) ->
      search debug mem lookup_ksym
  ) images

let summary = s_"list processes in virtual machine"
let description = s_"\
virt-ps prints a process listing for virtual machines running under
libvirt."

let () = Virt_mem.register "ps" summary description ~run