From c15c1624692d506eefe5f2cb2d775a6fb9127589 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] Separate out the parsing code into a separately defined
 module. *NOT WORKING*

---
 MANIFEST                              |   2 +
 extract/codegen/.depend               |   4 +
 extract/codegen/Makefile.in           |   2 +-
 extract/codegen/kerneldb_to_parser.ml | 433 +++++++---------------------------
 extract/codegen/pahole_parser.ml      | 390 ++++++++++++++++++++++++++++++
 extract/codegen/pahole_parser.mli     |  91 +++++++
 6 files changed, 569 insertions(+), 353 deletions(-)
 create mode 100644 extract/codegen/pahole_parser.ml
 create mode 100644 extract/codegen/pahole_parser.mli

diff --git a/MANIFEST b/MANIFEST
index b7877ed..fab3591 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -6,6 +6,8 @@ dmesg/Makefile.in
 dmesg/virt_dmesg.ml
 extract/codegen/.depend
 extract/codegen/kerneldb_to_parser.ml
+extract/codeget/pahole_parser.ml
+extract/codeget/pahole_parser.mli
 extract/codegen/Makefile.in
 extract/fedora-koji/.depend
 extract/fedora-koji/fedora_koji_download_kernels.ml
diff --git a/extract/codegen/.depend b/extract/codegen/.depend
index e69de29..bbed776 100644
--- a/extract/codegen/.depend
+++ b/extract/codegen/.depend
@@ -0,0 +1,4 @@
+kerneldb_to_parser.cmo: pahole_parser.cmi 
+kerneldb_to_parser.cmx: pahole_parser.cmx 
+pahole_parser.cmo: pahole_parser.cmi 
+pahole_parser.cmx: pahole_parser.cmi 
diff --git a/extract/codegen/Makefile.in b/extract/codegen/Makefile.in
index eeb8774..8054b55 100644
--- a/extract/codegen/Makefile.in
+++ b/extract/codegen/Makefile.in
@@ -35,7 +35,7 @@ OCAMLOPTLIBS	= -linkpkg camlp4lib.cmxa
 
 TARGETS		= kerneldb-to-parser.opt
 
-OBJS		= kerneldb_to_parser.cmo
+OBJS		= pahole_parser.cmo kerneldb_to_parser.cmo
 XOBJS		= $(OBJS:.cmo=.cmx)
 
 all:	$(TARGETS)
diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml
index e4c7519..177d607 100644
--- a/extract/codegen/kerneldb_to_parser.ml
+++ b/extract/codegen/kerneldb_to_parser.ml
@@ -131,7 +131,7 @@ let structs = [
   };
 ]
 
-let debug = false
+let debug = true
 
 open Camlp4.PreCast
 open Syntax
@@ -141,6 +141,8 @@ open ExtList
 open ExtString
 open Printf
 
+module PP = Pahole_parser
+
 let (//) = Filename.concat
 
 (* Couple of handy camlp4 construction functions which do some
@@ -211,366 +213,83 @@ Example (from toplevel of virt-mem source tree):
 " arg0 arg0 arg0;
 	exit 2 in
 
-  (* Get the *.info files from the kernels database. *)
-  let infos = Sys.readdir kernelsdir in
-  let infos = Array.to_list infos in
-  let infos = List.filter (fun name -> String.ends_with name ".info") infos in
-  let infos = List.map ( (//) kernelsdir) infos in
-
-  (* Regular expressions.  We really really should use ocaml-mikmatch ... *)
-  let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in
-  let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in
+  let kernels = PP.list_kernels kernelsdir in
+  let nr_kernels = List.length kernels in
 
-  (* Parse in the *.info files.  These have historically had a few different
-   * formats that we need to support.
-   *)
-  let infos = List.map (
-    fun filename ->
-      (* Get the basename (for getting the .data file later on). *)
-      let basename = Filename.chop_suffix filename ".info" in
-
-      let chan = open_in filename in
-      let line = input_line chan in
-
-      (* Kernel version string. *)
-      let version, arch =
-	if Pcre.pmatch ~rex:re_oldformat line then (
-	  (* If the file starts with "RPM: \d+: ..." then it's the
-	   * original Fedora format.  Everything in one line.
-	   *)
-	  let subs = Pcre.exec ~rex:re_oldformat line in
-	  (* let name = Pcre.get_substring subs 1 in *)
-	  let version = Pcre.get_substring subs 2 in
-	  let release = Pcre.get_substring subs 3 in
-	  let arch = Pcre.get_substring subs 4 in
-	  close_in chan;
-	  (* XXX Map name -> PAE, hugemem etc. *)
-	  (* name, *) sprintf "%s-%s.%s" version release arch, arch
-	) else (
-	  (* New-style "key: value" entries, up to end of file or the first
-	   * blank line.
-	   *)
-	  let (*name,*) version, release, arch =
-	    (*ref "",*) ref "", ref "", ref "" in
-	  let rec loop line =
-	    try
-	      let subs = Pcre.exec ~rex:re_keyvalue line in
-	      let key = Pcre.get_substring subs 1 in
-	      let value = Pcre.get_substring subs 2 in
-	      (*if key = "Name" then name := value
-	      else*) if key = "Version" then version := value
-	      else if key = "Release" then release := value
-	      else if key = "Architecture" then arch := value;
-	      let line = input_line chan in
-	      loop line
-	    with
-	      Not_found | End_of_file ->
-		close_in chan
-	  in
-	  loop line;
-	  let (*name,*) version, release, arch =
-	    (*!name,*) !version, !release, !arch in
-	  if (*name = "" ||*) version = "" || release = "" || arch = "" then
-	    failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename);
-	  (* XXX Map name -> PAE, hugemem etc. *)
-	  (* name, *) sprintf "%s-%s.%s" version release arch, arch
-	) in
-
-      (*printf "%s -> %s %s\n%!" basename version arch;*)
-
-      (basename, version, arch)
-  ) infos in
-
-  let nr_kernels = List.length infos in
-
-  (* For quick access to the opener strings, build a hash. *)
-  let openers = Hashtbl.create 13 in
-  List.iter (
-    fun (name, { opener = opener; closer = closer }) ->
-      Hashtbl.add openers opener (closer, name)
-  ) structs;
-
-  (* Now read the data files and parse out the structures of interest. *)
   let kernels = List.mapi (
-    fun i (basename, version, arch) ->
+    fun i info ->
       printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels;
 
-      let file_exists name =
-	try Unix.access name [Unix.F_OK]; true
-	with Unix.Unix_error _ -> false
-      in
-      let close_process_in cmd chan =
-	match Unix.close_process_in chan with
-	| Unix.WEXITED 0 -> ()
-	| Unix.WEXITED i ->
-	    eprintf "%s: command exited with code %d\n" cmd i; exit i
-	| Unix.WSIGNALED i ->
-	    eprintf "%s: command exited with signal %d\n" cmd i; exit 1
-	| Unix.WSTOPPED i ->
-	    eprintf "%s: command stopped by signal %d\n" cmd i; exit 1
-      in
-
-      (* Open the data file, uncompressing it on the fly if necessary. *)
-      let chan, close =
-	if file_exists (basename ^ ".data") then
-	  open_in (basename ^ ".data"), close_in
-	else if file_exists (basename ^ ".data.gz") then (
-	  let cmd =
-	    sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in
-	  Unix.open_process_in cmd, close_process_in cmd
-	)
-	else if file_exists (basename ^ ".data.bz2") then (
-	  let cmd =
-	    sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in
-	  Unix.open_process_in cmd, close_process_in cmd
-	) else
-	  failwith
-	    (sprintf "%s: cannot find corresponding data file" basename) in
-
-      (* Read the data file in, looking for structures of interest to us. *)
-      let bodies = Hashtbl.create 13 in
-      let rec loop () =
-	let line = input_line chan in
-
-	(* If the line is an opener for one of the structures we
-	 * are looking for, then for now just save all the text until
-	 * we get to the closer line.
-	 *)
-	(try
-	   let closer, name = Hashtbl.find openers line in
-	   let rec loop2 lines =
-	     let line = input_line chan in
-	     let lines = line :: lines in
-	     if String.starts_with line closer then List.rev lines
-	     else loop2 lines
-	   in
-
-	   let body =
-	     try loop2 [line]
-	     with End_of_file ->
-	       failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in
-
-	   Hashtbl.replace bodies name body
-	 with Not_found -> ());
-
-	loop ()
-      in
-      (try loop () with End_of_file -> ());
-
-      close chan;
+      let struct_names = List.map fst structs in
+      let structures = PP.load_structures info struct_names in
 
-      (* Make sure we got all the mandatory structures. *)
+      (* Make sure we got all the mandatory structures & fields. *)
       List.iter (
-	 fun (name, { mandatory_struct = mandatory }) ->
-	   if mandatory && not (Hashtbl.mem bodies name) then
-	     failwith (sprintf "%s: structure %s not found in this kernel" basename name)
-      ) structs;
-
-      (basename, version, arch, bodies)
-  ) infos in
-
-  (* Now parse each structure body.
-   * XXX This would be better as a proper lex/yacc parser.
-   * XXX Even better would be to have a proper interface to libdwarves.
-   *)
-  let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in
-  let re_intfield = Pcre.regexp "(?:int|char)\\s+(\\w+);" in
-  let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in
-  let re_voidptrfield = Pcre.regexp "void\\s*\\*\\s*(\\w+);" in
-  let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in
-  let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in
-  let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in
-
-  (* 'basename' is the source file, and second parameter ('body') is
-   * the list of text lines which covers this structure (minus the
-   * opener line).  Result is the list of parsed fields from this
-   * structure.
-   *)
-  let rec parse basename = function
-    | [] -> assert false
-    | [_] -> []			 (* Just the closer line, finished. *)
-    | line :: lines when Pcre.pmatch ~rex:re_structopener line ->
-      (* Recursively parse a sub-structure.  First search for the
-       * corresponding closer line.
-       *)
-      let rec loop depth acc = function
-	| [] ->
-	    eprintf "%s: %S has no matching close structure line\n%!"
-	      basename line;
-	    assert false
-	| line :: lines when Pcre.pmatch ~rex:re_structopener line ->
-	  loop (depth+1) (line :: acc) lines
-	| line :: lines
-	    when depth = 0 && Pcre.pmatch ~rex:re_structcloser line ->
-	  (line :: acc), lines
-	| line :: lines
-	    when depth > 0 && Pcre.pmatch ~rex:re_structcloser line ->
-	  loop (depth-1) (line :: acc) lines
-	| line :: lines -> loop depth (line :: acc) lines
-      in
-      let nested_body, rest = loop 0 [] lines in
-
-      (* Then parse the sub-structure. *)
-      let struct_name, nested_body =
-	match nested_body with
-	| [] -> assert false
-	| closer :: _ ->
-	    let subs = Pcre.exec ~rex:re_structcloser closer in
-	    let struct_name =
-	      try Some (Pcre.get_substring subs 1) with Not_found -> None in
-	    struct_name, List.rev nested_body in
-      let nested_fields = parse basename nested_body in
-
-      (* Prefix the sub-fields with the name of the structure. *)
-      let nested_fields =
-	match struct_name with
-	| None -> nested_fields
-	| Some prefix ->
-            List.map (
-              fun (name, details) -> (prefix ^ "'" ^ name, details)
-            ) nested_fields in
-
-      (* Parse the rest. *)
-      nested_fields @ parse basename rest
-
-    | line :: lines when Pcre.pmatch ~rex:re_intfield line ->
-      (* An int field. *)
-      let subs = Pcre.exec ~rex:re_intfield line in
-      let name = Pcre.get_substring subs 1 in
-      (try
-	 let subs = Pcre.exec ~rex:re_offsetsize line in
-	 let offset = int_of_string (Pcre.get_substring subs 1) in
-	 let size = int_of_string (Pcre.get_substring subs 2) in
-	 (name, (`Int, offset, size)) :: parse basename lines
-       with
-	 Not_found -> parse basename lines
-      );
-
-    | line :: lines when Pcre.pmatch ~rex:re_ptrfield line ->
-      (* A pointer-to-struct field. *)
-      let subs = Pcre.exec ~rex:re_ptrfield line in
-      let struct_name = Pcre.get_substring subs 1 in
-      let name = Pcre.get_substring subs 2 in
-      (try
-	 let subs = Pcre.exec ~rex:re_offsetsize line in
-	 let offset = int_of_string (Pcre.get_substring subs 1) in
-	 let size = int_of_string (Pcre.get_substring subs 2) in
-	 (name, (`Ptr struct_name, offset, size))
-	   :: parse basename lines
-       with
-	 Not_found -> parse basename lines
-      );
-
-    | line :: lines when Pcre.pmatch ~rex:re_voidptrfield line ->
-      (* A void* field. *)
-      let subs = Pcre.exec ~rex:re_voidptrfield line in
-      let name = Pcre.get_substring subs 1 in
-      (try
-	 let subs = Pcre.exec ~rex:re_offsetsize line in
-	 let offset = int_of_string (Pcre.get_substring subs 1) in
-	 let size = int_of_string (Pcre.get_substring subs 2) in
-	 (name, (`VoidPtr, offset, size))
-	   :: parse basename lines
-       with
-	 Not_found -> parse basename lines
-      );
-
-    | line :: lines when Pcre.pmatch ~rex:re_strfield line ->
-      (* A string (char array) field. *)
-      let subs = Pcre.exec ~rex:re_strfield line in
-      let name = Pcre.get_substring subs 1 in
-      let width = int_of_string (Pcre.get_substring subs 2) in
-      (try
-	 let subs = Pcre.exec ~rex:re_offsetsize line in
-	 let offset = int_of_string (Pcre.get_substring subs 1) in
-	 let size = int_of_string (Pcre.get_substring subs 2) in
-	 (name, (`Str width, offset, size))
-	   :: parse basename lines
-       with
-	 Not_found -> parse basename lines
-      );
-
-    | _ :: lines ->
-	(* Just ignore any other field we can't parse. *)
-	parse basename lines
+	fun (struct_name,
+	     { mandatory_struct = mandatory; fields = wanted_fields }) ->
+	  try
+	    let s =
+	      List.find (fun s -> struct_name = s.PP.struct_name)
+		structures in
+
+	    (* Check we have all the mandatory fields. *)
+	    let all_fields = s.PP.struct_fields in
+	    List.iter (
+	      fun (wanted_field, { mandatory_field = mandatory }) ->
+		let got_it =
+		  List.exists (
+		    fun { PP.field_name = name } -> name = wanted_field
+		  ) all_fields in
+		if mandatory && not got_it then (
+		  eprintf "%s: structure %s is missing required field %s\n"
+		    info.PP.basename struct_name wanted_field;
+		  eprintf "fields found in this structure:\n";
+		  List.iter (
+		    fun { PP.field_name = name } -> eprintf "\t%s\n" name
+		  ) all_fields;
+		  exit 1
+		);
+	    ) wanted_fields
 
-  in
+	  with Not_found ->
+	    if mandatory then
+	      failwith (sprintf "%s: structure %s not found in this kernel"
+			  info.PP.basename struct_name)
+      ) structs;
 
-  let kernels = List.map (
-    fun (basename, version, arch, bodies) ->
-      let structures = List.filter_map (
-	fun (struct_name, { fields = wanted_fields }) ->
-	  let body =
-	    try Some (Hashtbl.find bodies struct_name)
-	    with Not_found -> None in
-	  match body with
-	  | None -> None
-	  | Some body ->
-	      let body = List.tl body in (* Don't care about opener line. *)
-	      let fields = parse basename body in
-
-	      (* Compute total size of the structure. *)
-	      let total_size =
-		let fields = List.map (
-		  fun (_, (_, offset, size)) -> offset + size
-		) fields in
-		List.fold_left max 0 fields in
-
-	      (* That got us all the fields, but we only care about
-	       * the wanted_fields.
-	       *)
-	      let all_fields = fields in
-	      let fields = List.filter (
-		fun (name, _) -> List.mem_assoc name wanted_fields
+      let structures =
+	List.map (
+	  fun ({ PP.struct_name = struct_name; PP.struct_fields = fields }
+		 as structure) ->
+	    let { fields = wanted_fields } = List.assoc struct_name structs in
+
+	    (* That got us all the fields, but we only care about
+	     * the wanted_fields.
+	     *)
+	    let fields = List.filter (
+	      fun { PP.field_name = name } -> List.mem_assoc name wanted_fields
+	    ) fields in
+
+	    (* Prefix all the field names with the structure name. *)
+	    let fields =
+	      List.map (
+		fun ({ PP.field_name = name } as field) ->
+		  let name = struct_name ^ "_" ^ name in
+		  { field with PP.field_name = name }
 	      ) fields in
+	    { structure with PP.struct_fields = fields }
+	) structures in
 
-	      (* Also check we have all the mandatory fields. *)
-	      List.iter (
-		fun (wanted_field, { mandatory_field = mandatory }) ->
-		  if mandatory && not (List.mem_assoc wanted_field fields)
-		  then (
-		    eprintf "%s: structure %s is missing required field %s\n" basename struct_name wanted_field;
-		    eprintf "fields found in this structure:\n";
-		    List.iter (
-		      fun (name, _) -> eprintf "\t%s\n" name
-		    ) all_fields;
-		    exit 1
-		  );
-	      ) wanted_fields;
-
-	      (* Prefix all the field names with the structure name. *)
-	      let fields =
-		List.map (fun (name, details) ->
-			    struct_name ^ "_" ^ name, details) fields in
-
-	      Some (struct_name, (fields, total_size))
-      ) structs in
-
-      (basename, version, arch, structures)
+      (info, structures)
   ) kernels in
 
   if debug then
     List.iter (
-      fun (basename, version, arch, structures) ->
-	printf "%s (version: %s, arch: %s):\n" basename version arch;
+      fun (info, structures) ->
+	printf "%s ----------\n" (PP.string_of_info info);
 	List.iter (
-	  fun (struct_name, (fields, total_size)) ->
-	    printf "  struct %s {\n" struct_name;
-	    List.iter (
-	      fun (field_name, (typ, offset, size)) ->
-		(match typ with
-		 | `Int ->
-                     printf "    int %s; " field_name
-		 | `Ptr struct_name ->
-		     printf "    struct %s *%s; " struct_name field_name
-		 | `VoidPtr ->
-		     printf "    void *%s; " field_name
-		 | `Str width ->
-		     printf "    char %s[%d]; " field_name width
-		);
-		printf " /* offset = %d, size = %d */\n" offset size
-	    ) fields;
-	    printf "  } /* %d bytes */\n\n" total_size;
+	  fun structure ->
+	    printf "%s\n\n" (PP.string_of_structure structure);
 	) structures;
     ) kernels;
 
@@ -580,7 +299,9 @@ Example (from toplevel of virt-mem source tree):
   let () =
     let _loc = Loc.ghost in
 
-    let versions = List.map (fun (_, version, _, _) -> version) kernels in
+    let versions = List.map (
+      fun ({ PP.kernel_version = version }, _) -> version
+    ) kernels in
 
     (* Sort them in reverse because we are going to generate the
      * final list in reverse.
@@ -607,12 +328,18 @@ Example (from toplevel of virt-mem source tree):
    *)
   let files =
     List.map (
-      fun (name, _) ->
+      fun (struct_name, _) ->
 	let kernels =
 	  List.filter_map (
-	    fun (basename, version, arch, structures) ->
-	      try Some (basename, version, arch, List.assoc name structures)
-	      with Not_found -> None
+	    fun (info, structures) ->
+	      try
+		let structure =
+		  List.find (
+		    fun { PP.struct_name = name } -> name = struct_name
+		  ) structures in
+		Some (info, structure)
+	      with Not_found ->
+		None
 	  ) kernels in
 
 	(* Sort the kernels, which makes the generated output more stable
@@ -620,11 +347,12 @@ Example (from toplevel of virt-mem source tree):
 	 *)
 	let kernels = List.sort kernels in
 
-	name, kernels
+	struct_name, kernels
     ) structs in
 
   let kernels = () in ignore kernels; (* garbage collect *)
 
+(*
   (* Get just the field types.
    *
    * It's plausible that a field with the same name has a different
@@ -1063,3 +791,4 @@ Example (from toplevel of virt-mem source tree):
 
       Unix.unlink new_output_file
   ) files
+*)
diff --git a/extract/codegen/pahole_parser.ml b/extract/codegen/pahole_parser.ml
new file mode 100644
index 0000000..1798b44
--- /dev/null
+++ b/extract/codegen/pahole_parser.ml
@@ -0,0 +1,390 @@
+(* Memory info command for virtual domains.
+   (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc.
+   http://libvirt.org/
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *)
+
+open ExtList
+open ExtString
+
+open Printf
+
+let (//) = Filename.concat
+
+type pathname = string
+
+type info = {
+  kernel_version : string;
+  arch : string;
+  basename : string;
+}
+
+type structure = {
+  struct_name : string;
+  struct_total_size : int;
+  struct_fields : field list;
+}
+
+and field = {
+  field_name : string;
+  field_type : f_type;
+  field_offset : int;
+  field_size : int;
+}
+
+and f_type =
+  | FStructPointer of string
+  | FVoidPointer
+  | FListHeadPointer
+  | FInteger
+  | FString of int
+
+let string_of_info i =
+  sprintf "%s: %s %s" i.basename i.kernel_version i.arch
+
+let rec string_of_structure s =
+  let fields = List.map string_of_field s.struct_fields in
+  let fields = String.concat "\n  " fields in
+  sprintf "struct %s {\n  %s\n}; /* total size = %d bytes */"
+    s.struct_name fields s.struct_total_size
+
+and string_of_field f =
+  sprintf "%s %s; /* offset = %d, size = %d */"
+    f.field_name (string_of_f_type f.field_type)
+    f.field_offset f.field_size
+
+and string_of_f_type = function
+  | FStructPointer struct_name -> sprintf "struct %s*" struct_name
+  | FVoidPointer -> "void *"
+  | FListHeadPointer -> "struct list_head *"
+  | FInteger -> "int"
+  | FString width -> sprintf "char[%d]" width
+
+(* Regular expressions.  We really really should use ocaml-mikmatch ... *)
+let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)"
+let re_keyvalue = Pcre.regexp "^(\\w+): (.*)"
+
+let list_kernels path =
+  (* Get the *.info files from the kernels database. *)
+  let infos = Sys.readdir path in
+  let infos = Array.to_list infos in
+  let infos = List.filter (fun name -> String.ends_with name ".info") infos in
+  let infos = List.map ( (//) path) infos in
+
+  (* Parse in the *.info files.  These have historically had a few different
+   * formats that we need to support.
+   *)
+  let infos = List.map (
+    fun filename ->
+      (* Get the basename (for getting the .data file later on). *)
+      let basename = Filename.chop_suffix filename ".info" in
+
+      let chan = open_in filename in
+      let line = input_line chan in
+
+      (* Kernel version string. *)
+      let version, arch =
+	if Pcre.pmatch ~rex:re_oldformat line then (
+	  (* If the file starts with "RPM: \d+: ..." then it's the
+	   * original Fedora format.  Everything in one line.
+	   *)
+	  let subs = Pcre.exec ~rex:re_oldformat line in
+	  (* let name = Pcre.get_substring subs 1 in *)
+	  let version = Pcre.get_substring subs 2 in
+	  let release = Pcre.get_substring subs 3 in
+	  let arch = Pcre.get_substring subs 4 in
+	  close_in chan;
+	  (* XXX Map name -> PAE, hugemem etc. *)
+	  (* name, *) sprintf "%s-%s.%s" version release arch, arch
+	) else (
+	  (* New-style "key: value" entries, up to end of file or the first
+	   * blank line.
+	   *)
+	  let (*name,*) version, release, arch =
+	    (*ref "",*) ref "", ref "", ref "" in
+	  let rec loop line =
+	    try
+	      let subs = Pcre.exec ~rex:re_keyvalue line in
+	      let key = Pcre.get_substring subs 1 in
+	      let value = Pcre.get_substring subs 2 in
+	      (*if key = "Name" then name := value
+	      else*) if key = "Version" then version := value
+	      else if key = "Release" then release := value
+	      else if key = "Architecture" then arch := value;
+	      let line = input_line chan in
+	      loop line
+	    with
+	      Not_found | End_of_file ->
+		close_in chan
+	  in
+	  loop line;
+	  let (*name,*) version, release, arch =
+	    (*!name,*) !version, !release, !arch in
+	  if (*name = "" ||*) version = "" || release = "" || arch = "" then
+	    failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename);
+	  (* XXX Map name -> PAE, hugemem etc. *)
+	  (* name, *) sprintf "%s-%s.%s" version release arch, arch
+	) in
+
+      (*printf "%s -> %s %s\n%!" basename version arch;*)
+
+      { basename = basename; arch = arch;
+	kernel_version = version }
+  ) infos in
+  infos
+
+(* XXX This would be better as a proper lex/yacc parser.
+ * XXX Even better would be to have a proper interface to libdwarves.
+ *)
+let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/"
+let re_intfield = Pcre.regexp "(?:int|char)\\s+(\\w+);"
+let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);"
+let re_voidptrfield = Pcre.regexp "void\\s*\\*\\s*(\\w+);"
+let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];"
+let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$"
+let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;"
+
+let load_structures { basename = basename } struct_names =
+  (* For quick access to the opener strings, build a hash. *)
+  let openers = Hashtbl.create 13 in
+  List.iter (
+    fun struct_name ->
+      let opener = sprintf "struct %s {" struct_name in
+      let closer = "};" in
+      Hashtbl.add openers opener (closer, struct_name)
+  ) struct_names;
+
+  (* Now read the data file and parse out the structures of interest. *)
+  let file_exists name =
+    try Unix.access name [Unix.F_OK]; true
+    with Unix.Unix_error _ -> false
+  in
+  let close_process_in cmd chan =
+    match Unix.close_process_in chan with
+    | Unix.WEXITED 0 -> ()
+    | Unix.WEXITED i ->
+	eprintf "%s: command exited with code %d\n" cmd i; exit i
+    | Unix.WSIGNALED i ->
+	eprintf "%s: command exited with signal %d\n" cmd i; exit 1
+    | Unix.WSTOPPED i ->
+	eprintf "%s: command stopped by signal %d\n" cmd i; exit 1
+  in
+
+  (* Open the data file, uncompressing it on the fly if necessary. *)
+  let chan, close =
+    if file_exists (basename ^ ".data") then
+      open_in (basename ^ ".data"), close_in
+    else if file_exists (basename ^ ".data.gz") then (
+      let cmd =
+	sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in
+      Unix.open_process_in cmd, close_process_in cmd
+    )
+    else if file_exists (basename ^ ".data.bz2") then (
+      let cmd =
+	sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in
+      Unix.open_process_in cmd, close_process_in cmd
+    ) else
+      failwith (sprintf "%s: cannot find corresponding data file" basename) in
+
+  (* Read the data file in, looking for structures of interest to us. *)
+  let bodies = Hashtbl.create 13 in
+  let rec loop () =
+    let line = input_line chan in
+
+    (* If the line is an opener for one of the structures we
+     * are looking for, then for now just save all the text until
+     * we get to the closer line.
+     *)
+    (try
+       let closer, struct_name = Hashtbl.find openers line in
+       let rec loop2 lines =
+	 let line = input_line chan in
+	 let lines = line :: lines in
+	 if String.starts_with line closer then List.rev lines
+	 else loop2 lines
+       in
+
+       let body =
+	 try loop2 [line]
+	 with End_of_file ->
+	   failwith (sprintf "%s: %s: %S not matched by closing %S" basename struct_name line closer) in
+
+       Hashtbl.replace bodies struct_name body
+     with Not_found -> ());
+
+    loop ()
+  in
+  (try loop () with End_of_file -> ());
+
+  close chan;
+
+  (* Now parse each structure body. *)
+
+  (* 'basename' is the source file, and second parameter ('body') is
+   * the list of text lines which covers this structure (minus the
+   * opener line).  Result is the list of parsed fields from this
+   * structure.
+   *)
+  let rec parse basename = function
+    | [] -> assert false
+    | [_] -> []			 (* Just the closer line, finished. *)
+    | line :: lines when Pcre.pmatch ~rex:re_structopener line ->
+      (* Recursively parse a sub-structure.  First search for the
+       * corresponding closer line.
+       *)
+      let rec loop depth acc = function
+	| [] ->
+	    eprintf "%s: %S has no matching close structure line\n%!"
+	      basename line;
+	    assert false
+	| line :: lines when Pcre.pmatch ~rex:re_structopener line ->
+	  loop (depth+1) (line :: acc) lines
+	| line :: lines
+	    when depth = 0 && Pcre.pmatch ~rex:re_structcloser line ->
+	  (line :: acc), lines
+	| line :: lines
+	    when depth > 0 && Pcre.pmatch ~rex:re_structcloser line ->
+	  loop (depth-1) (line :: acc) lines
+	| line :: lines -> loop depth (line :: acc) lines
+      in
+      let nested_body, rest = loop 0 [] lines in
+
+      (* Then parse the sub-structure. *)
+      let struct_name, nested_body =
+	match nested_body with
+	| [] -> assert false
+	| closer :: _ ->
+	    let subs = Pcre.exec ~rex:re_structcloser closer in
+	    let struct_name =
+	      try Some (Pcre.get_substring subs 1) with Not_found -> None in
+	    struct_name, List.rev nested_body in
+      let nested_fields = parse basename nested_body in
+
+      (* Prefix the sub-fields with the name of the structure. *)
+      let nested_fields =
+	match struct_name with
+	| None -> nested_fields
+	| Some prefix ->
+            List.map (
+              fun ({ field_name = name } as field) ->
+		let name = prefix ^ "'" ^ name in
+		{ field with field_name = name }
+            ) nested_fields in
+
+      (* Parse the rest. *)
+      nested_fields @ parse basename rest
+
+    | line :: lines when Pcre.pmatch ~rex:re_intfield line ->
+      (* An int field. *)
+      let subs = Pcre.exec ~rex:re_intfield line in
+      let name = Pcre.get_substring subs 1 in
+      (try
+	 let subs = Pcre.exec ~rex:re_offsetsize line in
+	 let offset = int_of_string (Pcre.get_substring subs 1) in
+	 let size = int_of_string (Pcre.get_substring subs 2) in
+	 let field =
+	   { field_name = name; field_type = FInteger;
+	     field_offset = offset; field_size = size } in
+	 field :: parse basename lines
+       with
+	 Not_found -> parse basename lines
+      );
+
+    | line :: lines when Pcre.pmatch ~rex:re_ptrfield line ->
+      (* A pointer-to-struct field. *)
+      let subs = Pcre.exec ~rex:re_ptrfield line in
+      let struct_name = Pcre.get_substring subs 1 in
+      let name = Pcre.get_substring subs 2 in
+      (try
+	 let subs = Pcre.exec ~rex:re_offsetsize line in
+	 let offset = int_of_string (Pcre.get_substring subs 1) in
+	 let size = int_of_string (Pcre.get_substring subs 2) in
+	 let field_type =
+	   if struct_name <> "list_head" then
+	     FStructPointer struct_name
+	   else
+	     FListHeadPointer in
+	 let field =
+	   { field_name = name; field_type = field_type;
+	     field_offset = offset; field_size = size } in
+	 field :: parse basename lines
+       with
+	 Not_found -> parse basename lines
+      );
+
+    | line :: lines when Pcre.pmatch ~rex:re_voidptrfield line ->
+      (* A void* field. *)
+      let subs = Pcre.exec ~rex:re_voidptrfield line in
+      let name = Pcre.get_substring subs 1 in
+      (try
+	 let subs = Pcre.exec ~rex:re_offsetsize line in
+	 let offset = int_of_string (Pcre.get_substring subs 1) in
+	 let size = int_of_string (Pcre.get_substring subs 2) in
+	 let field =
+	   { field_name = name; field_type = FVoidPointer;
+	     field_offset = offset; field_size = size } in
+	 field :: parse basename lines
+       with
+	 Not_found -> parse basename lines
+      );
+
+    | line :: lines when Pcre.pmatch ~rex:re_strfield line ->
+      (* A string (char array) field. *)
+      let subs = Pcre.exec ~rex:re_strfield line in
+      let name = Pcre.get_substring subs 1 in
+      let width = int_of_string (Pcre.get_substring subs 2) in
+      (try
+	 let subs = Pcre.exec ~rex:re_offsetsize line in
+	 let offset = int_of_string (Pcre.get_substring subs 1) in
+	 let size = int_of_string (Pcre.get_substring subs 2) in
+	 let field =
+	   { field_name = name; field_type = FString width;
+	     field_offset = offset; field_size = size } in
+	 field :: parse basename lines
+       with
+	 Not_found -> parse basename lines
+      );
+
+    | _ :: lines ->
+	(* Just ignore any other field we can't parse. *)
+	parse basename lines
+  in
+
+  let structures = List.filter_map (
+    fun struct_name ->
+      let body =
+	try Some (Hashtbl.find bodies struct_name)
+	with Not_found -> None in
+      match body with
+      | None -> None
+      | Some body ->
+	  let body = List.tl body in (* Don't care about opener line. *)
+	  let fields = parse basename body in
+
+	  (* Compute total size of the structure. *)
+	  let total_size =
+	    let fields = List.map (
+	      fun { field_offset = offset;
+		    field_size = size } -> offset + size
+	    ) fields in
+	    List.fold_left max 0 fields in
+
+	  Some { struct_name = struct_name;
+		 struct_fields = fields;
+		 struct_total_size = total_size }
+  ) struct_names in
+
+  structures
diff --git a/extract/codegen/pahole_parser.mli b/extract/codegen/pahole_parser.mli
new file mode 100644
index 0000000..f34b7e3
--- /dev/null
+++ b/extract/codegen/pahole_parser.mli
@@ -0,0 +1,91 @@
+(** 'pahole' output parser. *)
+(* Memory info command for virtual domains.
+   (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc.
+   http://libvirt.org/
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *)
+
+(** This parses the output of the pahole command, allowing us
+    to extract the layout of kernel structures for particular
+    kernel versions.
+
+    Its primary input is the [*.info] and [*.data*] files found
+    in the [kernels/] subdirectory (ie. the kerneldb).
+*)
+
+(** {2 Types} *)
+
+type pathname = string
+  (** Path and filenames. *)
+
+type info = {
+  kernel_version : string;	       (** Kernel version that this matches. *)
+  arch : string;		       (** Architecture, eg. "i686", "ppc64". *)
+  basename : string;		       (** [basename.info] is the info
+					   file and [basename.data*] is
+					   the data file. *)
+}
+  (** Kernel metainformation, extracted from the [*.info] file. *)
+
+type structure = {
+  struct_name : string;			(** Structure name. *)
+  struct_total_size : int;		(** Total size in bytes. *)
+  struct_fields : field list;		(** Fields in the structure. *)
+}
+  (** A kernel structure, eg. [task_struct]. *)
+
+and field = {
+  field_name : string;			(** Field name. *)
+  field_type : f_type;			(** Field type. *)
+  field_offset : int;			(** Offset within the structure. *)
+  field_size : int;			(** Size of the field (bytes). *)
+}
+  (** A kernel structure field.
+
+      Note that nested fields are flattened with single quotes (')
+      between elements, so you get names like [tasks'next]. *)
+
+and f_type =
+  | FStructPointer of string		(** A pointer to a named struct. *)
+  | FVoidPointer			(** A [void*] pointer. *)
+  | FListHeadPointer		        (** A pointer to a [list_head]. *)
+  | FInteger				(** An integer. *)
+  | FString of int			(** A char array of given width. *)
+  (** Type of a kernel field. *)
+
+val string_of_info : info -> string
+val string_of_structure : structure -> string
+val string_of_field : field -> string
+val string_of_f_type : f_type -> string
+  (** Printing functions. *)
+
+(** {2 List kernels in kerneldb} *)
+
+val list_kernels : pathname -> info list
+  (** Return a list of all the kernels in the kerneldb at [path]. *)
+
+(** {2 Load kernel structures} *)
+
+val load_structures : info -> string list -> structure list
+  (** [load_structures info names] loads the named kernel structures
+      from a particular kernel.
+
+      The returned list is not necessarily in the same order, or the
+      same length, as the [names] list.  Check the
+      {!structure.struct_name} field for the structure name.
+      Structures which don't actually occur in the given kernel are
+      not loaded and not present in the final list.
+  *)
-- 
1.8.3.1