From: Richard W.M. Jones <"Richard W.M. Jones "> Date: Wed, 13 Aug 2008 14:36:22 +0000 (+0100) Subject: Separate out the parsing code into a separately defined module. *NOT WORKING* X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=c15c1624692d506eefe5f2cb2d775a6fb9127589;p=virt-mem.git Separate out the parsing code into a separately defined module. *NOT WORKING* --- diff --git a/MANIFEST b/MANIFEST index b7877ed..fab3591 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6,6 +6,8 @@ dmesg/Makefile.in dmesg/virt_dmesg.ml extract/codegen/.depend extract/codegen/kerneldb_to_parser.ml +extract/codeget/pahole_parser.ml +extract/codeget/pahole_parser.mli extract/codegen/Makefile.in extract/fedora-koji/.depend extract/fedora-koji/fedora_koji_download_kernels.ml diff --git a/extract/codegen/.depend b/extract/codegen/.depend index e69de29..bbed776 100644 --- a/extract/codegen/.depend +++ b/extract/codegen/.depend @@ -0,0 +1,4 @@ +kerneldb_to_parser.cmo: pahole_parser.cmi +kerneldb_to_parser.cmx: pahole_parser.cmx +pahole_parser.cmo: pahole_parser.cmi +pahole_parser.cmx: pahole_parser.cmi diff --git a/extract/codegen/Makefile.in b/extract/codegen/Makefile.in index eeb8774..8054b55 100644 --- a/extract/codegen/Makefile.in +++ b/extract/codegen/Makefile.in @@ -35,7 +35,7 @@ OCAMLOPTLIBS = -linkpkg camlp4lib.cmxa TARGETS = kerneldb-to-parser.opt -OBJS = kerneldb_to_parser.cmo +OBJS = pahole_parser.cmo kerneldb_to_parser.cmo XOBJS = $(OBJS:.cmo=.cmx) all: $(TARGETS) diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index e4c7519..177d607 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -131,7 +131,7 @@ let structs = [ }; ] -let debug = false +let debug = true open Camlp4.PreCast open Syntax @@ -141,6 +141,8 @@ open ExtList open ExtString open Printf +module PP = Pahole_parser + let (//) = Filename.concat (* Couple of handy camlp4 construction functions which do some @@ -211,366 +213,83 @@ Example (from toplevel of virt-mem source tree): " arg0 arg0 arg0; exit 2 in - (* Get the *.info files from the kernels database. *) - let infos = Sys.readdir kernelsdir in - let infos = Array.to_list infos in - let infos = List.filter (fun name -> String.ends_with name ".info") infos in - let infos = List.map ( (//) kernelsdir) infos in - - (* Regular expressions. We really really should use ocaml-mikmatch ... *) - let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in - let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in + let kernels = PP.list_kernels kernelsdir in + let nr_kernels = List.length kernels in - (* Parse in the *.info files. These have historically had a few different - * formats that we need to support. - *) - let infos = List.map ( - fun filename -> - (* Get the basename (for getting the .data file later on). *) - let basename = Filename.chop_suffix filename ".info" in - - let chan = open_in filename in - let line = input_line chan in - - (* Kernel version string. *) - let version, arch = - if Pcre.pmatch ~rex:re_oldformat line then ( - (* If the file starts with "RPM: \d+: ..." then it's the - * original Fedora format. Everything in one line. - *) - let subs = Pcre.exec ~rex:re_oldformat line in - (* let name = Pcre.get_substring subs 1 in *) - let version = Pcre.get_substring subs 2 in - let release = Pcre.get_substring subs 3 in - let arch = Pcre.get_substring subs 4 in - close_in chan; - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) else ( - (* New-style "key: value" entries, up to end of file or the first - * blank line. - *) - let (*name,*) version, release, arch = - (*ref "",*) ref "", ref "", ref "" in - let rec loop line = - try - let subs = Pcre.exec ~rex:re_keyvalue line in - let key = Pcre.get_substring subs 1 in - let value = Pcre.get_substring subs 2 in - (*if key = "Name" then name := value - else*) if key = "Version" then version := value - else if key = "Release" then release := value - else if key = "Architecture" then arch := value; - let line = input_line chan in - loop line - with - Not_found | End_of_file -> - close_in chan - in - loop line; - let (*name,*) version, release, arch = - (*!name,*) !version, !release, !arch in - if (*name = "" ||*) version = "" || release = "" || arch = "" then - failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) in - - (*printf "%s -> %s %s\n%!" basename version arch;*) - - (basename, version, arch) - ) infos in - - let nr_kernels = List.length infos in - - (* For quick access to the opener strings, build a hash. *) - let openers = Hashtbl.create 13 in - List.iter ( - fun (name, { opener = opener; closer = closer }) -> - Hashtbl.add openers opener (closer, name) - ) structs; - - (* Now read the data files and parse out the structures of interest. *) let kernels = List.mapi ( - fun i (basename, version, arch) -> + fun i info -> printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels; - let file_exists name = - try Unix.access name [Unix.F_OK]; true - with Unix.Unix_error _ -> false - in - let close_process_in cmd chan = - match Unix.close_process_in chan with - | Unix.WEXITED 0 -> () - | Unix.WEXITED i -> - eprintf "%s: command exited with code %d\n" cmd i; exit i - | Unix.WSIGNALED i -> - eprintf "%s: command exited with signal %d\n" cmd i; exit 1 - | Unix.WSTOPPED i -> - eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 - in - - (* Open the data file, uncompressing it on the fly if necessary. *) - let chan, close = - if file_exists (basename ^ ".data") then - open_in (basename ^ ".data"), close_in - else if file_exists (basename ^ ".data.gz") then ( - let cmd = - sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in - Unix.open_process_in cmd, close_process_in cmd - ) - else if file_exists (basename ^ ".data.bz2") then ( - let cmd = - sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in - Unix.open_process_in cmd, close_process_in cmd - ) else - failwith - (sprintf "%s: cannot find corresponding data file" basename) in - - (* Read the data file in, looking for structures of interest to us. *) - let bodies = Hashtbl.create 13 in - let rec loop () = - let line = input_line chan in - - (* If the line is an opener for one of the structures we - * are looking for, then for now just save all the text until - * we get to the closer line. - *) - (try - let closer, name = Hashtbl.find openers line in - let rec loop2 lines = - let line = input_line chan in - let lines = line :: lines in - if String.starts_with line closer then List.rev lines - else loop2 lines - in - - let body = - try loop2 [line] - with End_of_file -> - failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in - - Hashtbl.replace bodies name body - with Not_found -> ()); - - loop () - in - (try loop () with End_of_file -> ()); - - close chan; + let struct_names = List.map fst structs in + let structures = PP.load_structures info struct_names in - (* Make sure we got all the mandatory structures. *) + (* Make sure we got all the mandatory structures & fields. *) List.iter ( - fun (name, { mandatory_struct = mandatory }) -> - if mandatory && not (Hashtbl.mem bodies name) then - failwith (sprintf "%s: structure %s not found in this kernel" basename name) - ) structs; - - (basename, version, arch, bodies) - ) infos in - - (* Now parse each structure body. - * XXX This would be better as a proper lex/yacc parser. - * XXX Even better would be to have a proper interface to libdwarves. - *) - let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in - let re_intfield = Pcre.regexp "(?:int|char)\\s+(\\w+);" in - let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in - let re_voidptrfield = Pcre.regexp "void\\s*\\*\\s*(\\w+);" in - let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in - let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in - let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in - - (* 'basename' is the source file, and second parameter ('body') is - * the list of text lines which covers this structure (minus the - * opener line). Result is the list of parsed fields from this - * structure. - *) - let rec parse basename = function - | [] -> assert false - | [_] -> [] (* Just the closer line, finished. *) - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - (* Recursively parse a sub-structure. First search for the - * corresponding closer line. - *) - let rec loop depth acc = function - | [] -> - eprintf "%s: %S has no matching close structure line\n%!" - basename line; - assert false - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - loop (depth+1) (line :: acc) lines - | line :: lines - when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> - (line :: acc), lines - | line :: lines - when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> - loop (depth-1) (line :: acc) lines - | line :: lines -> loop depth (line :: acc) lines - in - let nested_body, rest = loop 0 [] lines in - - (* Then parse the sub-structure. *) - let struct_name, nested_body = - match nested_body with - | [] -> assert false - | closer :: _ -> - let subs = Pcre.exec ~rex:re_structcloser closer in - let struct_name = - try Some (Pcre.get_substring subs 1) with Not_found -> None in - struct_name, List.rev nested_body in - let nested_fields = parse basename nested_body in - - (* Prefix the sub-fields with the name of the structure. *) - let nested_fields = - match struct_name with - | None -> nested_fields - | Some prefix -> - List.map ( - fun (name, details) -> (prefix ^ "'" ^ name, details) - ) nested_fields in - - (* Parse the rest. *) - nested_fields @ parse basename rest - - | line :: lines when Pcre.pmatch ~rex:re_intfield line -> - (* An int field. *) - let subs = Pcre.exec ~rex:re_intfield line in - let name = Pcre.get_substring subs 1 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Int, offset, size)) :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> - (* A pointer-to-struct field. *) - let subs = Pcre.exec ~rex:re_ptrfield line in - let struct_name = Pcre.get_substring subs 1 in - let name = Pcre.get_substring subs 2 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Ptr struct_name, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_voidptrfield line -> - (* A void* field. *) - let subs = Pcre.exec ~rex:re_voidptrfield line in - let name = Pcre.get_substring subs 1 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`VoidPtr, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_strfield line -> - (* A string (char array) field. *) - let subs = Pcre.exec ~rex:re_strfield line in - let name = Pcre.get_substring subs 1 in - let width = int_of_string (Pcre.get_substring subs 2) in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Str width, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | _ :: lines -> - (* Just ignore any other field we can't parse. *) - parse basename lines + fun (struct_name, + { mandatory_struct = mandatory; fields = wanted_fields }) -> + try + let s = + List.find (fun s -> struct_name = s.PP.struct_name) + structures in + + (* Check we have all the mandatory fields. *) + let all_fields = s.PP.struct_fields in + List.iter ( + fun (wanted_field, { mandatory_field = mandatory }) -> + let got_it = + List.exists ( + fun { PP.field_name = name } -> name = wanted_field + ) all_fields in + if mandatory && not got_it then ( + eprintf "%s: structure %s is missing required field %s\n" + info.PP.basename struct_name wanted_field; + eprintf "fields found in this structure:\n"; + List.iter ( + fun { PP.field_name = name } -> eprintf "\t%s\n" name + ) all_fields; + exit 1 + ); + ) wanted_fields - in + with Not_found -> + if mandatory then + failwith (sprintf "%s: structure %s not found in this kernel" + info.PP.basename struct_name) + ) structs; - let kernels = List.map ( - fun (basename, version, arch, bodies) -> - let structures = List.filter_map ( - fun (struct_name, { fields = wanted_fields }) -> - let body = - try Some (Hashtbl.find bodies struct_name) - with Not_found -> None in - match body with - | None -> None - | Some body -> - let body = List.tl body in (* Don't care about opener line. *) - let fields = parse basename body in - - (* Compute total size of the structure. *) - let total_size = - let fields = List.map ( - fun (_, (_, offset, size)) -> offset + size - ) fields in - List.fold_left max 0 fields in - - (* That got us all the fields, but we only care about - * the wanted_fields. - *) - let all_fields = fields in - let fields = List.filter ( - fun (name, _) -> List.mem_assoc name wanted_fields + let structures = + List.map ( + fun ({ PP.struct_name = struct_name; PP.struct_fields = fields } + as structure) -> + let { fields = wanted_fields } = List.assoc struct_name structs in + + (* That got us all the fields, but we only care about + * the wanted_fields. + *) + let fields = List.filter ( + fun { PP.field_name = name } -> List.mem_assoc name wanted_fields + ) fields in + + (* Prefix all the field names with the structure name. *) + let fields = + List.map ( + fun ({ PP.field_name = name } as field) -> + let name = struct_name ^ "_" ^ name in + { field with PP.field_name = name } ) fields in + { structure with PP.struct_fields = fields } + ) structures in - (* Also check we have all the mandatory fields. *) - List.iter ( - fun (wanted_field, { mandatory_field = mandatory }) -> - if mandatory && not (List.mem_assoc wanted_field fields) - then ( - eprintf "%s: structure %s is missing required field %s\n" basename struct_name wanted_field; - eprintf "fields found in this structure:\n"; - List.iter ( - fun (name, _) -> eprintf "\t%s\n" name - ) all_fields; - exit 1 - ); - ) wanted_fields; - - (* Prefix all the field names with the structure name. *) - let fields = - List.map (fun (name, details) -> - struct_name ^ "_" ^ name, details) fields in - - Some (struct_name, (fields, total_size)) - ) structs in - - (basename, version, arch, structures) + (info, structures) ) kernels in if debug then List.iter ( - fun (basename, version, arch, structures) -> - printf "%s (version: %s, arch: %s):\n" basename version arch; + fun (info, structures) -> + printf "%s ----------\n" (PP.string_of_info info); List.iter ( - fun (struct_name, (fields, total_size)) -> - printf " struct %s {\n" struct_name; - List.iter ( - fun (field_name, (typ, offset, size)) -> - (match typ with - | `Int -> - printf " int %s; " field_name - | `Ptr struct_name -> - printf " struct %s *%s; " struct_name field_name - | `VoidPtr -> - printf " void *%s; " field_name - | `Str width -> - printf " char %s[%d]; " field_name width - ); - printf " /* offset = %d, size = %d */\n" offset size - ) fields; - printf " } /* %d bytes */\n\n" total_size; + fun structure -> + printf "%s\n\n" (PP.string_of_structure structure); ) structures; ) kernels; @@ -580,7 +299,9 @@ Example (from toplevel of virt-mem source tree): let () = let _loc = Loc.ghost in - let versions = List.map (fun (_, version, _, _) -> version) kernels in + let versions = List.map ( + fun ({ PP.kernel_version = version }, _) -> version + ) kernels in (* Sort them in reverse because we are going to generate the * final list in reverse. @@ -607,12 +328,18 @@ Example (from toplevel of virt-mem source tree): *) let files = List.map ( - fun (name, _) -> + fun (struct_name, _) -> let kernels = List.filter_map ( - fun (basename, version, arch, structures) -> - try Some (basename, version, arch, List.assoc name structures) - with Not_found -> None + fun (info, structures) -> + try + let structure = + List.find ( + fun { PP.struct_name = name } -> name = struct_name + ) structures in + Some (info, structure) + with Not_found -> + None ) kernels in (* Sort the kernels, which makes the generated output more stable @@ -620,11 +347,12 @@ Example (from toplevel of virt-mem source tree): *) let kernels = List.sort kernels in - name, kernels + struct_name, kernels ) structs in let kernels = () in ignore kernels; (* garbage collect *) +(* (* Get just the field types. * * It's plausible that a field with the same name has a different @@ -1063,3 +791,4 @@ Example (from toplevel of virt-mem source tree): Unix.unlink new_output_file ) files +*) diff --git a/extract/codegen/pahole_parser.ml b/extract/codegen/pahole_parser.ml new file mode 100644 index 0000000..1798b44 --- /dev/null +++ b/extract/codegen/pahole_parser.ml @@ -0,0 +1,390 @@ +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *) + +open ExtList +open ExtString + +open Printf + +let (//) = Filename.concat + +type pathname = string + +type info = { + kernel_version : string; + arch : string; + basename : string; +} + +type structure = { + struct_name : string; + struct_total_size : int; + struct_fields : field list; +} + +and field = { + field_name : string; + field_type : f_type; + field_offset : int; + field_size : int; +} + +and f_type = + | FStructPointer of string + | FVoidPointer + | FListHeadPointer + | FInteger + | FString of int + +let string_of_info i = + sprintf "%s: %s %s" i.basename i.kernel_version i.arch + +let rec string_of_structure s = + let fields = List.map string_of_field s.struct_fields in + let fields = String.concat "\n " fields in + sprintf "struct %s {\n %s\n}; /* total size = %d bytes */" + s.struct_name fields s.struct_total_size + +and string_of_field f = + sprintf "%s %s; /* offset = %d, size = %d */" + f.field_name (string_of_f_type f.field_type) + f.field_offset f.field_size + +and string_of_f_type = function + | FStructPointer struct_name -> sprintf "struct %s*" struct_name + | FVoidPointer -> "void *" + | FListHeadPointer -> "struct list_head *" + | FInteger -> "int" + | FString width -> sprintf "char[%d]" width + +(* Regular expressions. We really really should use ocaml-mikmatch ... *) +let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" +let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" + +let list_kernels path = + (* Get the *.info files from the kernels database. *) + let infos = Sys.readdir path in + let infos = Array.to_list infos in + let infos = List.filter (fun name -> String.ends_with name ".info") infos in + let infos = List.map ( (//) path) infos in + + (* Parse in the *.info files. These have historically had a few different + * formats that we need to support. + *) + let infos = List.map ( + fun filename -> + (* Get the basename (for getting the .data file later on). *) + let basename = Filename.chop_suffix filename ".info" in + + let chan = open_in filename in + let line = input_line chan in + + (* Kernel version string. *) + let version, arch = + if Pcre.pmatch ~rex:re_oldformat line then ( + (* If the file starts with "RPM: \d+: ..." then it's the + * original Fedora format. Everything in one line. + *) + let subs = Pcre.exec ~rex:re_oldformat line in + (* let name = Pcre.get_substring subs 1 in *) + let version = Pcre.get_substring subs 2 in + let release = Pcre.get_substring subs 3 in + let arch = Pcre.get_substring subs 4 in + close_in chan; + (* XXX Map name -> PAE, hugemem etc. *) + (* name, *) sprintf "%s-%s.%s" version release arch, arch + ) else ( + (* New-style "key: value" entries, up to end of file or the first + * blank line. + *) + let (*name,*) version, release, arch = + (*ref "",*) ref "", ref "", ref "" in + let rec loop line = + try + let subs = Pcre.exec ~rex:re_keyvalue line in + let key = Pcre.get_substring subs 1 in + let value = Pcre.get_substring subs 2 in + (*if key = "Name" then name := value + else*) if key = "Version" then version := value + else if key = "Release" then release := value + else if key = "Architecture" then arch := value; + let line = input_line chan in + loop line + with + Not_found | End_of_file -> + close_in chan + in + loop line; + let (*name,*) version, release, arch = + (*!name,*) !version, !release, !arch in + if (*name = "" ||*) version = "" || release = "" || arch = "" then + failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); + (* XXX Map name -> PAE, hugemem etc. *) + (* name, *) sprintf "%s-%s.%s" version release arch, arch + ) in + + (*printf "%s -> %s %s\n%!" basename version arch;*) + + { basename = basename; arch = arch; + kernel_version = version } + ) infos in + infos + +(* XXX This would be better as a proper lex/yacc parser. + * XXX Even better would be to have a proper interface to libdwarves. + *) +let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" +let re_intfield = Pcre.regexp "(?:int|char)\\s+(\\w+);" +let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" +let re_voidptrfield = Pcre.regexp "void\\s*\\*\\s*(\\w+);" +let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" +let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" +let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" + +let load_structures { basename = basename } struct_names = + (* For quick access to the opener strings, build a hash. *) + let openers = Hashtbl.create 13 in + List.iter ( + fun struct_name -> + let opener = sprintf "struct %s {" struct_name in + let closer = "};" in + Hashtbl.add openers opener (closer, struct_name) + ) struct_names; + + (* Now read the data file and parse out the structures of interest. *) + let file_exists name = + try Unix.access name [Unix.F_OK]; true + with Unix.Unix_error _ -> false + in + let close_process_in cmd chan = + match Unix.close_process_in chan with + | Unix.WEXITED 0 -> () + | Unix.WEXITED i -> + eprintf "%s: command exited with code %d\n" cmd i; exit i + | Unix.WSIGNALED i -> + eprintf "%s: command exited with signal %d\n" cmd i; exit 1 + | Unix.WSTOPPED i -> + eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 + in + + (* Open the data file, uncompressing it on the fly if necessary. *) + let chan, close = + if file_exists (basename ^ ".data") then + open_in (basename ^ ".data"), close_in + else if file_exists (basename ^ ".data.gz") then ( + let cmd = + sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in + Unix.open_process_in cmd, close_process_in cmd + ) + else if file_exists (basename ^ ".data.bz2") then ( + let cmd = + sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in + Unix.open_process_in cmd, close_process_in cmd + ) else + failwith (sprintf "%s: cannot find corresponding data file" basename) in + + (* Read the data file in, looking for structures of interest to us. *) + let bodies = Hashtbl.create 13 in + let rec loop () = + let line = input_line chan in + + (* If the line is an opener for one of the structures we + * are looking for, then for now just save all the text until + * we get to the closer line. + *) + (try + let closer, struct_name = Hashtbl.find openers line in + let rec loop2 lines = + let line = input_line chan in + let lines = line :: lines in + if String.starts_with line closer then List.rev lines + else loop2 lines + in + + let body = + try loop2 [line] + with End_of_file -> + failwith (sprintf "%s: %s: %S not matched by closing %S" basename struct_name line closer) in + + Hashtbl.replace bodies struct_name body + with Not_found -> ()); + + loop () + in + (try loop () with End_of_file -> ()); + + close chan; + + (* Now parse each structure body. *) + + (* 'basename' is the source file, and second parameter ('body') is + * the list of text lines which covers this structure (minus the + * opener line). Result is the list of parsed fields from this + * structure. + *) + let rec parse basename = function + | [] -> assert false + | [_] -> [] (* Just the closer line, finished. *) + | line :: lines when Pcre.pmatch ~rex:re_structopener line -> + (* Recursively parse a sub-structure. First search for the + * corresponding closer line. + *) + let rec loop depth acc = function + | [] -> + eprintf "%s: %S has no matching close structure line\n%!" + basename line; + assert false + | line :: lines when Pcre.pmatch ~rex:re_structopener line -> + loop (depth+1) (line :: acc) lines + | line :: lines + when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> + (line :: acc), lines + | line :: lines + when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> + loop (depth-1) (line :: acc) lines + | line :: lines -> loop depth (line :: acc) lines + in + let nested_body, rest = loop 0 [] lines in + + (* Then parse the sub-structure. *) + let struct_name, nested_body = + match nested_body with + | [] -> assert false + | closer :: _ -> + let subs = Pcre.exec ~rex:re_structcloser closer in + let struct_name = + try Some (Pcre.get_substring subs 1) with Not_found -> None in + struct_name, List.rev nested_body in + let nested_fields = parse basename nested_body in + + (* Prefix the sub-fields with the name of the structure. *) + let nested_fields = + match struct_name with + | None -> nested_fields + | Some prefix -> + List.map ( + fun ({ field_name = name } as field) -> + let name = prefix ^ "'" ^ name in + { field with field_name = name } + ) nested_fields in + + (* Parse the rest. *) + nested_fields @ parse basename rest + + | line :: lines when Pcre.pmatch ~rex:re_intfield line -> + (* An int field. *) + let subs = Pcre.exec ~rex:re_intfield line in + let name = Pcre.get_substring subs 1 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + let field = + { field_name = name; field_type = FInteger; + field_offset = offset; field_size = size } in + field :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> + (* A pointer-to-struct field. *) + let subs = Pcre.exec ~rex:re_ptrfield line in + let struct_name = Pcre.get_substring subs 1 in + let name = Pcre.get_substring subs 2 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + let field_type = + if struct_name <> "list_head" then + FStructPointer struct_name + else + FListHeadPointer in + let field = + { field_name = name; field_type = field_type; + field_offset = offset; field_size = size } in + field :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_voidptrfield line -> + (* A void* field. *) + let subs = Pcre.exec ~rex:re_voidptrfield line in + let name = Pcre.get_substring subs 1 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + let field = + { field_name = name; field_type = FVoidPointer; + field_offset = offset; field_size = size } in + field :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_strfield line -> + (* A string (char array) field. *) + let subs = Pcre.exec ~rex:re_strfield line in + let name = Pcre.get_substring subs 1 in + let width = int_of_string (Pcre.get_substring subs 2) in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + let field = + { field_name = name; field_type = FString width; + field_offset = offset; field_size = size } in + field :: parse basename lines + with + Not_found -> parse basename lines + ); + + | _ :: lines -> + (* Just ignore any other field we can't parse. *) + parse basename lines + in + + let structures = List.filter_map ( + fun struct_name -> + let body = + try Some (Hashtbl.find bodies struct_name) + with Not_found -> None in + match body with + | None -> None + | Some body -> + let body = List.tl body in (* Don't care about opener line. *) + let fields = parse basename body in + + (* Compute total size of the structure. *) + let total_size = + let fields = List.map ( + fun { field_offset = offset; + field_size = size } -> offset + size + ) fields in + List.fold_left max 0 fields in + + Some { struct_name = struct_name; + struct_fields = fields; + struct_total_size = total_size } + ) struct_names in + + structures diff --git a/extract/codegen/pahole_parser.mli b/extract/codegen/pahole_parser.mli new file mode 100644 index 0000000..f34b7e3 --- /dev/null +++ b/extract/codegen/pahole_parser.mli @@ -0,0 +1,91 @@ +(** 'pahole' output parser. *) +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *) + +(** This parses the output of the pahole command, allowing us + to extract the layout of kernel structures for particular + kernel versions. + + Its primary input is the [*.info] and [*.data*] files found + in the [kernels/] subdirectory (ie. the kerneldb). +*) + +(** {2 Types} *) + +type pathname = string + (** Path and filenames. *) + +type info = { + kernel_version : string; (** Kernel version that this matches. *) + arch : string; (** Architecture, eg. "i686", "ppc64". *) + basename : string; (** [basename.info] is the info + file and [basename.data*] is + the data file. *) +} + (** Kernel metainformation, extracted from the [*.info] file. *) + +type structure = { + struct_name : string; (** Structure name. *) + struct_total_size : int; (** Total size in bytes. *) + struct_fields : field list; (** Fields in the structure. *) +} + (** A kernel structure, eg. [task_struct]. *) + +and field = { + field_name : string; (** Field name. *) + field_type : f_type; (** Field type. *) + field_offset : int; (** Offset within the structure. *) + field_size : int; (** Size of the field (bytes). *) +} + (** A kernel structure field. + + Note that nested fields are flattened with single quotes (') + between elements, so you get names like [tasks'next]. *) + +and f_type = + | FStructPointer of string (** A pointer to a named struct. *) + | FVoidPointer (** A [void*] pointer. *) + | FListHeadPointer (** A pointer to a [list_head]. *) + | FInteger (** An integer. *) + | FString of int (** A char array of given width. *) + (** Type of a kernel field. *) + +val string_of_info : info -> string +val string_of_structure : structure -> string +val string_of_field : field -> string +val string_of_f_type : f_type -> string + (** Printing functions. *) + +(** {2 List kernels in kerneldb} *) + +val list_kernels : pathname -> info list + (** Return a list of all the kernels in the kerneldb at [path]. *) + +(** {2 Load kernel structures} *) + +val load_structures : info -> string list -> structure list + (** [load_structures info names] loads the named kernel structures + from a particular kernel. + + The returned list is not necessarily in the same order, or the + same length, as the [names] list. Check the + {!structure.struct_name} field for the structure name. + Structures which don't actually occur in the given kernel are + not loaded and not present in the final list. + *)