From: Richard W.M. Jones <"Richard W.M. Jones "> Date: Thu, 14 Aug 2008 15:02:21 +0000 (+0100) Subject: New kernel database parser *NOT WORKING YET*. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=159e16043bbadc056b6ae193ddf3d31be2810e39;p=virt-mem.git New kernel database parser *NOT WORKING YET*. --- diff --git a/Makefile.in b/Makefile.in index fa1ad9d..1b6721f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -94,7 +94,7 @@ update-kerneldb: # Rebuild the generated kernel struct parsers from the kerneldb. update-kernel-structs: - extract/codegen/kerneldb-to-parser.opt kernels lib + extract/codegen/compile-kerneldb.opt kernels lib # Developer documentation (in html/ subdirectory). diff --git a/extract/codegen/.depend b/extract/codegen/.depend index bbed776..af6233e 100644 --- a/extract/codegen/.depend +++ b/extract/codegen/.depend @@ -1,4 +1,9 @@ +struct_classify.cmi: pahole_parser.cmi +compile_kerneldb.cmo: struct_classify.cmi pahole_parser.cmi +compile_kerneldb.cmx: struct_classify.cmx pahole_parser.cmx kerneldb_to_parser.cmo: pahole_parser.cmi kerneldb_to_parser.cmx: pahole_parser.cmx pahole_parser.cmo: pahole_parser.cmi pahole_parser.cmx: pahole_parser.cmi +struct_classify.cmo: pahole_parser.cmi struct_classify.cmi +struct_classify.cmx: pahole_parser.cmx struct_classify.cmi diff --git a/extract/codegen/Makefile.in b/extract/codegen/Makefile.in index 8054b55..fb48f3f 100644 --- a/extract/codegen/Makefile.in +++ b/extract/codegen/Makefile.in @@ -33,14 +33,16 @@ OCAMLOPTFLAGS = @OCAMLOPTFLAGS@ OCAMLOPTPACKAGES = $(OCAMLCPACKAGES) OCAMLOPTLIBS = -linkpkg camlp4lib.cmxa -TARGETS = kerneldb-to-parser.opt +TARGETS = compile-kerneldb.opt -OBJS = pahole_parser.cmo kerneldb_to_parser.cmo +OBJS = pahole_parser.cmo \ + struct_classify.cmo \ + compile_kerneldb.cmo XOBJS = $(OBJS:.cmo=.cmx) all: $(TARGETS) -kerneldb-to-parser.opt: $(XOBJS) +compile-kerneldb.opt: $(XOBJS) ocamlfind ocamlopt \ $(OCAMLOPTFLAGS) $(OCAMLOPTPACKAGES) $(OCAMLOPTLIBS) $(XOBJS) -o $@ diff --git a/extract/codegen/compile_kerneldb.ml b/extract/codegen/compile_kerneldb.ml new file mode 100644 index 0000000..27fcb14 --- /dev/null +++ b/extract/codegen/compile_kerneldb.ml @@ -0,0 +1,382 @@ +(* Memory info for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*) + +(* This program takes the kernel database (in kernels/ in toplevel + directory) and generates parsing code for the various structures + in the kernel that we are interested in. + + The output programs -- *.ml, *.mli files of generated code -- go + into lib/ at the toplevel, eg. lib/virt_mem_kernels.ml + + The stuff at the top of this file determine what structures + we try to parse. +*) + +type struct_t = { + good_fields : string list; + field_metadata : (string * field_metadata_t) list; +} +and field_metadata_t = + | VoidPointerIsReally of string + | ListHeadIsReally of string + +(*---------------------------------------------------------------------- + * This controls what structures & fields we will parse out. + *----------------------------------------------------------------------*) +let good_structs = [ + "task_struct", { + good_fields = [ "tasks'next"; "tasks'prev"; + "run_list'next"; "run_list'prev"; + "state"; "prio"; "static_prio"; "normal_prio"; + "comm"; "pid" ]; + field_metadata = [ + "tasks'next", ListHeadIsReally "task_struct"; + "tasks'prev", ListHeadIsReally "task_struct"; + "run_list'next", ListHeadIsReally "task_struct"; + "run_list'prev", ListHeadIsReally "task_struct"; + ]; + }; + "net_device", { + good_fields = [ "dev_list'prev"; "dev_list'next"; "next"; + "ip_ptr"; "ip6_ptr"; + "name"; "flags"; "operstate"; "mtu"; "perm_addr"; + "addr_len" ]; + field_metadata = [ + "dev_list'next", ListHeadIsReally "net_device"; + "dev_list'prev", ListHeadIsReally "net_device"; + "ip_ptr", VoidPointerIsReally "in_device"; + "ip6_ptr", VoidPointerIsReally "inet6_dev"; + ]; + }; + "net", { + good_fields = [ "dev_base_head'next"; "dev_base_head'prev" ]; + field_metadata = [ + "dev_base_head'next", ListHeadIsReally "net_device"; + "dev_base_head'prev", ListHeadIsReally "net_device"; + ]; + }; + "in_device", { + good_fields = [ "ifa_list" ]; + field_metadata = []; + }; + "inet6_dev", { + good_fields = [ "addr_list" ]; + field_metadata = []; + }; + "in_ifaddr", { + good_fields = [ "ifa_next"; "ifa_local"; "ifa_address"; + "ifa_mask"; "ifa_broadcast" ]; + field_metadata = []; + }; + "inet6_ifaddr", { + good_fields = [ "prefix_len"; "lst_next" ]; + field_metadata = []; + }; +] + +let debug = true + +open Camlp4.PreCast +open Syntax +(*open Ast*) + +open ExtList +open ExtString +open Printf + +module PP = Pahole_parser +module SC = Struct_classify + +let (//) = Filename.concat + +(* Couple of handy camlp4 construction functions which do some + * things that ought to be easy/obvious but aren't. + * + * 'concat_str_items' concatenates a list of str_item together into + * one big str_item. + * + * 'concat_record_fields' concatenates a list of records fields into + * a record. The list must have at least one element. + * + * 'build_record' builds a record out of record fields. + * + * 'build_tuple_from_exprs' builds an arbitrary length tuple from + * a list of expressions of length >= 2. + * + * Thanks to bluestorm on #ocaml for getting these working. + *) +let concat_str_items _loc items = + match items with + | [] -> <:str_item< >> + | x :: xs -> + List.fold_left (fun xs x -> <:str_item< $xs$ $x$ >>) x xs + +let concat_sig_items _loc items = + match items with + | [] -> <:sig_item< >> + | x :: xs -> + List.fold_left (fun xs x -> <:sig_item< $xs$ $x$ >>) x xs + +let concat_record_fields _loc fields = + match fields with + | [] -> assert false + | f :: fs -> + List.fold_left (fun fs f -> <:ctyp< $fs$ ; $f$ >>) f fs + +let concat_record_bindings _loc rbs = + match rbs with + | [] -> assert false + | rb :: rbs -> + List.fold_left (fun rbs rb -> <:rec_binding< $rbs$ ; $rb$ >>) rb rbs + +let build_record _loc rbs = + Ast.ExRec (_loc, rbs, Ast.ExNil _loc) + +let build_tuple_from_exprs _loc exprs = + match exprs with + | [] | [_] -> assert false + | x :: xs -> + Ast.ExTup (_loc, + List.fold_left (fun xs x -> Ast.ExCom (_loc, x, xs)) x xs) + +(* Start of the main program. *) +let () = + let quick = ref false in + let anon_args = ref [] in + + let argspec = Arg.align [ + "--quick", Arg.Set quick, " Quick mode (just for testing)"; + ] in + + let anon_arg str = anon_args := str :: !anon_args in + let usage = "\ +compile-kerneldb: Turn kernels database into code modules. + +Usage: + compile-kerneldb [-options] + +For example, from the top level of the virt-mem source tree: + compile-kerneldb kernels/ lib/ + +Options: +" in + Arg.parse argspec anon_arg usage; + + let quick = !quick in + let anon_args = List.rev !anon_args in + + let kernelsdir, outputdir = + match anon_args with + | [kd;od] -> kd,od + | _ -> + eprintf "compile-kerneldb \n"; + exit 2 in + + (* Read in the list of kernels from the kerneldb. *) + let kernels = PP.list_kernels kernelsdir in + + (* In quick mode, just process the first few kernels. *) + let kernels = if quick then List.take 10 kernels else kernels in + + let good_struct_names = List.map fst good_structs in + + (* Load in the structures. *) + let nr_kernels = List.length kernels in + let kernels = List.mapi ( + fun i info -> + printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels; + + let structures = PP.load_structures info good_struct_names in + + (info, structures) + ) kernels in + + (* Keep only the good fields. *) + let kernels = List.map ( + fun (info, structures) -> + let structures = List.map ( + fun (struct_name, structure) -> + let { good_fields = good_fields } = + List.assoc struct_name good_structs in + let fields = List.filter ( + fun { PP.field_name = name } -> List.mem name good_fields + ) structure.PP.struct_fields in + struct_name, { structure with PP.struct_fields = fields } + ) structures in + (info, structures) + ) kernels in + + (* Turn anonymous list_head and void * pointers into pointers to + * known structure types, where we have that meta-information. + *) + let kernels = List.map ( + fun (info, structures) -> + let structures = List.map ( + fun (struct_name, structure) -> + let { field_metadata = metadata } = + List.assoc struct_name good_structs in + let fields = structure.PP.struct_fields in + let fields = List.map ( + fun ({ PP.field_name = name; PP.field_type = typ } as field) -> + try + let meta = List.assoc name metadata in + let typ = + match meta, typ with + | ListHeadIsReally s, PP.FAnonListHeadPointer -> + PP.FListHeadPointer s + | VoidPointerIsReally s, PP.FVoidPointer -> + PP.FStructPointer s + | _, typ -> typ in + { field with PP.field_type = typ } + with + Not_found -> field + ) fields in + struct_name, { structure with PP.struct_fields = fields } + ) structures in + (info, structures) + ) kernels in + + if debug then + List.iter ( + fun (info, structures) -> + printf "\n%s ----------\n" (PP.string_of_info info); + List.iter ( + fun (_, structure) -> + printf "%s\n\n" (PP.string_of_structure structure); + ) structures; + ) kernels; + + (* First output file is a simple list of kernels, to support the + * 'virt-mem --list-kernels' option. + *) + let () = + let _loc = Loc.ghost in + + let versions = List.map ( + fun ({ PP.kernel_version = version }, _) -> version + ) kernels in + + (* Sort them in reverse because we are going to generate the + * final list in reverse. + *) + let cmp a b = compare b a in + let versions = List.sort ~cmp versions in + + let xs = + List.fold_left (fun xs version -> <:expr< $str:version$ :: $xs$ >>) + <:expr< [] >> versions in + + let code = <:str_item< + let kernels = $xs$ + >> in + + let output_file = outputdir // "virt_mem_kernels.ml" in + printf "Writing list of kernels to %s ...\n%!" output_file; + Printers.OCaml.print_implem ~output_file code in + + (* We want to track single structures as they have changed over + * time, ie. over kernel versions. Transpose our dataset so we are + * looking at structures over time. + *) + let structures = PP.transpose good_struct_names kernels in + + let kernels = () in ignore (kernels); (* garbage collect *) + + let structures = + List.map ( + fun (struct_name, kernels) -> + let all_fields = PP.get_fields kernels in + (struct_name, (kernels, all_fields)) + ) structures in + + if debug then + List.iter ( + fun (struct_name, (kernels, all_fields)) -> + printf "struct %s:\n" struct_name; + printf " structure occurs in %d kernel versions\n" + (List.length kernels); + printf " union of fields found:\n"; + List.iter ( + fun (field_name, field_type) -> + printf " %s %s\n" (PP.string_of_f_type field_type) field_name + ) all_fields + ) structures; + + (* Now perform the minimization step for each structure. + * We do separate minimization for: + * - shape field structures + * - content field structures + * - parsers + *) + let structures = + List.map ( + fun (struct_name, (kernels, all_fields)) -> + let sflist, sfhash = + SC.minimize_shape_field_structs struct_name good_struct_names + kernels in + + let cflist, cfhash = + SC.minimize_content_field_structs struct_name good_struct_names + kernels in + + let palist, pahash = + SC.minimize_parsers struct_name kernels sfhash cfhash in + + (struct_name, (kernels, all_fields, + sflist, sfhash, cflist, cfhash, palist, pahash)) + ) structures in + + if debug then + List.iter ( + fun (struct_name, + (kernels, all_fields, + sflist, sfhash, cflist, cfhash, palist, pahash)) -> + printf "struct %s:\n" struct_name; + + printf " shape field structures:\n"; + List.iter ( + fun { SC.sf_name = name; sf_fields = fields } -> + printf " type %s = {\n" name; + List.iter ( + fun { PP.field_name = name; field_type = typ } -> + printf " %s %s;\n" (PP.string_of_f_type typ) name + ) fields; + printf " }\n"; + ) sflist; + + printf " content field structures:\n"; + List.iter ( + fun { SC.cf_name = name; cf_fields = fields } -> + printf " type %s = {\n" name; + List.iter ( + fun { PP.field_name = name; field_type = typ } -> + printf " %s %s;\n" (PP.string_of_f_type typ) name + ) fields; + printf " }\n"; + ) cflist; + + printf " parsers:\n"; + List.iter ( + fun { SC.pa_name = name; + pa_shape_field_struct = sf; + pa_content_field_struct = cf } -> + printf " let %s = ...\n" name; + printf " -> (%s, %s)\n" sf.SC.sf_name cf.SC.cf_name + ) palist + ) structures; diff --git a/extract/codegen/pahole_parser.ml b/extract/codegen/pahole_parser.ml index 1798b44..d593dca 100644 --- a/extract/codegen/pahole_parser.ml +++ b/extract/codegen/pahole_parser.ml @@ -48,7 +48,8 @@ and field = { and f_type = | FStructPointer of string | FVoidPointer - | FListHeadPointer + | FAnonListHeadPointer + | FListHeadPointer of string | FInteger | FString of int @@ -63,13 +64,15 @@ let rec string_of_structure s = and string_of_field f = sprintf "%s %s; /* offset = %d, size = %d */" - f.field_name (string_of_f_type f.field_type) + (string_of_f_type f.field_type) f.field_name f.field_offset f.field_size and string_of_f_type = function - | FStructPointer struct_name -> sprintf "struct %s*" struct_name + | FStructPointer struct_name -> sprintf "struct %s *" struct_name | FVoidPointer -> "void *" - | FListHeadPointer -> "struct list_head *" + | FAnonListHeadPointer -> "struct list_head *" + | FListHeadPointer struct_name -> + sprintf "struct /* %s */ list_head *" struct_name | FInteger -> "int" | FString width -> sprintf "char[%d]" width @@ -316,7 +319,7 @@ let load_structures { basename = basename } struct_names = if struct_name <> "list_head" then FStructPointer struct_name else - FListHeadPointer in + FAnonListHeadPointer in let field = { field_name = name; field_type = field_type; field_offset = offset; field_size = size } in @@ -382,9 +385,81 @@ let load_structures { basename = basename } struct_names = ) fields in List.fold_left max 0 fields in - Some { struct_name = struct_name; - struct_fields = fields; - struct_total_size = total_size } + (* Sort the structure fields by field offset. They are + * probably already in this order, but just make sure. + *) + let cmp { field_offset = o1 } { field_offset = o2 } = compare o1 o2 in + let fields = List.sort ~cmp fields in + + Some ( + struct_name, + { struct_name = struct_name; + struct_fields = fields; + struct_total_size = total_size } + ) ) struct_names in structures + +(* XXX This loop is O(n^3), luckily n is small! *) +let transpose good_struct_names kernels = + List.map ( + fun struct_name -> + let kernels = + List.filter_map ( + fun (info, structures) -> + try + let s = List.assoc struct_name structures in + Some (info, s) + with + Not_found -> None + ) kernels in + + (* Sort the kernels, which makes the generated output more stable + * and makes patches more useful. + *) + let kernels = List.sort kernels in + + struct_name, kernels + ) good_struct_names + +let get_fields structures = + (* Use a hash table to accumulate the fields as we find them. + * The key is the field name. The value is the field type and the + * kernel version where first seen (for error reporting). If + * we meet the field again, we check its type hasn't changed. + * Finally, we can use the hash to pull out all field names and + * types. + *) + let h = Hashtbl.create 13 in + + List.iter ( + fun ({kernel_version = version}, + {struct_name = struct_name; struct_fields = fields}) -> + List.iter ( + fun {field_name = name; field_type = typ} -> + try + let (field_type, version_first_seen) = Hashtbl.find h name in + if typ <> field_type then ( + eprintf "Error: %s.%s: field changed type between kernel versions.\n" + struct_name name; + eprintf "In version %s it had type %s.\n" + version_first_seen (string_of_f_type field_type); + eprintf "In version %s it had type %s.\n" + version (string_of_f_type typ); + eprintf "The code cannot handle fields which change type like this.\n"; + eprintf "See extract/codegen/pahole_parser.mli for more details.\n"; + exit 1 + ) + with Not_found -> + Hashtbl.add h name (typ, version) + ) fields + ) structures; + + let fields = + Hashtbl.fold ( + fun name (typ, _) fields -> + (name, typ) :: fields + ) h [] in + + List.sort fields diff --git a/extract/codegen/pahole_parser.mli b/extract/codegen/pahole_parser.mli index f34b7e3..60e5eb2 100644 --- a/extract/codegen/pahole_parser.mli +++ b/extract/codegen/pahole_parser.mli @@ -61,7 +61,10 @@ and field = { and f_type = | FStructPointer of string (** A pointer to a named struct. *) | FVoidPointer (** A [void*] pointer. *) - | FListHeadPointer (** A pointer to a [list_head]. *) + | FAnonListHeadPointer (** A pointer to an unknown + [list_head]. *) + | FListHeadPointer of string (** A pointer to a [list_head] in a + named struct. *) | FInteger (** An integer. *) | FString of int (** A char array of given width. *) (** Type of a kernel field. *) @@ -79,13 +82,57 @@ val list_kernels : pathname -> info list (** {2 Load kernel structures} *) -val load_structures : info -> string list -> structure list +val load_structures : info -> string list -> (string * structure) list (** [load_structures info names] loads the named kernel structures from a particular kernel. The returned list is not necessarily in the same order, or the - same length, as the [names] list. Check the - {!structure.struct_name} field for the structure name. - Structures which don't actually occur in the given kernel are - not loaded and not present in the final list. + same length, as the [names] list. Check the first field in each + pair for the structure name. Structures which don't actually + occur in the given kernel are not loaded and not present in the + final list. + + The fields list in {!structure} is always sorted by field offset. *) + +(** {2 Transpose and check field types} + + After we've used {!load_structures} for each kernel, we end + up with a list of kernels, and within that a list of structures + supported by the kernel. What we really want is to see how + each structure changes over time, and also to check if field + types have changed between versions (which we currently disallow). + + The {!transpose} operation transposes the original list + of kernels to a list of structures. + + The {!get_fields} operation gets a complete list of fields + and their types, and checks that the types haven't changed over + kernel versions. (Note that particular fields can be missing from + some kernel version, but that is OK). +*) + +val transpose : string list -> + (info * (string * structure) list) list -> + (string * (info * structure) list) list + (** Transpose list of kernels to list of structures. The result + shows, for each structure, how it changed over kernel versions. + + The first parameter is the list of structure names of interest, + and should be the same as was passed to {!load_structures}. *) + +val get_fields : (info * structure) list -> (string * f_type) list + (** This gets a complete list of fields which have appeared in + any kernel version, and the type of those fields. + + Fields must not change type between kernel versions - if + so this function prints an error and exits. (We may support + fields which change type in future, but we don't right now). + "Type" is quite widely defined here, see {!f_type}, and so + certain changes such as between sizes of ints are allowed, + but you can't have a field which once was a pointer and then + became a string or anything like that. + + Note that a field may not be present in particular kernel + versions, but if it appears at all in any version, then it + will be in the result list. *) diff --git a/extract/codegen/struct_classify.ml b/extract/codegen/struct_classify.ml new file mode 100644 index 0000000..506d783 --- /dev/null +++ b/extract/codegen/struct_classify.ml @@ -0,0 +1,185 @@ +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *) + +open ExtList +open ExtString + +open Printf + +module PP = Pahole_parser + +type f_class = ShapeField | ContentField + +let classify_field names = function + | (PP.FStructPointer struct_name | PP.FListHeadPointer struct_name) + when List.mem struct_name names -> ShapeField + (* .. anything else is a content field: *) + | PP.FStructPointer _ + | PP.FListHeadPointer _ + | PP.FVoidPointer + | PP.FAnonListHeadPointer + | PP.FInteger + | PP.FString _ -> ContentField + +type shape_field_struct = { + sf_i : int; + sf_name : string; + sf_fields : PP.field list; +} + +and content_field_struct = { + cf_i : int; + cf_name : string; + cf_fields : PP.field list; +} + +and parser_ = { + pa_i : int; + pa_name : string; + pa_endian : Bitstring.endian; + pa_fields : Pahole_parser.field list; + pa_shape_field_struct : shape_field_struct; + pa_content_field_struct : content_field_struct; +} + +let endian_of_architecture arch = + if String.starts_with arch "i386" || + String.starts_with arch "i486" || + String.starts_with arch "i586" || + String.starts_with arch "i686" || + String.starts_with arch "x86_64" || + String.starts_with arch "x86-64" then + Bitstring.LittleEndian + else if String.starts_with arch "ia64" then + Bitstring.LittleEndian (* XXX usually? *) + else if String.starts_with arch "ppc" then + Bitstring.BigEndian + else if String.starts_with arch "sparc" then + Bitstring.BigEndian + else + failwith (sprintf "endian_of_architecture: cannot parse %S" arch) + +let unique = + let i = ref 0 in + fun () -> + incr i; !i + +(* Minimization of shape fields & content fields. *) + +let cmp { PP.field_name = n1 } { PP.field_name = n2 } = compare n1 n2 + +let hash_values h = Hashtbl.fold (fun _ v vs -> v :: vs) h [] + +let minimize_shape_field_structs struct_name names kernels = + let h = Hashtbl.create 13 in + let rh = Hashtbl.create 13 in + + let only_shape_fields = + List.filter ( + fun { PP.field_type = typ } -> + classify_field names typ = ShapeField + ) + in + + let name_of i = sprintf "%s_shape_fields_%d" struct_name i in + + List.iter ( + fun ({ PP.kernel_version = version }, + { PP.struct_fields = fields; struct_name = name_check }) -> + assert (struct_name = name_check); + let fields = List.sort ~cmp (only_shape_fields fields) in + let key = List.map (fun { PP.field_name = name } -> name) fields in + let sf = + try Hashtbl.find h key + with Not_found -> + let i = unique () in + let sf = { sf_i = i; sf_name = name_of i; sf_fields = fields } in + Hashtbl.add h key sf; + sf in + Hashtbl.add rh version sf + ) kernels; + + let sfs = hash_values h in + sfs, rh + +let minimize_content_field_structs struct_name names kernels = + let h = Hashtbl.create 13 in + let rh = Hashtbl.create 13 in + + let only_content_fields = + List.filter ( + fun { PP.field_type = typ } -> + classify_field names typ = ContentField + ) + in + + let name_of i = sprintf "%s_content_fields_%d" struct_name i in + + List.iter ( + fun ({ PP.kernel_version = version }, + { PP.struct_fields = fields; struct_name = name_check }) -> + assert (struct_name = name_check); + let fields = List.sort ~cmp (only_content_fields fields) in + let key = List.map (fun { PP.field_name = name } -> name) fields in + let cf = + try Hashtbl.find h key + with Not_found -> + let i = unique () in + let cf = { cf_i = i; cf_name = name_of i; cf_fields = fields } in + Hashtbl.add h key cf; + cf in + Hashtbl.add rh version cf + ) kernels; + + let cfs = hash_values h in + cfs, rh + +let minimize_parsers struct_name kernels sfhash cfhash = + let h = Hashtbl.create 13 in + let rh = Hashtbl.create 13 in + + let name_of i = sprintf "%s_parser_%d" struct_name i in + + List.iter ( + fun ({ PP.kernel_version = version; arch = arch }, + { PP.struct_fields = fields; struct_name = name_check }) -> + assert (struct_name = name_check); + let endian = endian_of_architecture arch in + let key = endian, fields in + let pa = + try Hashtbl.find h key + with Not_found -> + let i = unique () in + let sf = + try Hashtbl.find sfhash version + with Not_found -> assert false in + let cf = + try Hashtbl.find cfhash version + with Not_found -> assert false in + let pa = { pa_i = i; pa_name = name_of i; pa_fields = fields; + pa_endian = endian; + pa_shape_field_struct = sf; + pa_content_field_struct = cf } in + Hashtbl.add h key pa; + pa in + Hashtbl.add rh version pa + ) kernels; + + let pas = hash_values h in + pas, rh diff --git a/extract/codegen/struct_classify.mli b/extract/codegen/struct_classify.mli new file mode 100644 index 0000000..802324c --- /dev/null +++ b/extract/codegen/struct_classify.mli @@ -0,0 +1,172 @@ +(** Structure classification. *) +(* Memory info command for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *) + +(** + {2 How it works} + + There's no getting around it, this is complicated. + + {!Pahole_parser} has parsed in a limited set of structures from + each available kernel. We now aim to take a holistic view of + a structure as it changed over time, though different kernel + versions and also on different architectures. + + {3 Shape fields and content fields} + + A structure is a list of fields. + + Fields fall into two classifications, as far as we are interested: + + (i) 'Shape' fields define the relationship between different + structures. Shape fields are all pointers to other structures + that we care about. They have type {!Pahole_parser.FStructPointer} + and {!Pahole_parser.FListHeadPointer}. + + (ii) 'Content' fields are fields in a structure that contain + some data, like ints and strings. (In the current implementation, + it's anything left which isn't a shape field). + + So we can easily take a structure and place its fields into two + buckets. For example, with [task_struct] it might be: + + [task_struct] shape fields: + - [tasks'next] (the linked list of tasks) + - [tasks'prev] + - [parent] (points to the parent task) + + [task_struct] content fields: + - [pid] (process ID) + - [comm] (task name) + - etc. etc. + + {3 Shape fields and iterator functions} + + For each kernel/structure we can build a list of shape fields, but + in fact in many kernels they will be the same, so we also + performing a {i sharing} operation to minimize the number of + variations. + + We also write (by hand) iterator functions. These iterator + functions are matched to the corresponding shape field structure, + by setting up some prerequisites that the function needs, then + matching on those prerequisites with the available shape + field structures. + + {3 Content fields and printing functions} + + The same (minimization & matching) applies to hand-written printing + functions over content field structures. + + {3 Generated parsing functions} + + A third form of minimization is required to find kernel + structures which happen to be similar - ie. all the fields + happen to be in the same place, with the same wordsize and + endianness. + + We can then generate a minimal set of parsing functions which + map the binary data from the kernel image into shape and + content field structures. + + {3 Generated loading code} + + Finally, we generate recursive loading code which recurses over + structures into order to load the kernel memory and invoke the + correct parsers on it, ensuring that when the program runs, all + known kernel structures are recursively reached and loaded in. +*) + +(** {2 Field classification} *) + +type f_class = ShapeField | ContentField + +val classify_field : string list -> Pahole_parser.f_type -> f_class + (** [classify_field names field] classifies a field as either + a shape field or a content field. [names] is a list of + all kernel structures that we care about. *) + +(** {2 Minimization of shape field structures and content field structures} *) + +type shape_field_struct = { + sf_i : int; (** Unique number. *) + sf_name : string; (** Structure name in output. *) + sf_fields : Pahole_parser.field list; (** Shape fields. *) +} + (** The type of a shape field structure. *) + +and content_field_struct = { + cf_i : int; (** Unique number. *) + cf_name : string; (** Structure name in output. *) + cf_fields : Pahole_parser.field list; (** Content fields. *) +} + (** The type of a content field structure. *) + +val minimize_shape_field_structs : + string -> string list -> + (Pahole_parser.info * Pahole_parser.structure) list -> + shape_field_struct list * (string, shape_field_struct) Hashtbl.t + (** [minimize_shape_field_structs struct_name names kernels] returns + a minimized list of shape field structures + (a hash table of kernel version to {!shape_field_struct}). + + [struct_name] is the name of the structure. + + [names] is the list of interesting kernel structures. *) + +val minimize_content_field_structs : + string -> string list -> + (Pahole_parser.info * Pahole_parser.structure) list -> + content_field_struct list * (string, content_field_struct) Hashtbl.t + (** [minimize_content_field_structs struct_name names kernels] returns + a minimized list of content field structures + (a hash table of kernel version to {!content_field_struct}). + + [struct_name] is the name of the structure. + + [names] is the list of interesting kernel structures. *) + +(** {2 Minimization of parsers} *) + +type parser_ (* parser is a reserved word *) = { + pa_i : int; (** Unique number. *) + pa_name : string; (** Parser function name in output. *) + (* The input to the parser: *) + pa_endian : Bitstring.endian; (** Default field endianness. *) + pa_fields : Pahole_parser.field list; (** All fields. *) + (* The output of the parser: *) + pa_shape_field_struct : shape_field_struct; + pa_content_field_struct : content_field_struct; +} + (** The type of a parser. *) + +val minimize_parsers : + string -> + (Pahole_parser.info * Pahole_parser.structure) list -> + (string, shape_field_struct) Hashtbl.t -> + (string, content_field_struct) Hashtbl.t -> + parser_ list * (string, parser_) Hashtbl.t + (** [minimize_parsers struct_name kernels sfhash cfhash] returns + a minimized list of parsers (a hash table of kernel version + to {!parser_}). + + [sfhash] and [cfhash] are the kernel version -> shape/content + field struct hashes returned by a previous call to + {!minimize_shape_field_structs} and {!minimize_content_field_structs} + respectively. *)