From: Richard W.M. Jones <"Richard W.M. Jones "> Date: Wed, 6 Aug 2008 13:47:04 +0000 (+0100) Subject: Code generation phase. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=c7603296c4f420bfc7db9a7dbf72efb0fec97b4d;p=virt-mem.git Code generation phase. --- diff --git a/extract/codegen/Makefile.in b/extract/codegen/Makefile.in index edcfe96..85b2619 100644 --- a/extract/codegen/Makefile.in +++ b/extract/codegen/Makefile.in @@ -23,14 +23,14 @@ INSTALL = @INSTALL@ MKDIR_P = @MKDIR_P@ bindir = @bindir@ -OCAMLCPACKAGES = -package extlib,pcre,unix +OCAMLCPACKAGES = -package extlib,pcre,unix,camlp4,bitstring.syntax -syntax bitstring.syntax OCAMLCFLAGS = @OCAMLCFLAGS@ -OCAMLCLIBS = -linkpkg +OCAMLCLIBS = -linkpkg camlp4lib.cma OCAMLOPTFLAGS = @OCAMLOPTFLAGS@ OCAMLOPTPACKAGES = $(OCAMLCPACKAGES) -OCAMLOPTLIBS = -linkpkg +OCAMLOPTLIBS = -linkpkg camlp4lib.cmxa TARGETS = kerneldb-to-parser.opt diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index 1137f30..61fc8b0 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -32,18 +32,26 @@ let what = [ "task_struct", ( "struct task_struct {", "};", true, [ "state"; "prio"; "normal_prio"; "static_prio"; - "tasks.prev"; "tasks.next"; "comm"] + "tasks'prev"; "tasks'next"; "comm"] ); +(* "mm_struct", ( "struct mm_struct {", "};", true, [ ] ); +*) "net_device", ( "struct net_device {", "};", true, [ "name"; "dev_addr" ] ); ] +let debug = true + +open Camlp4.PreCast +open Syntax +(*open Ast*) + open ExtList open ExtString open Printf @@ -90,7 +98,7 @@ Example (from toplevel of virt-mem source tree): let line = input_line chan in (* Kernel version string. *) - let version = + let version, arch = if Pcre.pmatch ~rex:re_oldformat line then ( (* If the file starts with "RPM: \d+: ..." then it's the * original Fedora format. Everything in one line. @@ -102,7 +110,7 @@ Example (from toplevel of virt-mem source tree): let arch = Pcre.get_substring subs 4 in close_in chan; (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch + (* name, *) sprintf "%s-%s.%s" version release arch, arch ) else ( (* New-style "key: value" entries, up to end of file or the first * blank line. @@ -130,12 +138,12 @@ Example (from toplevel of virt-mem source tree): if (*name = "" ||*) version = "" || release = "" || arch = "" then failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch + (* name, *) sprintf "%s-%s.%s" version release arch, arch ) in - (*printf "%s -> %s\n%!" basename version;*) + (*printf "%s -> %s %s\n%!" basename version arch;*) - (basename, version) + (basename, version, arch) ) infos in (* For quick access to the opener strings, build a hash. *) @@ -147,7 +155,7 @@ Example (from toplevel of virt-mem source tree): (* Now read the data files and parse out the structures of interest. *) let datas = List.map ( - fun (basename, version) -> + fun (basename, version, arch) -> let file_exists name = try Unix.access name [Unix.F_OK]; true with Unix.Unix_error _ -> false @@ -219,7 +227,7 @@ Example (from toplevel of virt-mem source tree): failwith (sprintf "%s: structure %s not found in this kernel" basename name) ) what; - (basename, version, bodies) + (basename, version, arch, bodies) ) infos in (* Now parse each structure body. @@ -279,7 +287,7 @@ Example (from toplevel of virt-mem source tree): | None -> nested_fields | Some prefix -> List.map ( - fun (name, details) -> (prefix ^ "." ^ name, details) + fun (name, details) -> (prefix ^ "'" ^ name, details) ) nested_fields in (* Parse the rest. *) @@ -333,7 +341,7 @@ Example (from toplevel of virt-mem source tree): in let datas = List.map ( - fun (basename, version, bodies) -> + fun (basename, version, arch, bodies) -> let structures = List.filter_map ( fun (name, (_, _, _, wanted_fields)) -> let body = @@ -361,37 +369,179 @@ Example (from toplevel of virt-mem source tree): Some (name, fields) ) what in - (basename, version, structures) + (basename, version, arch, structures) ) datas in - (* If you're debugging, uncomment this to print out the parsed - * structures. + if debug then + List.iter ( + fun (basename, version, arch, structures) -> + printf "%s (version: %s, arch: %s):\n" basename version arch; + List.iter ( + fun (struct_name, fields) -> + printf " struct %s {\n" struct_name; + List.iter ( + fun (field_name, (typ, offset, size)) -> + (match typ with + | `Int -> + printf " int %s; " field_name + | `Ptr struct_name -> + printf " struct %s *%s; " struct_name field_name + | `Str width -> + printf " char %s[%d]; " field_name width + ); + printf " /* offset = %d, size = %d */\n" offset size + ) fields; + printf " }\n\n"; + ) structures; + ) datas; + + (* We'll generate a code file for each structure type (eg. task_struct + * across all kernel versions), so rearrange 'datas' for that purpose. + * + * XXX This loop is O(n^3), luckily n is small! *) -(* - List.iter ( - fun (basename, version, structures) -> - printf "%s (version: %s):\n" basename version; - List.iter ( - fun (struct_name, fields) -> - printf " struct %s {\n" struct_name; - List.iter ( - fun (field_name, (typ, offset, size)) -> - (match typ with - | `Int -> printf " int %s; " field_name - | `Ptr struct_name -> - printf " struct %s *%s; " struct_name field_name - | `Str width -> - printf " char %s[%d]; " field_name width - ); - printf " /* offset = %d, size = %d */\n" offset size - ) fields; - printf " }\n\n"; - ) structures; - ) datas; -*) + let files = + List.map ( + fun (name, _) -> + name, + List.filter_map ( + fun (basename, version, arch, structures) -> + try Some (basename, version, arch, List.assoc name structures) + with Not_found -> None + ) datas + ) what in + + let datas = () in ignore datas; (* garbage collect *) + + (* Get just the field types. It's plausible that a field with the + * same name has a different type between kernel versions, so we must + * check that didn't happen. + *) + let files = List.map ( + fun (struct_name, kernels) -> + let field_types = + match kernels with + | [] -> [] + | (_, _, _, fields) :: kernels -> + let field_types_of_fields fields = + List.sort ( + List.map ( + fun (field_name, (typ, _, _)) -> field_name, typ + ) fields + ) + in + let field_types = field_types_of_fields fields in + List.iter ( + fun (_, _, _, fields) -> + if field_types <> field_types_of_fields fields then + failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name) + ) kernels; + field_types in + (struct_name, kernels, field_types) + ) files in + + (* To minimize generated code size, we want to fold together all + * structures where the particulars (eg. offsets, sizes, endianness) + * of the fields we care about are the same -- eg. between kernel + * versions which are very similar. + *) + let endian_of_architecture arch = + if String.starts_with arch "i386" || + String.starts_with arch "i486" || + String.starts_with arch "i586" || + String.starts_with arch "i686" || + String.starts_with arch "x86_64" || + String.starts_with arch "x86-64" then + Bitstring.LittleEndian + else if String.starts_with arch "ia64" then + Bitstring.LittleEndian (* XXX usually? *) + else if String.starts_with arch "ppc" then + Bitstring.BigEndian + else if String.starts_with arch "sparc" then + Bitstring.BigEndian + else + failwith (sprintf "endian_of_architecture: cannot parse %S" arch) + in - (* Let's generate some code! *) - + let files = + List.map ( + fun (struct_name, kernels, field_types) -> + let hash = Hashtbl.create 13 in + let i = ref 0 in + let xs = ref [] in + let kernels = + List.map ( + fun (basename, version, arch, fields) -> + let key = endian_of_architecture arch, fields in + let j = + try Hashtbl.find hash key + with Not_found -> + incr i; + xs := (!i, key) :: !xs; Hashtbl.add hash key !i; + !i in + (basename, version, arch, j) + ) kernels in + struct_name, kernels, field_types, List.rev !xs + ) files in + + (* How much did we save by sharing? *) + if debug then + List.iter ( + fun (struct_name, kernels, _, parsers) -> + printf "struct %s:\n" struct_name; + printf " number of kernel versions: %d\n" (List.length kernels); + printf " number of parser functions needed after sharing: %d\n" + (List.length parsers) + ) files; + (* Let's generate some code! *) + let files = + List.map ( + fun (struct_name, kernels, field_types, parsers) -> + (* Dummy location required - there are no real locations for + * output files. + *) + let _loc = Loc.ghost in + + (* The structure type. *) + let struct_type, struct_sig = + let fields = List.map ( + function + | (name, `Int) -> + <:ctyp< $lid:name$ : int >> + | (name, `Ptr struct_name) -> + <:ctyp< $lid:name$ : (*`$str:struct_name$*) int64 >> + | (name, `Str _) -> + <:ctyp< $lid:name$ : string >> + ) field_types in + let f, fs = match fields with + | [] -> failwith (sprintf "%s: structure has no fields" struct_name) + | f :: fs -> f, fs in + let fields = List.fold_left ( + fun fs f -> <:ctyp< $fs$ ; $f$ >> + ) f fs in + + let struct_type = <:str_item< type t = { $fields$ } >> in + let struct_sig = <:sig_item< type t = { $fields$ } >> in + struct_type, struct_sig in + + let code = <:str_item< + $struct_type$ + >> in + + let interface = <:sig_item< + $struct_sig$ + >> in + + (struct_name, code, interface) + ) files in + + (* Finally generate the output files. *) + List.iter ( + fun (struct_name, code, interface) -> + let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in + Printers.OCaml.print_implem ~output_file code; - () + let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in + Printers.OCaml.print_interf ~output_file interface + ) files