X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=extract%2Fcodegen%2Fkerneldb_to_parser.ml;h=e7b4142e8c520e1c552f4e97dfced12492caa970;hb=7bd9a4fd2fef8beee44fc8982c90aee2643876dd;hp=1137f30a05f5f715a837e71a5effa01a57f73a0c;hpb=c0ef66b9b93fdc37e351f2fa657df35868472638;p=virt-mem.git diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index 1137f30..e7b4142 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -32,18 +32,26 @@ let what = [ "task_struct", ( "struct task_struct {", "};", true, [ "state"; "prio"; "normal_prio"; "static_prio"; - "tasks.prev"; "tasks.next"; "comm"] + "tasks'prev"; "tasks'next"; "comm"] ); +(* "mm_struct", ( "struct mm_struct {", "};", true, [ ] ); +*) "net_device", ( "struct net_device {", "};", true, [ "name"; "dev_addr" ] ); ] +let debug = true + +open Camlp4.PreCast +open Syntax +(*open Ast*) + open ExtList open ExtString open Printf @@ -90,7 +98,7 @@ Example (from toplevel of virt-mem source tree): let line = input_line chan in (* Kernel version string. *) - let version = + let version, arch = if Pcre.pmatch ~rex:re_oldformat line then ( (* If the file starts with "RPM: \d+: ..." then it's the * original Fedora format. Everything in one line. @@ -102,7 +110,7 @@ Example (from toplevel of virt-mem source tree): let arch = Pcre.get_substring subs 4 in close_in chan; (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch + (* name, *) sprintf "%s-%s.%s" version release arch, arch ) else ( (* New-style "key: value" entries, up to end of file or the first * blank line. @@ -130,12 +138,12 @@ Example (from toplevel of virt-mem source tree): if (*name = "" ||*) version = "" || release = "" || arch = "" then failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch + (* name, *) sprintf "%s-%s.%s" version release arch, arch ) in - (*printf "%s -> %s\n%!" basename version;*) + (*printf "%s -> %s %s\n%!" basename version arch;*) - (basename, version) + (basename, version, arch) ) infos in (* For quick access to the opener strings, build a hash. *) @@ -147,7 +155,7 @@ Example (from toplevel of virt-mem source tree): (* Now read the data files and parse out the structures of interest. *) let datas = List.map ( - fun (basename, version) -> + fun (basename, version, arch) -> let file_exists name = try Unix.access name [Unix.F_OK]; true with Unix.Unix_error _ -> false @@ -219,7 +227,7 @@ Example (from toplevel of virt-mem source tree): failwith (sprintf "%s: structure %s not found in this kernel" basename name) ) what; - (basename, version, bodies) + (basename, version, arch, bodies) ) infos in (* Now parse each structure body. @@ -279,7 +287,7 @@ Example (from toplevel of virt-mem source tree): | None -> nested_fields | Some prefix -> List.map ( - fun (name, details) -> (prefix ^ "." ^ name, details) + fun (name, details) -> (prefix ^ "'" ^ name, details) ) nested_fields in (* Parse the rest. *) @@ -333,7 +341,7 @@ Example (from toplevel of virt-mem source tree): in let datas = List.map ( - fun (basename, version, bodies) -> + fun (basename, version, arch, bodies) -> let structures = List.filter_map ( fun (name, (_, _, _, wanted_fields)) -> let body = @@ -344,6 +352,13 @@ Example (from toplevel of virt-mem source tree): let body = List.tl body in (* Don't care about opener line. *) let fields = parse basename body in + (* Compute total size of the structure. *) + let total_size = + let fields = List.map ( + fun (_, (_, offset, size)) -> offset + size + ) fields in + List.fold_left max 0 fields in + (* That got us all the fields, but we only care about * the wanted_fields. *) @@ -358,40 +373,259 @@ Example (from toplevel of virt-mem source tree): failwith (sprintf "%s: structure %s is missing required field %s" basename name wanted_field) ) wanted_fields; - Some (name, fields) + Some (name, (fields, total_size)) ) what in - (basename, version, structures) + (basename, version, arch, structures) ) datas in - (* If you're debugging, uncomment this to print out the parsed - * structures. + if debug then + List.iter ( + fun (basename, version, arch, structures) -> + printf "%s (version: %s, arch: %s):\n" basename version arch; + List.iter ( + fun (struct_name, (fields, total_size)) -> + printf " struct %s {\n" struct_name; + List.iter ( + fun (field_name, (typ, offset, size)) -> + (match typ with + | `Int -> + printf " int %s; " field_name + | `Ptr struct_name -> + printf " struct %s *%s; " struct_name field_name + | `Str width -> + printf " char %s[%d]; " field_name width + ); + printf " /* offset = %d, size = %d */\n" offset size + ) fields; + printf " } /* %d bytes */\n\n" total_size; + ) structures; + ) datas; + + (* We'll generate a code file for each structure type (eg. task_struct + * across all kernel versions), so rearrange 'datas' for that purpose. + * + * XXX This loop is O(n^3), luckily n is small! *) -(* - List.iter ( - fun (basename, version, structures) -> - printf "%s (version: %s):\n" basename version; - List.iter ( - fun (struct_name, fields) -> - printf " struct %s {\n" struct_name; - List.iter ( - fun (field_name, (typ, offset, size)) -> - (match typ with - | `Int -> printf " int %s; " field_name - | `Ptr struct_name -> - printf " struct %s *%s; " struct_name field_name - | `Str width -> - printf " char %s[%d]; " field_name width - ); - printf " /* offset = %d, size = %d */\n" offset size - ) fields; - printf " }\n\n"; - ) structures; - ) datas; -*) - - (* Let's generate some code! *) - + let files = + List.map ( + fun (name, _) -> + name, + List.filter_map ( + fun (basename, version, arch, structures) -> + try Some (basename, version, arch, List.assoc name structures) + with Not_found -> None + ) datas + ) what in + + let datas = () in ignore datas; (* garbage collect *) + + (* Get just the field types. It's plausible that a field with the + * same name has a different type between kernel versions, so we must + * check that didn't happen. + *) + let files = List.map ( + fun (struct_name, kernels) -> + let field_types = + match kernels with + | [] -> [] + | (_, _, _, (fields, _)) :: kernels -> + let field_types_of_fields fields = + List.sort ( + List.map ( + fun (field_name, (typ, _, _)) -> field_name, typ + ) fields + ) + in + let field_types = field_types_of_fields fields in + List.iter ( + fun (_, _, _, (fields, _)) -> + if field_types <> field_types_of_fields fields then + failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name) + ) kernels; + field_types in + (struct_name, kernels, field_types) + ) files in + + (* To minimize generated code size, we want to fold together all + * structures where the particulars (eg. offsets, sizes, endianness) + * of the fields we care about are the same -- eg. between kernel + * versions which are very similar. + *) + let endian_of_architecture arch = + if String.starts_with arch "i386" || + String.starts_with arch "i486" || + String.starts_with arch "i586" || + String.starts_with arch "i686" || + String.starts_with arch "x86_64" || + String.starts_with arch "x86-64" then + Bitstring.LittleEndian + else if String.starts_with arch "ia64" then + Bitstring.LittleEndian (* XXX usually? *) + else if String.starts_with arch "ppc" then + Bitstring.BigEndian + else if String.starts_with arch "sparc" then + Bitstring.BigEndian + else + failwith (sprintf "endian_of_architecture: cannot parse %S" arch) + in + let files = + List.map ( + fun (struct_name, kernels, field_types) -> + let hash = Hashtbl.create 13 in + let i = ref 0 in + let xs = ref [] in + let kernels = + List.map ( + fun (basename, version, arch, (fields, total_size)) -> + let key = endian_of_architecture arch, fields in + let j = + try Hashtbl.find hash key + with Not_found -> + incr i; + xs := (!i, key) :: !xs; Hashtbl.add hash key !i; + !i in + (basename, version, arch, total_size, j) + ) kernels in + struct_name, kernels, field_types, List.rev !xs + ) files in + + (* How much did we save by sharing? *) + if debug then + List.iter ( + fun (struct_name, kernels, _, parsers) -> + printf "struct %s:\n" struct_name; + printf " number of kernel versions: %d\n" (List.length kernels); + printf " number of parser functions needed after sharing: %d\n" + (List.length parsers) + ) files; - () + (* Let's generate some code! *) + let files = + List.map ( + fun (struct_name, kernels, field_types, parsers) -> + (* Dummy location required - there are no real locations for + * output files. + *) + let _loc = Loc.ghost in + + (* The structure type. *) + let struct_type, struct_sig = + let fields = List.map ( + function + | (name, `Int) -> + <:ctyp< $lid:name$ : int >> + | (name, `Ptr struct_name) -> + <:ctyp< $lid:name$ : [`$lid:struct_name$] int64 >> + | (name, `Str _) -> + <:ctyp< $lid:name$ : string >> + ) field_types in + let f, fs = match fields with + | [] -> failwith (sprintf "%s: structure has no fields" struct_name) + | f :: fs -> f, fs in + let fields = List.fold_left ( + fun fs f -> <:ctyp< $fs$ ; $f$ >> + ) f fs in + + let struct_type = <:str_item< type t = { $fields$ } >> in + let struct_sig = <:sig_item< type t = { $fields$ } >> in + struct_type, struct_sig in + + (* The shared parser functions. + * We could include bitmatch statements directly in here, but + * what happens is that the macros get expanded here, resulting + * in unreadable generated code. So instead just do a textual + * substitution later by post-processing the generated files. + * Not type-safe, but we can't have everything. + *) + let parser_stmts, parser_subs = + let parser_stmts = List.map ( + fun (i, _) -> + let fnname = sprintf "parser_%d" i in + <:str_item< + let $lid:fnname$ bits = $str:fnname$ + >> + ) parsers in + + let parser_stmts = + match parser_stmts with + | [] -> <:str_item< >> + | p :: ps -> + List.fold_left (fun ps p -> <:str_item< $ps$ $p$ >>) p ps in + + (* What gets substituted for "parser_NN" ... *) + let parser_subs = List.map ( + fun (i, (endian, fields)) -> + let fnname = sprintf "parser_%d" i in + let patterns = "" and assignments = "" in (* XXX *) + let sub = + sprintf "bitmatch bits with + | { %s } -> { %s } + | { _ } -> raise (ParseError (%S, %S, \"failed to match kernel structure\"))" + patterns assignments struct_name fnname in + fnname, sub + ) parsers in + + parser_stmts, parser_subs in + + (* Define a map from kernel versions to parsing functions. *) + let version_map = + let stmts = List.fold_left ( + fun stmts (_, version, arch, total_size, i) -> + let parserfn = sprintf "parser_%d" i in + <:str_item< + $stmts$ + let v = ($lid:parserfn$, $`int:total_size$) + let map = StringMap.add $str:version$ v map + >> + ) <:str_item< let map = StringMap.empty >> kernels in + + <:str_item< + module StringMap = Map.Make (String) + $stmts$ + >> in + + (* Code (.ml file). *) + let code = <:str_item< + let warning = "This code is automatically generated from the kernel database by kerneldb-to-parser program. Any edits you make will be lost." + exception ParseError of string ;; + $struct_type$ + $parser_stmts$ + $version_map$ + + type kernel_version = string + let known version = StringMap.mem version map + let size version = + let _, size = StringMap.find version map in + size + let get version bits = + let parsefn, _ = StringMap.find version map in + parsefn bits + >> in + + (* Interface (.mli file). *) + let interface = <:sig_item< + exception ParseError of string ;; + $struct_sig$ + + type kernel_version = string + val known : kernel_version -> bool + val size : kernel_version -> int + val get : kernel_version -> Bitstring.bitstring -> t + >> in + + (struct_name, code, interface) + ) files in + + (* Finally generate the output files. *) + List.iter ( + fun (struct_name, code, interface) -> + let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in + printf "Writing %s implementation to %s ...\n%!" struct_name output_file; + Printers.OCaml.print_implem ~output_file code; + + let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in + printf "Writing %s interface to %s ...\n%!" struct_name output_file; + Printers.OCaml.print_interf ~output_file interface + ) files