X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=extract%2Fcodegen%2Fkerneldb_to_parser.ml;h=1d56126b84fa964a5b585c0daa4fcc1f71aba373;hb=45b7766e66fb59ece5f07305553dc54a26b32d9d;hp=6b64786afc8511b7946fa229ae2f9c77ea035cbb;hpb=45845eeeff9ccbd4cbda63d6deaa9a29ab1abb05;p=virt-mem.git diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index 6b64786..1d56126 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -30,11 +30,28 @@ let what = [ "task_struct", ( - "struct task_struct", - [ "state"; "prio"; "normal_prio"; "static_prio"; "tasks"; "comm"] + "struct task_struct {", "};", true, + [ "state"; "prio"; "normal_prio"; "static_prio"; + "tasks'prev"; "tasks'next"; "mm"; "active_mm"; "comm"; "pid" ] + ); +(* + "mm_struct", ( + "struct mm_struct {", "};", true, + [ ] + ); +*) + "net_device", ( + "struct net_device {", "};", true, + [ "name"; "dev_addr" ] ); ] +let debug = false + +open Camlp4.PreCast +open Syntax +(*open Ast*) + open ExtList open ExtString open Printf @@ -63,7 +80,7 @@ Example (from toplevel of virt-mem source tree): let infos = Sys.readdir kernelsdir in let infos = Array.to_list infos in let infos = List.filter (fun name -> String.ends_with name ".info") infos in - let infos = List.map ((//) kernelsdir) infos in + let infos = List.map ( (//) kernelsdir) infos in (* Regular expressions. We really really should use ocaml-mikmatch ... *) let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in @@ -80,31 +97,33 @@ Example (from toplevel of virt-mem source tree): let chan = open_in filename in let line = input_line chan in - let name, version = + (* Kernel version string. *) + let version, arch = if Pcre.pmatch ~rex:re_oldformat line then ( - (* If the file starts with "RPM: \d+: ..." then it's the original - * format. Everything in one line. + (* If the file starts with "RPM: \d+: ..." then it's the + * original Fedora format. Everything in one line. *) let subs = Pcre.exec ~rex:re_oldformat line in - let name = Pcre.get_substring subs 1 in + (* let name = Pcre.get_substring subs 1 in *) let version = Pcre.get_substring subs 2 in let release = Pcre.get_substring subs 3 in let arch = Pcre.get_substring subs 4 in close_in chan; - name, sprintf "%s-%s.%s" version release arch + (* XXX Map name -> PAE, hugemem etc. *) + (* name, *) sprintf "%s-%s.%s" version release arch, arch ) else ( (* New-style "key: value" entries, up to end of file or the first * blank line. *) - let name, version, release, arch = - ref "", ref "", ref "", ref "" in + let (*name,*) version, release, arch = + (*ref "",*) ref "", ref "", ref "" in let rec loop line = try let subs = Pcre.exec ~rex:re_keyvalue line in let key = Pcre.get_substring subs 1 in let value = Pcre.get_substring subs 2 in - if key = "Name" then name := value - else if key = "Version" then version := value + (*if key = "Name" then name := value + else*) if key = "Version" then version := value else if key = "Release" then release := value else if key = "Architecture" then arch := value; let line = input_line chan in @@ -114,16 +133,585 @@ Example (from toplevel of virt-mem source tree): close_in chan in loop line; - let name, version, release, arch = - !name, !version, !release, !arch in - if name = "" || version = "" || release = "" || arch = "" then + let (*name,*) version, release, arch = + (*!name,*) !version, !release, !arch in + if (*name = "" ||*) version = "" || release = "" || arch = "" then failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); - name, sprintf "%s-%s.%s" version release arch + (* XXX Map name -> PAE, hugemem etc. *) + (* name, *) sprintf "%s-%s.%s" version release arch, arch ) in - printf "%s -> %s, %s\n" basename name version; + (*printf "%s -> %s %s\n%!" basename version arch;*) + + (basename, version, arch) + ) infos in + + (* For quick access to the opener strings, build a hash. *) + let openers = Hashtbl.create 13 in + List.iter ( + fun (name, (opener, closer, _, _)) -> + Hashtbl.add openers opener (closer, name) + ) what; + + (* Now read the data files and parse out the structures of interest. *) + let datas = List.map ( + fun (basename, version, arch) -> + let file_exists name = + try Unix.access name [Unix.F_OK]; true + with Unix.Unix_error _ -> false + in + let close_process_in cmd chan = + match Unix.close_process_in chan with + | Unix.WEXITED 0 -> () + | Unix.WEXITED i -> + eprintf "%s: command exited with code %d\n" cmd i; exit i + | Unix.WSIGNALED i -> + eprintf "%s: command exited with signal %d\n" cmd i; exit 1 + | Unix.WSTOPPED i -> + eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 + in - (basename, name, version) + (* Open the data file, uncompressing it on the fly if necessary. *) + let chan, close = + if file_exists (basename ^ ".data") then + open_in (basename ^ ".data"), close_in + else if file_exists (basename ^ ".data.gz") then ( + let cmd = + sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in + Unix.open_process_in cmd, close_process_in cmd + ) + else if file_exists (basename ^ ".data.bz2") then ( + let cmd = + sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in + Unix.open_process_in cmd, close_process_in cmd + ) else + failwith + (sprintf "%s: cannot find corresponding data file" basename) in + + (* Read the data file in, looking for structures of interest to us. *) + let bodies = Hashtbl.create 13 in + let rec loop () = + let line = input_line chan in + + (* If the line is an opener for one of the structures we + * are looking for, then for now just save all the text until + * we get to the closer line. + *) + (try + let closer, name = Hashtbl.find openers line in + let rec loop2 lines = + let line = input_line chan in + let lines = line :: lines in + if String.starts_with line closer then List.rev lines + else loop2 lines + in + + let body = + try loop2 [line] + with End_of_file -> + failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in + + Hashtbl.replace bodies name body + with Not_found -> ()); + + loop () + in + (try loop () with End_of_file -> ()); + + close chan; + + (* Make sure we got all the mandatory structures. *) + List.iter ( + fun (name, (_, _, mandatory, _)) -> + if mandatory && not (Hashtbl.mem bodies name) then + failwith (sprintf "%s: structure %s not found in this kernel" basename name) + ) what; + + (basename, version, arch, bodies) ) infos in - () + (* Now parse each structure body. + * XXX This would be better as a proper lex/yacc parser. + * XXX Even better would be to have a proper interface to libdwarves. + *) + let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in + let re_intfield = Pcre.regexp "int\\s+(\\w+);" in + let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in + let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in + let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in + let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in + + (* 'basename' is the source file, and second parameter ('body') is + * the list of text lines which covers this structure (minus the + * opener line). Result is the list of parsed fields from this + * structure. + *) + let rec parse basename = function + | [] -> assert false + | [_] -> [] (* Just the closer line, finished. *) + | line :: lines when Pcre.pmatch ~rex:re_structopener line -> + (* Recursively parse a sub-structure. First search for the + * corresponding closer line. + *) + let rec loop depth acc = function + | [] -> + eprintf "%s: %S has no matching close structure line\n%!" + basename line; + assert false + | line :: lines when Pcre.pmatch ~rex:re_structopener line -> + loop (depth+1) (line :: acc) lines + | line :: lines + when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> + (line :: acc), lines + | line :: lines + when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> + loop (depth-1) (line :: acc) lines + | line :: lines -> loop depth (line :: acc) lines + in + let nested_body, rest = loop 0 [] lines in + + (* Then parse the sub-structure. *) + let struct_name, nested_body = + match nested_body with + | [] -> assert false + | closer :: _ -> + let subs = Pcre.exec ~rex:re_structcloser closer in + let struct_name = + try Some (Pcre.get_substring subs 1) with Not_found -> None in + struct_name, List.rev nested_body in + let nested_fields = parse basename nested_body in + + (* Prefix the sub-fields with the name of the structure. *) + let nested_fields = + match struct_name with + | None -> nested_fields + | Some prefix -> + List.map ( + fun (name, details) -> (prefix ^ "'" ^ name, details) + ) nested_fields in + + (* Parse the rest. *) + nested_fields @ parse basename rest + + | line :: lines when Pcre.pmatch ~rex:re_intfield line -> + (* An int field. *) + let subs = Pcre.exec ~rex:re_intfield line in + let name = Pcre.get_substring subs 1 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + (name, (`Int, offset, size)) :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> + (* A pointer-to-struct field. *) + let subs = Pcre.exec ~rex:re_ptrfield line in + let struct_name = Pcre.get_substring subs 1 in + let name = Pcre.get_substring subs 2 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + (name, (`Ptr struct_name, offset, size)) + :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_strfield line -> + (* A string (char array) field. *) + let subs = Pcre.exec ~rex:re_strfield line in + let name = Pcre.get_substring subs 1 in + let width = int_of_string (Pcre.get_substring subs 2) in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + (name, (`Str width, offset, size)) + :: parse basename lines + with + Not_found -> parse basename lines + ); + + | _ :: lines -> + (* Just ignore any other field we can't parse. *) + parse basename lines + + in + + let datas = List.map ( + fun (basename, version, arch, bodies) -> + let structures = List.filter_map ( + fun (struct_name, (_, _, _, wanted_fields)) -> + let body = + try Some (Hashtbl.find bodies struct_name) + with Not_found -> None in + match body with + | None -> None + | Some body -> + let body = List.tl body in (* Don't care about opener line. *) + let fields = parse basename body in + + (* Compute total size of the structure. *) + let total_size = + let fields = List.map ( + fun (_, (_, offset, size)) -> offset + size + ) fields in + List.fold_left max 0 fields in + + (* That got us all the fields, but we only care about + * the wanted_fields. + *) + let fields = List.filter ( + fun (name, _) -> List.mem name wanted_fields + ) fields in + + (* Also check we have all the wanted fields. *) + List.iter ( + fun wanted_field -> + if not (List.mem_assoc wanted_field fields) then + failwith (sprintf "%s: structure %s is missing required field %s" basename struct_name wanted_field) + ) wanted_fields; + + (* Prefix all the field names with the structure name. *) + let fields = + List.map (fun (name, details) -> + struct_name ^ "_" ^ name, details) fields in + + Some (struct_name, (fields, total_size)) + ) what in + + (basename, version, arch, structures) + ) datas in + + if debug then + List.iter ( + fun (basename, version, arch, structures) -> + printf "%s (version: %s, arch: %s):\n" basename version arch; + List.iter ( + fun (struct_name, (fields, total_size)) -> + printf " struct %s {\n" struct_name; + List.iter ( + fun (field_name, (typ, offset, size)) -> + (match typ with + | `Int -> + printf " int %s; " field_name + | `Ptr struct_name -> + printf " struct %s *%s; " struct_name field_name + | `Str width -> + printf " char %s[%d]; " field_name width + ); + printf " /* offset = %d, size = %d */\n" offset size + ) fields; + printf " } /* %d bytes */\n\n" total_size; + ) structures; + ) datas; + + (* We'll generate a code file for each structure type (eg. task_struct + * across all kernel versions), so rearrange 'datas' for that purpose. + * + * XXX This loop is O(n^3), luckily n is small! + *) + let files = + List.map ( + fun (name, _) -> + name, + List.filter_map ( + fun (basename, version, arch, structures) -> + try Some (basename, version, arch, List.assoc name structures) + with Not_found -> None + ) datas + ) what in + + let datas = () in ignore datas; (* garbage collect *) + + (* Get just the field types. It's plausible that a field with the + * same name has a different type between kernel versions, so we must + * check that didn't happen. + *) + let files = List.map ( + fun (struct_name, kernels) -> + let field_types = + match kernels with + | [] -> [] + | (_, _, _, (fields, _)) :: kernels -> + let field_types_of_fields fields = + List.sort ( + List.map ( + fun (field_name, (typ, _, _)) -> field_name, typ + ) fields + ) + in + let field_types = field_types_of_fields fields in + List.iter ( + fun (_, _, _, (fields, _)) -> + if field_types <> field_types_of_fields fields then + failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name) + ) kernels; + field_types in + (struct_name, kernels, field_types) + ) files in + + (* To minimize generated code size, we want to fold together all + * structures where the particulars (eg. offsets, sizes, endianness) + * of the fields we care about are the same -- eg. between kernel + * versions which are very similar. + *) + let endian_of_architecture arch = + if String.starts_with arch "i386" || + String.starts_with arch "i486" || + String.starts_with arch "i586" || + String.starts_with arch "i686" || + String.starts_with arch "x86_64" || + String.starts_with arch "x86-64" then + Bitstring.LittleEndian + else if String.starts_with arch "ia64" then + Bitstring.LittleEndian (* XXX usually? *) + else if String.starts_with arch "ppc" then + Bitstring.BigEndian + else if String.starts_with arch "sparc" then + Bitstring.BigEndian + else + failwith (sprintf "endian_of_architecture: cannot parse %S" arch) + in + + let files = + List.map ( + fun (struct_name, kernels, field_types) -> + let hash = Hashtbl.create 13 in + let i = ref 0 in + let xs = ref [] in + let kernels = + List.map ( + fun (basename, version, arch, (fields, total_size)) -> + let key = endian_of_architecture arch, fields in + let j = + try Hashtbl.find hash key + with Not_found -> + incr i; + xs := (!i, key) :: !xs; Hashtbl.add hash key !i; + !i in + (basename, version, arch, total_size, j) + ) kernels in + struct_name, kernels, field_types, List.rev !xs + ) files in + + (* How much did we save by sharing? *) + if debug then + List.iter ( + fun (struct_name, kernels, _, parsers) -> + printf "struct %s:\n" struct_name; + printf " number of kernel versions: %d\n" (List.length kernels); + printf " number of parser functions needed after sharing: %d\n" + (List.length parsers) + ) files; + + (* Let's generate some code! *) + let files = + List.map ( + fun (struct_name, kernels, field_types, parsers) -> + (* Dummy location required - there are no real locations for + * output files. + *) + let _loc = Loc.ghost in + + (* The structure type. *) + let struct_type, struct_sig = + let fields = List.map ( + function + | (name, `Int) -> + <:ctyp< $lid:name$ : int64 >> + | (name, `Ptr _) -> + <:ctyp< $lid:name$ : Virt_mem_mmap.addr >> + | (name, `Str _) -> + <:ctyp< $lid:name$ : string >> + ) field_types in + let f, fs = match fields with + | [] -> failwith (sprintf "%s: structure has no fields" struct_name) + | f :: fs -> f, fs in + let fields = List.fold_left ( + fun fs f -> <:ctyp< $fs$ ; $f$ >> + ) f fs in + + let struct_type = <:str_item< type t = { $fields$ } >> in + let struct_sig = <:sig_item< type t = { $fields$ } >> in + struct_type, struct_sig in + + (* The shared parser functions. + * + * We could include bitmatch statements directly in here, but + * what happens is that the macros get expanded here, resulting + * in (even more) unreadable generated code. So instead just + * do a textual substitution later by post-processing the + * generated files. Not type-safe, but we can't have + * everything. + *) + let parser_stmts, parser_subs = + let parser_stmts = List.map ( + fun (i, _) -> + let fnname = sprintf "parser_%d" i in + <:str_item< + let $lid:fnname$ bits = $str:fnname$ + >> + ) parsers in + + let parser_stmts = + match parser_stmts with + | [] -> <:str_item< >> + | p :: ps -> + List.fold_left (fun ps p -> <:str_item< $ps$ $p$ >>) p ps in + + (* What gets substituted for "parser_NN" ... *) + let parser_subs = List.map ( + fun (i, (endian, fields)) -> + let fnname = sprintf "parser_%d" i in + let endian = + match endian with + | Bitstring.LittleEndian -> "littleendian" + | Bitstring.BigEndian -> "bigendian" + | _ -> assert false in + let patterns = + (* Fields must be sorted by offset, otherwise bitmatch + * will complain. + *) + let cmp (_, (_, o1, _)) (_, (_, o2, _)) = compare o1 o2 in + let fields = List.sort ~cmp fields in + String.concat ";\n " ( + List.map ( + function + | (field_name, (`Int, offset, size)) + | (field_name, (`Ptr _, offset, size)) -> + (* 'zero+' is a hack to force the type to int64. *) + sprintf "%s : zero+%d : offset(%d), %s" + field_name (size*8) (offset*8) endian + | (field_name, (`Str width, offset, size)) -> + sprintf "%s : %d : offset(%d), string" + field_name (width*8) (offset*8) + ) fields + ) in + let assignments = + String.concat ";\n " ( + List.map ( + function + | (field_name, (`Ptr "list_head", offset, size)) -> + sprintf "%s = Int64.sub %s %dL" field_name field_name offset + | (field_name, _) -> + sprintf "%s = %s" field_name field_name + ) fields + ) in + + let sub = + sprintf "\ + bitmatch bits with + | { %s } -> { %s } + | { _ } -> raise (ParseError (%S, %S, \"failed to match kernel structure\"))" + patterns assignments struct_name fnname in + + fnname, sub + ) parsers in + + parser_stmts, parser_subs in + + (* Define a map from kernel versions to parsing functions. *) + let version_map = + let stmts = List.fold_left ( + fun stmts (_, version, arch, total_size, i) -> + let parserfn = sprintf "parser_%d" i in + <:str_item< + $stmts$ + let v = ($lid:parserfn$, $`int:total_size$) + let map = StringMap.add $str:version$ v map + >> + ) <:str_item< let map = StringMap.empty >> kernels in + + <:str_item< + module StringMap = Map.Make (String) ;; + $stmts$ + >> in + + (* Code (.ml file). *) + let code = <:str_item< + let warning = "This code is automatically generated from the kernel database by kerneldb-to-parser program. Any edits you make will be lost." + let zero = 0 + exception ParseError of string * string * string;; + $struct_type$ + $parser_stmts$ + $version_map$ + + type kernel_version = string + let $lid:struct_name^"_known"$ version = StringMap.mem version map + let $lid:struct_name^"_size"$ version = + let _, size = StringMap.find version map in + size + let $lid:struct_name^"_of_bits"$ version bits = + let parsefn, _ = StringMap.find version map in + parsefn bits + let $lid:"get_"^struct_name$ version mem addr = + let parsefn, size = StringMap.find version map in + let bytes = Virt_mem_mmap.get_bytes mem addr size in + let bits = Bitstring.bitstring_of_string bytes in + parsefn bits + >> in + + (* Interface (.mli file). *) + let interface = <:sig_item< + exception ParseError of string * string * string;; + $struct_sig$ + + type kernel_version = string + val $lid:struct_name^"_known"$ : kernel_version -> bool + val $lid:struct_name^"_size"$ : kernel_version -> int + val $lid:struct_name^"_of_bits"$ : + kernel_version -> Bitstring.bitstring -> t + val $lid:"get_"^struct_name$ : kernel_version -> + ('a, 'b, [`HasMapping]) Virt_mem_mmap.t -> Virt_mem_mmap.addr -> t + >> in + + (struct_name, code, interface, parser_subs) + ) files in + + (* Finally generate the output files. *) + let re_subst = Pcre.regexp "^(.*)\"(parser_\\d+)\"(.*)$" in + + List.iter ( + fun (struct_name, code, interface, parser_subs) -> + (* Interface (.mli file). *) + let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in + printf "Writing %s interface to %s ...\n%!" struct_name output_file; + Printers.OCaml.print_interf ~output_file interface; + + (* Implementation (.ml file). *) + let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in + printf "Writing %s implementation to %s ...\n%!" struct_name output_file; + + let new_output_file = output_file ^ ".new" in + Printers.OCaml.print_implem ~output_file:new_output_file code; + + (* Substitute the parser bodies in the output file. *) + let ichan = open_in new_output_file in + let ochan = open_out output_file in + + let rec loop () = + let line = input_line ichan in + let line = + if Pcre.pmatch ~rex:re_subst line then ( + let subs = Pcre.exec ~rex:re_subst line in + let start = Pcre.get_substring subs 1 in + let template = Pcre.get_substring subs 2 in + let rest = Pcre.get_substring subs 3 in + let sub = List.assoc template parser_subs in + start ^ sub ^ rest + ) else line in + output_string ochan line; output_char ochan '\n'; + loop () + in + (try loop () with End_of_file -> ()); + + close_out ochan; + close_in ichan; + + Unix.unlink new_output_file + ) files