X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=extract%2Fcodegen%2Fkerneldb_to_parser.ml;h=177d607a5d18fb36ee3fa0ac9d617d65b8996dcb;hb=c15c1624692d506eefe5f2cb2d775a6fb9127589;hp=61fc8b0354775ce591241123e0186f0422207c07;hpb=c7603296c4f420bfc7db9a7dbf72efb0fec97b4d;p=virt-mem.git diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index 61fc8b0..177d607 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -28,22 +28,107 @@ and fields we try to parse. *) -let what = [ - "task_struct", ( - "struct task_struct {", "};", true, - [ "state"; "prio"; "normal_prio"; "static_prio"; - "tasks'prev"; "tasks'next"; "comm"] - ); +type struct_t = { + opener : string; (* String in pa_hole file which starts this struct. *) + closer : string; (* String in pa_hole file which ends this struct. *) + mandatory_struct : bool; (* Is this struct mandatory? *) + fields : (string * field_t) list; (* List of interesting fields. *) +} +and field_t = { + mandatory_field : bool; (* Is this field mandatory? *) + list_head_adjustment : bool; (* Only applies if the field points to a + * struct list_head: If true, then we do the + * list_head adjustment, so the field points + * to the start of the structure. If false, + * leave the pointer intact. The list_head + * adjustment only works if the list_head + * is in the same type of structure. + *) +} + +let ordinary_field = { mandatory_field = true; list_head_adjustment = true; } + +(*---------------------------------------------------------------------- + * This controls what structures & fields we will parse out. + *----------------------------------------------------------------------*) +let structs = [ + "task_struct", { + opener = "struct task_struct {"; closer = "};"; mandatory_struct = true; + fields = [ + "state", ordinary_field; + "prio", ordinary_field; + "normal_prio", ordinary_field; + "static_prio", ordinary_field; + "tasks'prev", ordinary_field; + "tasks'next", ordinary_field; + "mm", ordinary_field; + "active_mm", ordinary_field; + "comm", ordinary_field; + "pid", ordinary_field; + ] + }; (* "mm_struct", ( "struct mm_struct {", "};", true, [ ] ); *) - "net_device", ( - "struct net_device {", "};", true, - [ "name"; "dev_addr" ] - ); + "net_device", { + opener = "struct net_device {"; closer = "};"; mandatory_struct = true; + fields = [ + "dev_list'prev", { mandatory_field = false; list_head_adjustment = true }; + "dev_list'next", { mandatory_field = false; list_head_adjustment = true }; + "next", { mandatory_field = false; list_head_adjustment = true }; + "name", ordinary_field; + "flags", ordinary_field; + "operstate", ordinary_field; + "mtu", ordinary_field; + "perm_addr", ordinary_field; + "addr_len", ordinary_field; + "ip_ptr", ordinary_field; + "ip6_ptr", ordinary_field; + ] + }; + "net", { + opener = "struct net {"; closer = "};"; mandatory_struct = false; + fields = [ + "dev_base_head'next", + (* Don't do list_head adjustment on this field, because it points + * to a net_device struct. + *) + { mandatory_field = true; list_head_adjustment = false }; + ] + }; + "in_device", { + opener = "struct in_device {"; closer = "};"; mandatory_struct = true; + fields = [ + "ifa_list", ordinary_field; + ]; + }; + "inet6_dev", { + opener = "struct inet6_dev {"; closer = "};"; mandatory_struct = true; + fields = [ + "addr_list", ordinary_field; + ]; + }; + "in_ifaddr", { + opener = "struct in_ifaddr {"; closer = "};"; mandatory_struct = true; + fields = [ + "ifa_next", ordinary_field; + "ifa_local", ordinary_field; + "ifa_address", ordinary_field; + "ifa_mask", ordinary_field; + "ifa_broadcast", ordinary_field; + ]; + }; + "inet6_ifaddr", { + opener = "struct inet6_ifaddr {"; closer = "};"; mandatory_struct = true; + fields = [ + (*"addr'in6_u'u6_addr8", ordinary_field;*) + "prefix_len", ordinary_field; + "lst_next", ordinary_field; + ]; + }; ] let debug = true @@ -56,8 +141,60 @@ open ExtList open ExtString open Printf +module PP = Pahole_parser + let (//) = Filename.concat +(* Couple of handy camlp4 construction functions which do some + * things that ought to be easy/obvious but aren't. + * + * 'concat_str_items' concatenates a list of str_item together into + * one big str_item. + * + * 'concat_record_fields' concatenates a list of records fields into + * a record. The list must have at least one element. + * + * 'build_record' builds a record out of record fields. + * + * 'build_tuple_from_exprs' builds an arbitrary length tuple from + * a list of expressions of length >= 2. + * + * Thanks to bluestorm on #ocaml for getting these working. + *) +let concat_str_items _loc items = + match items with + | [] -> <:str_item< >> + | x :: xs -> + List.fold_left (fun xs x -> <:str_item< $xs$ $x$ >>) x xs + +let concat_sig_items _loc items = + match items with + | [] -> <:sig_item< >> + | x :: xs -> + List.fold_left (fun xs x -> <:sig_item< $xs$ $x$ >>) x xs + +let concat_record_fields _loc fields = + match fields with + | [] -> assert false + | f :: fs -> + List.fold_left (fun fs f -> <:ctyp< $fs$ ; $f$ >>) f fs + +let concat_record_bindings _loc rbs = + match rbs with + | [] -> assert false + | rb :: rbs -> + List.fold_left (fun rbs rb -> <:rec_binding< $rbs$ ; $rb$ >>) rb rbs + +let build_record _loc rbs = + Ast.ExRec (_loc, rbs, Ast.ExNil _loc) + +let build_tuple_from_exprs _loc exprs = + match exprs with + | [] | [_] -> assert false + | x :: xs -> + Ast.ExTup (_loc, + List.fold_left (fun xs x -> Ast.ExCom (_loc, x, xs)) x xs) + let () = let args = Array.to_list Sys.argv in @@ -76,367 +213,196 @@ Example (from toplevel of virt-mem source tree): " arg0 arg0 arg0; exit 2 in - (* Get the *.info files from the kernels database. *) - let infos = Sys.readdir kernelsdir in - let infos = Array.to_list infos in - let infos = List.filter (fun name -> String.ends_with name ".info") infos in - let infos = List.map ((//) kernelsdir) infos in + let kernels = PP.list_kernels kernelsdir in + let nr_kernels = List.length kernels in - (* Regular expressions. We really really should use ocaml-mikmatch ... *) - let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in - let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in + let kernels = List.mapi ( + fun i info -> + printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels; - (* Parse in the *.info files. These have historically had a few different - * formats that we need to support. - *) - let infos = List.map ( - fun filename -> - (* Get the basename (for getting the .data file later on). *) - let basename = Filename.chop_suffix filename ".info" in - - let chan = open_in filename in - let line = input_line chan in - - (* Kernel version string. *) - let version, arch = - if Pcre.pmatch ~rex:re_oldformat line then ( - (* If the file starts with "RPM: \d+: ..." then it's the - * original Fedora format. Everything in one line. - *) - let subs = Pcre.exec ~rex:re_oldformat line in - (* let name = Pcre.get_substring subs 1 in *) - let version = Pcre.get_substring subs 2 in - let release = Pcre.get_substring subs 3 in - let arch = Pcre.get_substring subs 4 in - close_in chan; - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) else ( - (* New-style "key: value" entries, up to end of file or the first - * blank line. - *) - let (*name,*) version, release, arch = - (*ref "",*) ref "", ref "", ref "" in - let rec loop line = - try - let subs = Pcre.exec ~rex:re_keyvalue line in - let key = Pcre.get_substring subs 1 in - let value = Pcre.get_substring subs 2 in - (*if key = "Name" then name := value - else*) if key = "Version" then version := value - else if key = "Release" then release := value - else if key = "Architecture" then arch := value; - let line = input_line chan in - loop line - with - Not_found | End_of_file -> - close_in chan - in - loop line; - let (*name,*) version, release, arch = - (*!name,*) !version, !release, !arch in - if (*name = "" ||*) version = "" || release = "" || arch = "" then - failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) in - - (*printf "%s -> %s %s\n%!" basename version arch;*) - - (basename, version, arch) - ) infos in - - (* For quick access to the opener strings, build a hash. *) - let openers = Hashtbl.create 13 in - List.iter ( - fun (name, (opener, closer, _, _)) -> - Hashtbl.add openers opener (closer, name) - ) what; - - (* Now read the data files and parse out the structures of interest. *) - let datas = List.map ( - fun (basename, version, arch) -> - let file_exists name = - try Unix.access name [Unix.F_OK]; true - with Unix.Unix_error _ -> false - in - let close_process_in cmd chan = - match Unix.close_process_in chan with - | Unix.WEXITED 0 -> () - | Unix.WEXITED i -> - eprintf "%s: command exited with code %d\n" cmd i; exit i - | Unix.WSIGNALED i -> - eprintf "%s: command exited with signal %d\n" cmd i; exit 1 - | Unix.WSTOPPED i -> - eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 - in - - (* Open the data file, uncompressing it on the fly if necessary. *) - let chan, close = - if file_exists (basename ^ ".data") then - open_in (basename ^ ".data"), close_in - else if file_exists (basename ^ ".data.gz") then ( - let cmd = - sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in - Unix.open_process_in cmd, close_process_in cmd - ) - else if file_exists (basename ^ ".data.bz2") then ( - let cmd = - sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in - Unix.open_process_in cmd, close_process_in cmd - ) else - failwith - (sprintf "%s: cannot find corresponding data file" basename) in - - (* Read the data file in, looking for structures of interest to us. *) - let bodies = Hashtbl.create 13 in - let rec loop () = - let line = input_line chan in - - (* If the line is an opener for one of the structures we - * are looking for, then for now just save all the text until - * we get to the closer line. - *) - (try - let closer, name = Hashtbl.find openers line in - let rec loop2 lines = - let line = input_line chan in - let lines = line :: lines in - if String.starts_with line closer then List.rev lines - else loop2 lines - in - - let body = - try loop2 [line] - with End_of_file -> - failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in - - Hashtbl.replace bodies name body - with Not_found -> ()); + let struct_names = List.map fst structs in + let structures = PP.load_structures info struct_names in - loop () - in - (try loop () with End_of_file -> ()); - - close chan; - - (* Make sure we got all the mandatory structures. *) + (* Make sure we got all the mandatory structures & fields. *) List.iter ( - fun (name, (_, _, mandatory, _)) -> - if mandatory && not (Hashtbl.mem bodies name) then - failwith (sprintf "%s: structure %s not found in this kernel" basename name) - ) what; + fun (struct_name, + { mandatory_struct = mandatory; fields = wanted_fields }) -> + try + let s = + List.find (fun s -> struct_name = s.PP.struct_name) + structures in + + (* Check we have all the mandatory fields. *) + let all_fields = s.PP.struct_fields in + List.iter ( + fun (wanted_field, { mandatory_field = mandatory }) -> + let got_it = + List.exists ( + fun { PP.field_name = name } -> name = wanted_field + ) all_fields in + if mandatory && not got_it then ( + eprintf "%s: structure %s is missing required field %s\n" + info.PP.basename struct_name wanted_field; + eprintf "fields found in this structure:\n"; + List.iter ( + fun { PP.field_name = name } -> eprintf "\t%s\n" name + ) all_fields; + exit 1 + ); + ) wanted_fields + + with Not_found -> + if mandatory then + failwith (sprintf "%s: structure %s not found in this kernel" + info.PP.basename struct_name) + ) structs; + + let structures = + List.map ( + fun ({ PP.struct_name = struct_name; PP.struct_fields = fields } + as structure) -> + let { fields = wanted_fields } = List.assoc struct_name structs in + + (* That got us all the fields, but we only care about + * the wanted_fields. + *) + let fields = List.filter ( + fun { PP.field_name = name } -> List.mem_assoc name wanted_fields + ) fields in + + (* Prefix all the field names with the structure name. *) + let fields = + List.map ( + fun ({ PP.field_name = name } as field) -> + let name = struct_name ^ "_" ^ name in + { field with PP.field_name = name } + ) fields in + { structure with PP.struct_fields = fields } + ) structures in - (basename, version, arch, bodies) - ) infos in + (info, structures) + ) kernels in - (* Now parse each structure body. - * XXX This would be better as a proper lex/yacc parser. - * XXX Even better would be to have a proper interface to libdwarves. - *) - let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in - let re_intfield = Pcre.regexp "int\\s+(\\w+);" in - let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in - let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in - let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in - let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in - - (* 'basename' is the source file, and second parameter ('body') is - * the list of text lines which covers this structure (minus the - * opener line). Result is the list of parsed fields from this - * structure. - *) - let rec parse basename = function - | [] -> assert false - | [_] -> [] (* Just the closer line, finished. *) - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - (* Recursively parse a sub-structure. First search for the - * corresponding closer line. - *) - let rec loop depth acc = function - | [] -> - eprintf "%s: %S has no matching close structure line\n%!" - basename line; - assert false - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - loop (depth+1) (line :: acc) lines - | line :: lines - when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> - (line :: acc), lines - | line :: lines - when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> - loop (depth-1) (line :: acc) lines - | line :: lines -> loop depth (line :: acc) lines - in - let nested_body, rest = loop 0 [] lines in - - (* Then parse the sub-structure. *) - let struct_name, nested_body = - match nested_body with - | [] -> assert false - | closer :: _ -> - let subs = Pcre.exec ~rex:re_structcloser closer in - let struct_name = - try Some (Pcre.get_substring subs 1) with Not_found -> None in - struct_name, List.rev nested_body in - let nested_fields = parse basename nested_body in - - (* Prefix the sub-fields with the name of the structure. *) - let nested_fields = - match struct_name with - | None -> nested_fields - | Some prefix -> - List.map ( - fun (name, details) -> (prefix ^ "'" ^ name, details) - ) nested_fields in - - (* Parse the rest. *) - nested_fields @ parse basename rest - - | line :: lines when Pcre.pmatch ~rex:re_intfield line -> - (* An int field. *) - let subs = Pcre.exec ~rex:re_intfield line in - let name = Pcre.get_substring subs 1 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Int, offset, size)) :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> - (* A pointer-to-struct field. *) - let subs = Pcre.exec ~rex:re_ptrfield line in - let struct_name = Pcre.get_substring subs 1 in - let name = Pcre.get_substring subs 2 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Ptr struct_name, offset, size)) :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_strfield line -> - (* A string (char array) field. *) - let subs = Pcre.exec ~rex:re_strfield line in - let name = Pcre.get_substring subs 1 in - let width = int_of_string (Pcre.get_substring subs 2) in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Str width, offset, size)) :: parse basename lines - with - Not_found -> parse basename lines - ); - - | _ :: lines -> - (* Just ignore any other field we can't parse. *) - parse basename lines + if debug then + List.iter ( + fun (info, structures) -> + printf "%s ----------\n" (PP.string_of_info info); + List.iter ( + fun structure -> + printf "%s\n\n" (PP.string_of_structure structure); + ) structures; + ) kernels; - in + (* First output file is a simple list of kernels, to support the + * 'virt-mem --list-kernels' option. + *) + let () = + let _loc = Loc.ghost in - let datas = List.map ( - fun (basename, version, arch, bodies) -> - let structures = List.filter_map ( - fun (name, (_, _, _, wanted_fields)) -> - let body = - try Some (Hashtbl.find bodies name) with Not_found -> None in - match body with - | None -> None - | Some body -> - let body = List.tl body in (* Don't care about opener line. *) - let fields = parse basename body in - - (* That got us all the fields, but we only care about - * the wanted_fields. - *) - let fields = List.filter ( - fun (name, _) -> List.mem name wanted_fields - ) fields in + let versions = List.map ( + fun ({ PP.kernel_version = version }, _) -> version + ) kernels in - (* Also check we have all the wanted fields. *) - List.iter ( - fun wanted_field -> - if not (List.mem_assoc wanted_field fields) then - failwith (sprintf "%s: structure %s is missing required field %s" basename name wanted_field) - ) wanted_fields; + (* Sort them in reverse because we are going to generate the + * final list in reverse. + *) + let cmp a b = compare b a in + let versions = List.sort ~cmp versions in - Some (name, fields) - ) what in + let xs = + List.fold_left (fun xs version -> <:expr< $str:version$ :: $xs$ >>) + <:expr< [] >> versions in - (basename, version, arch, structures) - ) datas in + let code = <:str_item< + let kernels = $xs$ + >> in - if debug then - List.iter ( - fun (basename, version, arch, structures) -> - printf "%s (version: %s, arch: %s):\n" basename version arch; - List.iter ( - fun (struct_name, fields) -> - printf " struct %s {\n" struct_name; - List.iter ( - fun (field_name, (typ, offset, size)) -> - (match typ with - | `Int -> - printf " int %s; " field_name - | `Ptr struct_name -> - printf " struct %s *%s; " struct_name field_name - | `Str width -> - printf " char %s[%d]; " field_name width - ); - printf " /* offset = %d, size = %d */\n" offset size - ) fields; - printf " }\n\n"; - ) structures; - ) datas; + let output_file = outputdir // "virt_mem_kernels.ml" in + printf "Writing list of kernels to %s ...\n%!" output_file; + Printers.OCaml.print_implem ~output_file code in (* We'll generate a code file for each structure type (eg. task_struct - * across all kernel versions), so rearrange 'datas' for that purpose. + * across all kernel versions), so rearrange 'kernels' for that purpose. * * XXX This loop is O(n^3), luckily n is small! *) let files = List.map ( - fun (name, _) -> - name, - List.filter_map ( - fun (basename, version, arch, structures) -> - try Some (basename, version, arch, List.assoc name structures) - with Not_found -> None - ) datas - ) what in + fun (struct_name, _) -> + let kernels = + List.filter_map ( + fun (info, structures) -> + try + let structure = + List.find ( + fun { PP.struct_name = name } -> name = struct_name + ) structures in + Some (info, structure) + with Not_found -> + None + ) kernels in + + (* Sort the kernels, which makes the generated output more stable + * and makes patches more useful. + *) + let kernels = List.sort kernels in - let datas = () in ignore datas; (* garbage collect *) + struct_name, kernels + ) structs in - (* Get just the field types. It's plausible that a field with the - * same name has a different type between kernel versions, so we must - * check that didn't happen. + let kernels = () in ignore kernels; (* garbage collect *) + +(* + (* Get just the field types. + * + * It's plausible that a field with the same name has a different + * type between kernel versions, so we must check that didn't + * happen. + * + * This is complicated because of non-mandatory fields, which don't + * appear in every kernel version. *) let files = List.map ( fun (struct_name, kernels) -> let field_types = - match kernels with - | [] -> [] - | (_, _, _, fields) :: kernels -> - let field_types_of_fields fields = - List.sort ( - List.map ( - fun (field_name, (typ, _, _)) -> field_name, typ - ) fields - ) - in - let field_types = field_types_of_fields fields in - List.iter ( - fun (_, _, _, fields) -> - if field_types <> field_types_of_fields fields then - failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name) - ) kernels; - field_types in + (* Get the list of fields expected in this structure. *) + let { fields = struct_fields } = List.assoc struct_name structs in + + (* Get the list of fields that we found in each kernel version. *) + let found_fields = + List.flatten + (List.map (fun (_, _, _, (fields, _)) -> fields) kernels) in + + (* Determine a hash from each field name to the type. As we add + * fields, we might get a conflicting type (meaning the type + * changed between kernel versions). + *) + let hash = Hashtbl.create 13 in + + List.iter ( + fun (field_name, (typ, _, _)) -> + try + let field_type = Hashtbl.find hash field_name in + if typ <> field_type then + failwith (sprintf "%s.%s: structure field changed type between kernel versions" struct_name field_name); + with Not_found -> + Hashtbl.add hash field_name typ + ) found_fields; + + (* Now get a type for each structure field. *) + List.filter_map ( + fun (field_name, ft) -> + try + let field_name = struct_name ^ "_" ^ field_name in + let typ = Hashtbl.find hash field_name in + Some (field_name, (typ, ft)) + with Not_found -> + let msg = + sprintf "%s.%s: this field was not found in any kernel version" + struct_name field_name in + if ft.mandatory_field then failwith msg else prerr_endline msg; + None + ) struct_fields in (struct_name, kernels, field_types) ) files in @@ -471,7 +437,7 @@ Example (from toplevel of virt-mem source tree): let xs = ref [] in let kernels = List.map ( - fun (basename, version, arch, fields) -> + fun (basename, version, arch, (fields, total_size)) -> let key = endian_of_architecture arch, fields in let j = try Hashtbl.find hash key @@ -479,9 +445,10 @@ Example (from toplevel of virt-mem source tree): incr i; xs := (!i, key) :: !xs; Hashtbl.add hash key !i; !i in - (basename, version, arch, j) + (basename, version, arch, total_size, j) ) kernels in - struct_name, kernels, field_types, List.rev !xs + let parsers = List.rev !xs in + struct_name, kernels, field_types, parsers ) files in (* How much did we save by sharing? *) @@ -494,6 +461,25 @@ Example (from toplevel of virt-mem source tree): (List.length parsers) ) files; + (* Extend the parsers fields by adding on any optional fields which + * are not actually present in the specific kernel. + *) + let files = + List.map ( + fun (struct_name, kernels, field_types, parsers) -> + let parsers = List.map ( + fun (i, (endian, fields)) -> + let fields_not_present = + List.filter_map ( + fun (field_name, _) -> + if List.mem_assoc field_name fields then None + else Some field_name + ) field_types in + (i, (endian, fields, fields_not_present)) + ) parsers in + (struct_name, kernels, field_types, parsers) + ) files in + (* Let's generate some code! *) let files = List.map ( @@ -507,41 +493,302 @@ Example (from toplevel of virt-mem source tree): let struct_type, struct_sig = let fields = List.map ( function - | (name, `Int) -> - <:ctyp< $lid:name$ : int >> - | (name, `Ptr struct_name) -> - <:ctyp< $lid:name$ : (*`$str:struct_name$*) int64 >> - | (name, `Str _) -> + | (name, (`Int, { mandatory_field = true })) -> + <:ctyp< $lid:name$ : int64 >> + | (name, (`Int, { mandatory_field = false })) -> + <:ctyp< $lid:name$ : int64 option >> + | (name, ((`VoidPtr|`Ptr _), { mandatory_field = true })) -> + <:ctyp< $lid:name$ : Virt_mem_mmap.addr >> + | (name, ((`VoidPtr|`Ptr _), { mandatory_field = false })) -> + <:ctyp< $lid:name$ : Virt_mem_mmap.addr option >> + | (name, (`Str _, { mandatory_field = true })) -> <:ctyp< $lid:name$ : string >> + | (name, (`Str _, { mandatory_field = false })) -> + <:ctyp< $lid:name$ : string option >> ) field_types in - let f, fs = match fields with - | [] -> failwith (sprintf "%s: structure has no fields" struct_name) - | f :: fs -> f, fs in - let fields = List.fold_left ( - fun fs f -> <:ctyp< $fs$ ; $f$ >> - ) f fs in - + let fields = concat_record_fields _loc fields in let struct_type = <:str_item< type t = { $fields$ } >> in let struct_sig = <:sig_item< type t = { $fields$ } >> in struct_type, struct_sig in + (* Create a "field signature" which describes certain aspects + * of the fields which vary between kernel versions. + *) + let fieldsig_type, fieldsigs = + let fieldsig_type = + let fields = List.map ( + fun (name, _) -> + let fsname = "__fs_" ^ name in + <:ctyp< $lid:fsname$ : Virt_mem_types.fieldsig >> + ) field_types in + let fields = concat_record_fields _loc fields in + <:str_item< type fs_t = { $fields$ } >> in + + let fieldsigs = List.map ( + fun (i, (_, fields, fields_not_present)) -> + let make_fieldsig field_name available offset = + let available = + if available then <:expr< true >> else <:expr< false >> in + let fsname = "__fs_" ^ field_name in + <:rec_binding< + $lid:fsname$ = + { Virt_mem_types.field_available = $available$; + field_offset = $`int:offset$ } + >> + in + let fields = List.map ( + fun (field_name, (_, offset, _)) -> + make_fieldsig field_name true offset + ) fields in + let fields_not_present = List.map ( + fun field_name -> + make_fieldsig field_name false (-1) + ) fields_not_present in + + let fieldsigs = fields @ fields_not_present in + let fsname = sprintf "fieldsig_%d" i in + let fieldsigs = concat_record_bindings _loc fieldsigs in + let fieldsigs = build_record _loc fieldsigs in + <:str_item< + let $lid:fsname$ = $fieldsigs$ + >> + ) parsers in + + let fieldsigs = concat_str_items _loc fieldsigs in + + fieldsig_type, fieldsigs in + + (* The shared parser functions. + * + * We could include bitmatch statements directly in here, but + * what happens is that the macros get expanded here, resulting + * in (even more) unreadable generated code. So instead just + * do a textual substitution later by post-processing the + * generated files. Not type-safe, but we can't have + * everything. + *) + let parser_stmts, parser_subs = + let parser_stmts = List.map ( + fun (i, _) -> + let fnname = sprintf "parser_%d" i in + <:str_item< + let $lid:fnname$ bits = $str:fnname$ + >> + ) parsers in + + let parser_stmts = concat_str_items _loc parser_stmts in + + (* What gets substituted for "parser_NN" ... *) + let parser_subs = List.map ( + fun (i, (endian, fields, fields_not_present)) -> + let fnname = sprintf "parser_%d" i in + let endian = + match endian with + | Bitstring.LittleEndian -> "littleendian" + | Bitstring.BigEndian -> "bigendian" + | _ -> assert false in + let patterns = + (* Fields must be sorted by offset, otherwise bitmatch + * will complain. + *) + let cmp (_, (_, o1, _)) (_, (_, o2, _)) = compare o1 o2 in + let fields = List.sort ~cmp fields in + String.concat ";\n " ( + List.map ( + function + | (field_name, ((`Int|`Ptr _|`VoidPtr), offset, size)) -> + (* 'zero+' is a hack to force the type to int64. *) + sprintf "%s : zero+%d : offset(%d), %s" + field_name (size*8) (offset*8) endian + | (field_name, (`Str width, offset, size)) -> + sprintf "%s : %d : offset(%d), string" + field_name (width*8) (offset*8) + ) fields + ) in + let assignments = + List.map ( + fun (field_name, typ) -> + let (_, { mandatory_field = mandatory; + list_head_adjustment = list_head_adjustment }) = + try List.assoc field_name field_types + with Not_found -> + failwith (sprintf "%s: not found in field_types" + field_name) in + match typ, mandatory, list_head_adjustment with + | (`Ptr "list_head", offset, size), true, true -> + sprintf "%s = Int64.sub %s %dL" + field_name field_name offset + | (`Ptr "list_head", offset, size), false, true -> + sprintf "%s = Some (Int64.sub %s %dL)" + field_name field_name offset + | _, true, _ -> + sprintf "%s = %s" field_name field_name + | _, false, _ -> + sprintf "%s = Some %s" field_name field_name + ) fields in + let assignments_not_present = + List.map ( + fun field_name -> sprintf "%s = None" field_name + ) fields_not_present in + + let assignments = + String.concat ";\n " + (assignments @ assignments_not_present) in + + let sub = + sprintf " + bitmatch bits with + | { %s } -> + { %s } + | { _ } -> + raise (Virt_mem_types.ParseError (struct_name, %S, match_err))" + patterns assignments fnname in + + fnname, sub + ) parsers in + + parser_stmts, parser_subs in + + (* Define a map from kernel versions to parsing functions. *) + let version_map = + let stmts = List.fold_left ( + fun stmts (_, version, arch, total_size, i) -> + let parserfn = sprintf "parser_%d" i in + let fsname = sprintf "fieldsig_%d" i in + <:str_item< + $stmts$ + let v = ($lid:parserfn$, $`int:total_size$, $lid:fsname$) + let map = StringMap.add $str:version$ v map + >> + ) <:str_item< let map = StringMap.empty >> kernels in + + <:str_item< + module StringMap = Map.Make (String) ;; + $stmts$ + >> in + + (* Accessors for the field signatures. *) + let fsaccess, fsaccess_sig = + let fields = List.map ( + fun (field_name, _) -> + let fsname = "__fs_" ^ field_name in + <:str_item< + let $lid:"field_signature_of_"^field_name$ version = + let _, _, fs = StringMap.find version map in + fs.$lid:fsname$ + >> + ) field_types in + + let fsaccess = concat_str_items _loc fields in + + let fields = List.map ( + fun (field_name, _) -> + <:sig_item< + val $lid:"field_signature_of_"^field_name$ : kernel_version -> + Virt_mem_types.fieldsig + >> + ) field_types in + + let fsaccess_sig = concat_sig_items _loc fields in + + fsaccess, fsaccess_sig in + + (* Code (.ml file). *) let code = <:str_item< + let zero = 0 + let struct_name = $str:struct_name$ + let match_err = "failed to match kernel structure" ;; $struct_type$ + $fieldsig_type$ + $fieldsigs$ + $parser_stmts$ + $version_map$ + + type kernel_version = string + let $lid:struct_name^"_known"$ version = StringMap.mem version map + let $lid:struct_name^"_size"$ version = + let _, size, _ = StringMap.find version map in + size + let $lid:struct_name^"_of_bits"$ version bits = + let parsefn, _, _ = StringMap.find version map in + parsefn bits + let $lid:"get_"^struct_name$ version mem addr = + let parsefn, size, _ = StringMap.find version map in + let bytes = Virt_mem_mmap.get_bytes mem addr size in + let bits = Bitstring.bitstring_of_string bytes in + parsefn bits ;; + $fsaccess$ >> in + (* Interface (.mli file). *) let interface = <:sig_item< $struct_sig$ + + val struct_name : string + type kernel_version = string + val $lid:struct_name^"_known"$ : kernel_version -> bool + val $lid:struct_name^"_size"$ : kernel_version -> int + val $lid:struct_name^"_of_bits"$ : + kernel_version -> Bitstring.bitstring -> t + val $lid:"get_"^struct_name$ : kernel_version -> + ('a, 'b, [`HasMapping]) Virt_mem_mmap.t -> Virt_mem_mmap.addr -> t;; + $fsaccess_sig$ >> in - (struct_name, code, interface) + (struct_name, code, interface, parser_subs) ) files in (* Finally generate the output files. *) + let re_subst = Pcre.regexp "^(.*)\"(parser_\\d+)\"(.*)$" in + List.iter ( - fun (struct_name, code, interface) -> + fun (struct_name, code, interface, parser_subs) -> + (* Interface (.mli file). *) + let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in + printf "Writing %s interface to %s ...\n%!" struct_name output_file; + Printers.OCaml.print_interf ~output_file interface; + + (* Implementation (.ml file). *) let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in - Printers.OCaml.print_implem ~output_file code; + printf "Writing %s implementation to %s ...\n%!" struct_name output_file; + + let new_output_file = output_file ^ ".new" in + Printers.OCaml.print_implem ~output_file:new_output_file code; + + (* Substitute the parser bodies in the output file. *) + let ichan = open_in new_output_file in + let ochan = open_out output_file in + + output_string ochan "\ +(* WARNING: This file and the corresponding mli (interface) are + * automatically generated by the extract/codegen/kerneldb_to_parser.ml + * program. + * + * Any edits you make to this file will be lost. + * + * To update this file from the latest kernel database, it is recommended + * that you do 'make update-kernel-structs'. + *)\n\n"; - let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in - Printers.OCaml.print_interf ~output_file interface + let rec loop () = + let line = input_line ichan in + let line = + if Pcre.pmatch ~rex:re_subst line then ( + let subs = Pcre.exec ~rex:re_subst line in + let start = Pcre.get_substring subs 1 in + let template = Pcre.get_substring subs 2 in + let rest = Pcre.get_substring subs 3 in + let sub = List.assoc template parser_subs in + start ^ sub ^ rest + ) else line in + output_string ochan line; output_char ochan '\n'; + loop () + in + (try loop () with End_of_file -> ()); + + close_out ochan; + close_in ichan; + + Unix.unlink new_output_file ) files +*)