"task_struct", (
"struct task_struct {", "};", true,
[ "state"; "prio"; "normal_prio"; "static_prio";
- "tasks.prev"; "tasks.next"; "comm"]
+ "tasks'prev"; "tasks'next"; "mm"; "active_mm"; "comm"; "pid" ]
);
+(*
"mm_struct", (
"struct mm_struct {", "};", true,
[ ]
);
+*)
"net_device", (
"struct net_device {", "};", true,
[ "name"; "dev_addr" ]
);
]
+let debug = false
+
+open Camlp4.PreCast
+open Syntax
+(*open Ast*)
+
open ExtList
open ExtString
open Printf
let infos = Sys.readdir kernelsdir in
let infos = Array.to_list infos in
let infos = List.filter (fun name -> String.ends_with name ".info") infos in
- let infos = List.map ((//) kernelsdir) infos in
+ let infos = List.map ( (//) kernelsdir) infos in
(* Regular expressions. We really really should use ocaml-mikmatch ... *)
let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in
let line = input_line chan in
(* Kernel version string. *)
- let version =
+ let version, arch =
if Pcre.pmatch ~rex:re_oldformat line then (
(* If the file starts with "RPM: \d+: ..." then it's the
* original Fedora format. Everything in one line.
let arch = Pcre.get_substring subs 4 in
close_in chan;
(* XXX Map name -> PAE, hugemem etc. *)
- (* name, *) sprintf "%s-%s.%s" version release arch
+ (* name, *) sprintf "%s-%s.%s" version release arch, arch
) else (
(* New-style "key: value" entries, up to end of file or the first
* blank line.
if (*name = "" ||*) version = "" || release = "" || arch = "" then
failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename);
(* XXX Map name -> PAE, hugemem etc. *)
- (* name, *) sprintf "%s-%s.%s" version release arch
+ (* name, *) sprintf "%s-%s.%s" version release arch, arch
) in
- (*printf "%s -> %s\n%!" basename version;*)
+ (*printf "%s -> %s %s\n%!" basename version arch;*)
- (basename, version)
+ (basename, version, arch)
) infos in
(* For quick access to the opener strings, build a hash. *)
(* Now read the data files and parse out the structures of interest. *)
let datas = List.map (
- fun (basename, version) ->
+ fun (basename, version, arch) ->
let file_exists name =
try Unix.access name [Unix.F_OK]; true
with Unix.Unix_error _ -> false
failwith (sprintf "%s: structure %s not found in this kernel" basename name)
) what;
- (basename, version, bodies)
+ (basename, version, arch, bodies)
) infos in
(* Now parse each structure body.
match struct_name with
| None -> nested_fields
| Some prefix ->
- List.map (
- fun (name, details) -> (prefix ^ "." ^ name, details)
- ) nested_fields in
+ List.map (
+ fun (name, details) -> (prefix ^ "'" ^ name, details)
+ ) nested_fields in
(* Parse the rest. *)
nested_fields @ parse basename rest
let subs = Pcre.exec ~rex:re_offsetsize line in
let offset = int_of_string (Pcre.get_substring subs 1) in
let size = int_of_string (Pcre.get_substring subs 2) in
- (name, (`Ptr struct_name, offset, size)) :: parse basename lines
+ (name, (`Ptr struct_name, offset, size))
+ :: parse basename lines
with
Not_found -> parse basename lines
);
let subs = Pcre.exec ~rex:re_offsetsize line in
let offset = int_of_string (Pcre.get_substring subs 1) in
let size = int_of_string (Pcre.get_substring subs 2) in
- (name, (`Str width, offset, size)) :: parse basename lines
+ (name, (`Str width, offset, size))
+ :: parse basename lines
with
Not_found -> parse basename lines
);
in
let datas = List.map (
- fun (basename, version, bodies) ->
+ fun (basename, version, arch, bodies) ->
let structures = List.filter_map (
- fun (name, (_, _, _, wanted_fields)) ->
+ fun (struct_name, (_, _, _, wanted_fields)) ->
let body =
- try Some (Hashtbl.find bodies name) with Not_found -> None in
+ try Some (Hashtbl.find bodies struct_name)
+ with Not_found -> None in
match body with
| None -> None
| Some body ->
let body = List.tl body in (* Don't care about opener line. *)
let fields = parse basename body in
+ (* Compute total size of the structure. *)
+ let total_size =
+ let fields = List.map (
+ fun (_, (_, offset, size)) -> offset + size
+ ) fields in
+ List.fold_left max 0 fields in
+
(* That got us all the fields, but we only care about
* the wanted_fields.
*)
List.iter (
fun wanted_field ->
if not (List.mem_assoc wanted_field fields) then
- failwith (sprintf "%s: structure %s is missing required field %s" basename name wanted_field)
+ failwith (sprintf "%s: structure %s is missing required field %s" basename struct_name wanted_field)
) wanted_fields;
- Some (name, fields)
+ (* Prefix all the field names with the structure name. *)
+ let fields =
+ List.map (fun (name, details) ->
+ struct_name ^ "_" ^ name, details) fields in
+
+ Some (struct_name, (fields, total_size))
) what in
- (basename, version, structures)
+ (basename, version, arch, structures)
) datas in
- (* If you're debugging, uncomment this to print out the parsed
- * structures.
+ if debug then
+ List.iter (
+ fun (basename, version, arch, structures) ->
+ printf "%s (version: %s, arch: %s):\n" basename version arch;
+ List.iter (
+ fun (struct_name, (fields, total_size)) ->
+ printf " struct %s {\n" struct_name;
+ List.iter (
+ fun (field_name, (typ, offset, size)) ->
+ (match typ with
+ | `Int ->
+ printf " int %s; " field_name
+ | `Ptr struct_name ->
+ printf " struct %s *%s; " struct_name field_name
+ | `Str width ->
+ printf " char %s[%d]; " field_name width
+ );
+ printf " /* offset = %d, size = %d */\n" offset size
+ ) fields;
+ printf " } /* %d bytes */\n\n" total_size;
+ ) structures;
+ ) datas;
+
+ (* We'll generate a code file for each structure type (eg. task_struct
+ * across all kernel versions), so rearrange 'datas' for that purpose.
+ *
+ * XXX This loop is O(n^3), luckily n is small!
*)
-(*
- List.iter (
- fun (basename, version, structures) ->
- printf "%s (version: %s):\n" basename version;
- List.iter (
- fun (struct_name, fields) ->
- printf " struct %s {\n" struct_name;
- List.iter (
- fun (field_name, (typ, offset, size)) ->
- (match typ with
- | `Int -> printf " int %s; " field_name
- | `Ptr struct_name ->
- printf " struct %s *%s; " struct_name field_name
- | `Str width ->
- printf " char %s[%d]; " field_name width
- );
- printf " /* offset = %d, size = %d */\n" offset size
- ) fields;
- printf " }\n\n";
- ) structures;
- ) datas;
-*)
+ let files =
+ List.map (
+ fun (name, _) ->
+ let kernels =
+ List.filter_map (
+ fun (basename, version, arch, structures) ->
+ try Some (basename, version, arch, List.assoc name structures)
+ with Not_found -> None
+ ) datas in
+
+ (* Sort the kernels, which makes the generated output more stable
+ * and makes patches more useful.
+ *)
+ let kernels = List.sort kernels in
+
+ name, kernels
+ ) what in
+
+ let datas = () in ignore datas; (* garbage collect *)
+
+ (* Get just the field types. It's plausible that a field with the
+ * same name has a different type between kernel versions, so we must
+ * check that didn't happen.
+ *)
+ let files = List.map (
+ fun (struct_name, kernels) ->
+ let field_types =
+ match kernels with
+ | [] -> []
+ | (_, _, _, (fields, _)) :: kernels ->
+ let field_types_of_fields fields =
+ List.sort (
+ List.map (
+ fun (field_name, (typ, _, _)) -> field_name, typ
+ ) fields
+ )
+ in
+ let field_types = field_types_of_fields fields in
+ List.iter (
+ fun (_, _, _, (fields, _)) ->
+ if field_types <> field_types_of_fields fields then
+ failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name)
+ ) kernels;
+ field_types in
+ (struct_name, kernels, field_types)
+ ) files in
+
+ (* To minimize generated code size, we want to fold together all
+ * structures where the particulars (eg. offsets, sizes, endianness)
+ * of the fields we care about are the same -- eg. between kernel
+ * versions which are very similar.
+ *)
+ let endian_of_architecture arch =
+ if String.starts_with arch "i386" ||
+ String.starts_with arch "i486" ||
+ String.starts_with arch "i586" ||
+ String.starts_with arch "i686" ||
+ String.starts_with arch "x86_64" ||
+ String.starts_with arch "x86-64" then
+ Bitstring.LittleEndian
+ else if String.starts_with arch "ia64" then
+ Bitstring.LittleEndian (* XXX usually? *)
+ else if String.starts_with arch "ppc" then
+ Bitstring.BigEndian
+ else if String.starts_with arch "sparc" then
+ Bitstring.BigEndian
+ else
+ failwith (sprintf "endian_of_architecture: cannot parse %S" arch)
+ in
+
+ let files =
+ List.map (
+ fun (struct_name, kernels, field_types) ->
+ let hash = Hashtbl.create 13 in
+ let i = ref 0 in
+ let xs = ref [] in
+ let kernels =
+ List.map (
+ fun (basename, version, arch, (fields, total_size)) ->
+ let key = endian_of_architecture arch, fields in
+ let j =
+ try Hashtbl.find hash key
+ with Not_found ->
+ incr i;
+ xs := (!i, key) :: !xs; Hashtbl.add hash key !i;
+ !i in
+ (basename, version, arch, total_size, j)
+ ) kernels in
+ let parsers = List.rev !xs in
+ struct_name, kernels, field_types, parsers
+ ) files in
+
+ (* How much did we save by sharing? *)
+ if debug then
+ List.iter (
+ fun (struct_name, kernels, _, parsers) ->
+ printf "struct %s:\n" struct_name;
+ printf " number of kernel versions: %d\n" (List.length kernels);
+ printf " number of parser functions needed after sharing: %d\n"
+ (List.length parsers)
+ ) files;
(* Let's generate some code! *)
-
+ let files =
+ List.map (
+ fun (struct_name, kernels, field_types, parsers) ->
+ (* Dummy location required - there are no real locations for
+ * output files.
+ *)
+ let _loc = Loc.ghost in
+
+ (* The structure type. *)
+ let struct_type, struct_sig =
+ let fields = List.map (
+ function
+ | (name, `Int) ->
+ <:ctyp< $lid:name$ : int64 >>
+ | (name, `Ptr _) ->
+ <:ctyp< $lid:name$ : Virt_mem_mmap.addr >>
+ | (name, `Str _) ->
+ <:ctyp< $lid:name$ : string >>
+ ) field_types in
+ let f, fs = match fields with
+ | [] -> failwith (sprintf "%s: structure has no fields" struct_name)
+ | f :: fs -> f, fs in
+ let fields = List.fold_left (
+ fun fs f -> <:ctyp< $fs$ ; $f$ >>
+ ) f fs in
+
+ let struct_type = <:str_item< type t = { $fields$ } >> in
+ let struct_sig = <:sig_item< type t = { $fields$ } >> in
+ struct_type, struct_sig in
+
+ (* The shared parser functions.
+ *
+ * We could include bitmatch statements directly in here, but
+ * what happens is that the macros get expanded here, resulting
+ * in (even more) unreadable generated code. So instead just
+ * do a textual substitution later by post-processing the
+ * generated files. Not type-safe, but we can't have
+ * everything.
+ *)
+ let parser_stmts, parser_subs =
+ let parser_stmts = List.map (
+ fun (i, _) ->
+ let fnname = sprintf "parser_%d" i in
+ <:str_item<
+ let $lid:fnname$ bits = $str:fnname$
+ >>
+ ) parsers in
+
+ let parser_stmts =
+ match parser_stmts with
+ | [] -> <:str_item< >>
+ | p :: ps ->
+ List.fold_left (fun ps p -> <:str_item< $ps$ $p$ >>) p ps in
+
+ (* What gets substituted for "parser_NN" ... *)
+ let parser_subs = List.map (
+ fun (i, (endian, fields)) ->
+ let fnname = sprintf "parser_%d" i in
+ let endian =
+ match endian with
+ | Bitstring.LittleEndian -> "littleendian"
+ | Bitstring.BigEndian -> "bigendian"
+ | _ -> assert false in
+ let patterns =
+ (* Fields must be sorted by offset, otherwise bitmatch
+ * will complain.
+ *)
+ let cmp (_, (_, o1, _)) (_, (_, o2, _)) = compare o1 o2 in
+ let fields = List.sort ~cmp fields in
+ String.concat ";\n " (
+ List.map (
+ function
+ | (field_name, (`Int, offset, size))
+ | (field_name, (`Ptr _, offset, size)) ->
+ (* 'zero+' is a hack to force the type to int64. *)
+ sprintf "%s : zero+%d : offset(%d), %s"
+ field_name (size*8) (offset*8) endian
+ | (field_name, (`Str width, offset, size)) ->
+ sprintf "%s : %d : offset(%d), string"
+ field_name (width*8) (offset*8)
+ ) fields
+ ) in
+ let assignments =
+ String.concat ";\n " (
+ List.map (
+ function
+ | (field_name, (`Ptr "list_head", offset, size)) ->
+ sprintf "%s = Int64.sub %s %dL" field_name field_name offset
+ | (field_name, _) ->
+ sprintf "%s = %s" field_name field_name
+ ) fields
+ ) in
+
+ let sub =
+ sprintf "
+ bitmatch bits with
+ | { %s } ->
+ { %s }
+ | { _ } ->
+ raise (ParseError (struct_name, %S, match_err))"
+ patterns assignments fnname in
+
+ fnname, sub
+ ) parsers in
+
+ parser_stmts, parser_subs in
+
+ (* Define a map from kernel versions to parsing functions. *)
+ let version_map =
+ let stmts = List.fold_left (
+ fun stmts (_, version, arch, total_size, i) ->
+ let parserfn = sprintf "parser_%d" i in
+ <:str_item<
+ $stmts$
+ let v = ($lid:parserfn$, $`int:total_size$)
+ let map = StringMap.add $str:version$ v map
+ >>
+ ) <:str_item< let map = StringMap.empty >> kernels in
+
+ <:str_item<
+ module StringMap = Map.Make (String) ;;
+ $stmts$
+ >> in
+
+ (* Code (.ml file). *)
+ let code = <:str_item<
+ let warning = "This code is automatically generated from the kernel database by kerneldb-to-parser program. Any edits you make will be lost."
+ let zero = 0
+ let struct_name = $str:struct_name$
+ let match_err = "failed to match kernel structure"
+ exception ParseError of string * string * string;;
+ $struct_type$
+ $parser_stmts$
+ $version_map$
+
+ type kernel_version = string
+ let $lid:struct_name^"_known"$ version = StringMap.mem version map
+ let $lid:struct_name^"_size"$ version =
+ let _, size = StringMap.find version map in
+ size
+ let $lid:struct_name^"_of_bits"$ version bits =
+ let parsefn, _ = StringMap.find version map in
+ parsefn bits
+ let $lid:"get_"^struct_name$ version mem addr =
+ let parsefn, size = StringMap.find version map in
+ let bytes = Virt_mem_mmap.get_bytes mem addr size in
+ let bits = Bitstring.bitstring_of_string bytes in
+ parsefn bits
+ >> in
+
+ (* Interface (.mli file). *)
+ let interface = <:sig_item<
+ exception ParseError of string * string * string;;
+ $struct_sig$
+
+ val struct_name : string
+ type kernel_version = string
+ val $lid:struct_name^"_known"$ : kernel_version -> bool
+ val $lid:struct_name^"_size"$ : kernel_version -> int
+ val $lid:struct_name^"_of_bits"$ :
+ kernel_version -> Bitstring.bitstring -> t
+ val $lid:"get_"^struct_name$ : kernel_version ->
+ ('a, 'b, [`HasMapping]) Virt_mem_mmap.t -> Virt_mem_mmap.addr -> t
+ >> in
+
+ (struct_name, code, interface, parser_subs)
+ ) files in
+
+ (* Finally generate the output files. *)
+ let re_subst = Pcre.regexp "^(.*)\"(parser_\\d+)\"(.*)$" in
+
+ List.iter (
+ fun (struct_name, code, interface, parser_subs) ->
+ (* Interface (.mli file). *)
+ let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in
+ printf "Writing %s interface to %s ...\n%!" struct_name output_file;
+ Printers.OCaml.print_interf ~output_file interface;
+
+ (* Implementation (.ml file). *)
+ let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in
+ printf "Writing %s implementation to %s ...\n%!" struct_name output_file;
+
+ let new_output_file = output_file ^ ".new" in
+ Printers.OCaml.print_implem ~output_file:new_output_file code;
+
+ (* Substitute the parser bodies in the output file. *)
+ let ichan = open_in new_output_file in
+ let ochan = open_out output_file in
+
+ let rec loop () =
+ let line = input_line ichan in
+ let line =
+ if Pcre.pmatch ~rex:re_subst line then (
+ let subs = Pcre.exec ~rex:re_subst line in
+ let start = Pcre.get_substring subs 1 in
+ let template = Pcre.get_substring subs 2 in
+ let rest = Pcre.get_substring subs 3 in
+ let sub = List.assoc template parser_subs in
+ start ^ sub ^ rest
+ ) else line in
+ output_string ochan line; output_char ochan '\n';
+ loop ()
+ in
+ (try loop () with End_of_file -> ());
+ close_out ochan;
+ close_in ichan;
- ()
+ Unix.unlink new_output_file
+ ) files