X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=extract%2Fcodegen%2Fkerneldb_to_parser.ml;h=177d607a5d18fb36ee3fa0ac9d617d65b8996dcb;hb=c15c1624692d506eefe5f2cb2d775a6fb9127589;hp=e4c751971d7b5c16bbe391ff9f9facaafe52fffd;hpb=2174887a4382d491f67058a8336cfe57c2faa746;p=virt-mem.git diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index e4c7519..177d607 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -131,7 +131,7 @@ let structs = [ }; ] -let debug = false +let debug = true open Camlp4.PreCast open Syntax @@ -141,6 +141,8 @@ open ExtList open ExtString open Printf +module PP = Pahole_parser + let (//) = Filename.concat (* Couple of handy camlp4 construction functions which do some @@ -211,366 +213,83 @@ Example (from toplevel of virt-mem source tree): " arg0 arg0 arg0; exit 2 in - (* Get the *.info files from the kernels database. *) - let infos = Sys.readdir kernelsdir in - let infos = Array.to_list infos in - let infos = List.filter (fun name -> String.ends_with name ".info") infos in - let infos = List.map ( (//) kernelsdir) infos in - - (* Regular expressions. We really really should use ocaml-mikmatch ... *) - let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in - let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in + let kernels = PP.list_kernels kernelsdir in + let nr_kernels = List.length kernels in - (* Parse in the *.info files. These have historically had a few different - * formats that we need to support. - *) - let infos = List.map ( - fun filename -> - (* Get the basename (for getting the .data file later on). *) - let basename = Filename.chop_suffix filename ".info" in - - let chan = open_in filename in - let line = input_line chan in - - (* Kernel version string. *) - let version, arch = - if Pcre.pmatch ~rex:re_oldformat line then ( - (* If the file starts with "RPM: \d+: ..." then it's the - * original Fedora format. Everything in one line. - *) - let subs = Pcre.exec ~rex:re_oldformat line in - (* let name = Pcre.get_substring subs 1 in *) - let version = Pcre.get_substring subs 2 in - let release = Pcre.get_substring subs 3 in - let arch = Pcre.get_substring subs 4 in - close_in chan; - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) else ( - (* New-style "key: value" entries, up to end of file or the first - * blank line. - *) - let (*name,*) version, release, arch = - (*ref "",*) ref "", ref "", ref "" in - let rec loop line = - try - let subs = Pcre.exec ~rex:re_keyvalue line in - let key = Pcre.get_substring subs 1 in - let value = Pcre.get_substring subs 2 in - (*if key = "Name" then name := value - else*) if key = "Version" then version := value - else if key = "Release" then release := value - else if key = "Architecture" then arch := value; - let line = input_line chan in - loop line - with - Not_found | End_of_file -> - close_in chan - in - loop line; - let (*name,*) version, release, arch = - (*!name,*) !version, !release, !arch in - if (*name = "" ||*) version = "" || release = "" || arch = "" then - failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) in - - (*printf "%s -> %s %s\n%!" basename version arch;*) - - (basename, version, arch) - ) infos in - - let nr_kernels = List.length infos in - - (* For quick access to the opener strings, build a hash. *) - let openers = Hashtbl.create 13 in - List.iter ( - fun (name, { opener = opener; closer = closer }) -> - Hashtbl.add openers opener (closer, name) - ) structs; - - (* Now read the data files and parse out the structures of interest. *) let kernels = List.mapi ( - fun i (basename, version, arch) -> + fun i info -> printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels; - let file_exists name = - try Unix.access name [Unix.F_OK]; true - with Unix.Unix_error _ -> false - in - let close_process_in cmd chan = - match Unix.close_process_in chan with - | Unix.WEXITED 0 -> () - | Unix.WEXITED i -> - eprintf "%s: command exited with code %d\n" cmd i; exit i - | Unix.WSIGNALED i -> - eprintf "%s: command exited with signal %d\n" cmd i; exit 1 - | Unix.WSTOPPED i -> - eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 - in - - (* Open the data file, uncompressing it on the fly if necessary. *) - let chan, close = - if file_exists (basename ^ ".data") then - open_in (basename ^ ".data"), close_in - else if file_exists (basename ^ ".data.gz") then ( - let cmd = - sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in - Unix.open_process_in cmd, close_process_in cmd - ) - else if file_exists (basename ^ ".data.bz2") then ( - let cmd = - sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in - Unix.open_process_in cmd, close_process_in cmd - ) else - failwith - (sprintf "%s: cannot find corresponding data file" basename) in - - (* Read the data file in, looking for structures of interest to us. *) - let bodies = Hashtbl.create 13 in - let rec loop () = - let line = input_line chan in - - (* If the line is an opener for one of the structures we - * are looking for, then for now just save all the text until - * we get to the closer line. - *) - (try - let closer, name = Hashtbl.find openers line in - let rec loop2 lines = - let line = input_line chan in - let lines = line :: lines in - if String.starts_with line closer then List.rev lines - else loop2 lines - in - - let body = - try loop2 [line] - with End_of_file -> - failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in - - Hashtbl.replace bodies name body - with Not_found -> ()); - - loop () - in - (try loop () with End_of_file -> ()); - - close chan; + let struct_names = List.map fst structs in + let structures = PP.load_structures info struct_names in - (* Make sure we got all the mandatory structures. *) + (* Make sure we got all the mandatory structures & fields. *) List.iter ( - fun (name, { mandatory_struct = mandatory }) -> - if mandatory && not (Hashtbl.mem bodies name) then - failwith (sprintf "%s: structure %s not found in this kernel" basename name) - ) structs; - - (basename, version, arch, bodies) - ) infos in - - (* Now parse each structure body. - * XXX This would be better as a proper lex/yacc parser. - * XXX Even better would be to have a proper interface to libdwarves. - *) - let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in - let re_intfield = Pcre.regexp "(?:int|char)\\s+(\\w+);" in - let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in - let re_voidptrfield = Pcre.regexp "void\\s*\\*\\s*(\\w+);" in - let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in - let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in - let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in - - (* 'basename' is the source file, and second parameter ('body') is - * the list of text lines which covers this structure (minus the - * opener line). Result is the list of parsed fields from this - * structure. - *) - let rec parse basename = function - | [] -> assert false - | [_] -> [] (* Just the closer line, finished. *) - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - (* Recursively parse a sub-structure. First search for the - * corresponding closer line. - *) - let rec loop depth acc = function - | [] -> - eprintf "%s: %S has no matching close structure line\n%!" - basename line; - assert false - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - loop (depth+1) (line :: acc) lines - | line :: lines - when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> - (line :: acc), lines - | line :: lines - when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> - loop (depth-1) (line :: acc) lines - | line :: lines -> loop depth (line :: acc) lines - in - let nested_body, rest = loop 0 [] lines in - - (* Then parse the sub-structure. *) - let struct_name, nested_body = - match nested_body with - | [] -> assert false - | closer :: _ -> - let subs = Pcre.exec ~rex:re_structcloser closer in - let struct_name = - try Some (Pcre.get_substring subs 1) with Not_found -> None in - struct_name, List.rev nested_body in - let nested_fields = parse basename nested_body in - - (* Prefix the sub-fields with the name of the structure. *) - let nested_fields = - match struct_name with - | None -> nested_fields - | Some prefix -> - List.map ( - fun (name, details) -> (prefix ^ "'" ^ name, details) - ) nested_fields in - - (* Parse the rest. *) - nested_fields @ parse basename rest - - | line :: lines when Pcre.pmatch ~rex:re_intfield line -> - (* An int field. *) - let subs = Pcre.exec ~rex:re_intfield line in - let name = Pcre.get_substring subs 1 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Int, offset, size)) :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> - (* A pointer-to-struct field. *) - let subs = Pcre.exec ~rex:re_ptrfield line in - let struct_name = Pcre.get_substring subs 1 in - let name = Pcre.get_substring subs 2 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Ptr struct_name, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_voidptrfield line -> - (* A void* field. *) - let subs = Pcre.exec ~rex:re_voidptrfield line in - let name = Pcre.get_substring subs 1 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`VoidPtr, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_strfield line -> - (* A string (char array) field. *) - let subs = Pcre.exec ~rex:re_strfield line in - let name = Pcre.get_substring subs 1 in - let width = int_of_string (Pcre.get_substring subs 2) in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Str width, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | _ :: lines -> - (* Just ignore any other field we can't parse. *) - parse basename lines + fun (struct_name, + { mandatory_struct = mandatory; fields = wanted_fields }) -> + try + let s = + List.find (fun s -> struct_name = s.PP.struct_name) + structures in + + (* Check we have all the mandatory fields. *) + let all_fields = s.PP.struct_fields in + List.iter ( + fun (wanted_field, { mandatory_field = mandatory }) -> + let got_it = + List.exists ( + fun { PP.field_name = name } -> name = wanted_field + ) all_fields in + if mandatory && not got_it then ( + eprintf "%s: structure %s is missing required field %s\n" + info.PP.basename struct_name wanted_field; + eprintf "fields found in this structure:\n"; + List.iter ( + fun { PP.field_name = name } -> eprintf "\t%s\n" name + ) all_fields; + exit 1 + ); + ) wanted_fields - in + with Not_found -> + if mandatory then + failwith (sprintf "%s: structure %s not found in this kernel" + info.PP.basename struct_name) + ) structs; - let kernels = List.map ( - fun (basename, version, arch, bodies) -> - let structures = List.filter_map ( - fun (struct_name, { fields = wanted_fields }) -> - let body = - try Some (Hashtbl.find bodies struct_name) - with Not_found -> None in - match body with - | None -> None - | Some body -> - let body = List.tl body in (* Don't care about opener line. *) - let fields = parse basename body in - - (* Compute total size of the structure. *) - let total_size = - let fields = List.map ( - fun (_, (_, offset, size)) -> offset + size - ) fields in - List.fold_left max 0 fields in - - (* That got us all the fields, but we only care about - * the wanted_fields. - *) - let all_fields = fields in - let fields = List.filter ( - fun (name, _) -> List.mem_assoc name wanted_fields + let structures = + List.map ( + fun ({ PP.struct_name = struct_name; PP.struct_fields = fields } + as structure) -> + let { fields = wanted_fields } = List.assoc struct_name structs in + + (* That got us all the fields, but we only care about + * the wanted_fields. + *) + let fields = List.filter ( + fun { PP.field_name = name } -> List.mem_assoc name wanted_fields + ) fields in + + (* Prefix all the field names with the structure name. *) + let fields = + List.map ( + fun ({ PP.field_name = name } as field) -> + let name = struct_name ^ "_" ^ name in + { field with PP.field_name = name } ) fields in + { structure with PP.struct_fields = fields } + ) structures in - (* Also check we have all the mandatory fields. *) - List.iter ( - fun (wanted_field, { mandatory_field = mandatory }) -> - if mandatory && not (List.mem_assoc wanted_field fields) - then ( - eprintf "%s: structure %s is missing required field %s\n" basename struct_name wanted_field; - eprintf "fields found in this structure:\n"; - List.iter ( - fun (name, _) -> eprintf "\t%s\n" name - ) all_fields; - exit 1 - ); - ) wanted_fields; - - (* Prefix all the field names with the structure name. *) - let fields = - List.map (fun (name, details) -> - struct_name ^ "_" ^ name, details) fields in - - Some (struct_name, (fields, total_size)) - ) structs in - - (basename, version, arch, structures) + (info, structures) ) kernels in if debug then List.iter ( - fun (basename, version, arch, structures) -> - printf "%s (version: %s, arch: %s):\n" basename version arch; + fun (info, structures) -> + printf "%s ----------\n" (PP.string_of_info info); List.iter ( - fun (struct_name, (fields, total_size)) -> - printf " struct %s {\n" struct_name; - List.iter ( - fun (field_name, (typ, offset, size)) -> - (match typ with - | `Int -> - printf " int %s; " field_name - | `Ptr struct_name -> - printf " struct %s *%s; " struct_name field_name - | `VoidPtr -> - printf " void *%s; " field_name - | `Str width -> - printf " char %s[%d]; " field_name width - ); - printf " /* offset = %d, size = %d */\n" offset size - ) fields; - printf " } /* %d bytes */\n\n" total_size; + fun structure -> + printf "%s\n\n" (PP.string_of_structure structure); ) structures; ) kernels; @@ -580,7 +299,9 @@ Example (from toplevel of virt-mem source tree): let () = let _loc = Loc.ghost in - let versions = List.map (fun (_, version, _, _) -> version) kernels in + let versions = List.map ( + fun ({ PP.kernel_version = version }, _) -> version + ) kernels in (* Sort them in reverse because we are going to generate the * final list in reverse. @@ -607,12 +328,18 @@ Example (from toplevel of virt-mem source tree): *) let files = List.map ( - fun (name, _) -> + fun (struct_name, _) -> let kernels = List.filter_map ( - fun (basename, version, arch, structures) -> - try Some (basename, version, arch, List.assoc name structures) - with Not_found -> None + fun (info, structures) -> + try + let structure = + List.find ( + fun { PP.struct_name = name } -> name = struct_name + ) structures in + Some (info, structure) + with Not_found -> + None ) kernels in (* Sort the kernels, which makes the generated output more stable @@ -620,11 +347,12 @@ Example (from toplevel of virt-mem source tree): *) let kernels = List.sort kernels in - name, kernels + struct_name, kernels ) structs in let kernels = () in ignore kernels; (* garbage collect *) +(* (* Get just the field types. * * It's plausible that a field with the same name has a different @@ -1063,3 +791,4 @@ Example (from toplevel of virt-mem source tree): Unix.unlink new_output_file ) files +*)