From c0ef66b9b93fdc37e351f2fa657df35868472638 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Thu, 1 Jan 1970 00:00:00 +0000 Subject: [PATCH] Fully parses the output of 'pahole'. --- extract/codegen/Makefile.in | 2 +- extract/codegen/kerneldb_to_parser.ml | 302 ++++++++++++++++++++++++++++++++-- 2 files changed, 286 insertions(+), 18 deletions(-) diff --git a/extract/codegen/Makefile.in b/extract/codegen/Makefile.in index 4643f13..edcfe96 100644 --- a/extract/codegen/Makefile.in +++ b/extract/codegen/Makefile.in @@ -23,7 +23,7 @@ INSTALL = @INSTALL@ MKDIR_P = @MKDIR_P@ bindir = @bindir@ -OCAMLCPACKAGES = -package extlib,pcre +OCAMLCPACKAGES = -package extlib,pcre,unix OCAMLCFLAGS = @OCAMLCFLAGS@ OCAMLCLIBS = -linkpkg diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index 6b64786..1137f30 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -30,8 +30,17 @@ let what = [ "task_struct", ( - "struct task_struct", - [ "state"; "prio"; "normal_prio"; "static_prio"; "tasks"; "comm"] + "struct task_struct {", "};", true, + [ "state"; "prio"; "normal_prio"; "static_prio"; + "tasks.prev"; "tasks.next"; "comm"] + ); + "mm_struct", ( + "struct mm_struct {", "};", true, + [ ] + ); + "net_device", ( + "struct net_device {", "};", true, + [ "name"; "dev_addr" ] ); ] @@ -80,31 +89,33 @@ Example (from toplevel of virt-mem source tree): let chan = open_in filename in let line = input_line chan in - let name, version = + (* Kernel version string. *) + let version = if Pcre.pmatch ~rex:re_oldformat line then ( - (* If the file starts with "RPM: \d+: ..." then it's the original - * format. Everything in one line. + (* If the file starts with "RPM: \d+: ..." then it's the + * original Fedora format. Everything in one line. *) let subs = Pcre.exec ~rex:re_oldformat line in - let name = Pcre.get_substring subs 1 in + (* let name = Pcre.get_substring subs 1 in *) let version = Pcre.get_substring subs 2 in let release = Pcre.get_substring subs 3 in let arch = Pcre.get_substring subs 4 in close_in chan; - name, sprintf "%s-%s.%s" version release arch + (* XXX Map name -> PAE, hugemem etc. *) + (* name, *) sprintf "%s-%s.%s" version release arch ) else ( (* New-style "key: value" entries, up to end of file or the first * blank line. *) - let name, version, release, arch = - ref "", ref "", ref "", ref "" in + let (*name,*) version, release, arch = + (*ref "",*) ref "", ref "", ref "" in let rec loop line = try let subs = Pcre.exec ~rex:re_keyvalue line in let key = Pcre.get_substring subs 1 in let value = Pcre.get_substring subs 2 in - if key = "Name" then name := value - else if key = "Version" then version := value + (*if key = "Name" then name := value + else*) if key = "Version" then version := value else if key = "Release" then release := value else if key = "Architecture" then arch := value; let line = input_line chan in @@ -114,16 +125,273 @@ Example (from toplevel of virt-mem source tree): close_in chan in loop line; - let name, version, release, arch = - !name, !version, !release, !arch in - if name = "" || version = "" || release = "" || arch = "" then + let (*name,*) version, release, arch = + (*!name,*) !version, !release, !arch in + if (*name = "" ||*) version = "" || release = "" || arch = "" then failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); - name, sprintf "%s-%s.%s" version release arch + (* XXX Map name -> PAE, hugemem etc. *) + (* name, *) sprintf "%s-%s.%s" version release arch ) in - printf "%s -> %s, %s\n" basename name version; + (*printf "%s -> %s\n%!" basename version;*) + + (basename, version) + ) infos in + + (* For quick access to the opener strings, build a hash. *) + let openers = Hashtbl.create 13 in + List.iter ( + fun (name, (opener, closer, _, _)) -> + Hashtbl.add openers opener (closer, name) + ) what; + + (* Now read the data files and parse out the structures of interest. *) + let datas = List.map ( + fun (basename, version) -> + let file_exists name = + try Unix.access name [Unix.F_OK]; true + with Unix.Unix_error _ -> false + in + let close_process_in cmd chan = + match Unix.close_process_in chan with + | Unix.WEXITED 0 -> () + | Unix.WEXITED i -> + eprintf "%s: command exited with code %d\n" cmd i; exit i + | Unix.WSIGNALED i -> + eprintf "%s: command exited with signal %d\n" cmd i; exit 1 + | Unix.WSTOPPED i -> + eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 + in + + (* Open the data file, uncompressing it on the fly if necessary. *) + let chan, close = + if file_exists (basename ^ ".data") then + open_in (basename ^ ".data"), close_in + else if file_exists (basename ^ ".data.gz") then ( + let cmd = + sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in + Unix.open_process_in cmd, close_process_in cmd + ) + else if file_exists (basename ^ ".data.bz2") then ( + let cmd = + sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in + Unix.open_process_in cmd, close_process_in cmd + ) else + failwith + (sprintf "%s: cannot find corresponding data file" basename) in + + (* Read the data file in, looking for structures of interest to us. *) + let bodies = Hashtbl.create 13 in + let rec loop () = + let line = input_line chan in + + (* If the line is an opener for one of the structures we + * are looking for, then for now just save all the text until + * we get to the closer line. + *) + (try + let closer, name = Hashtbl.find openers line in + let rec loop2 lines = + let line = input_line chan in + let lines = line :: lines in + if String.starts_with line closer then List.rev lines + else loop2 lines + in - (basename, name, version) + let body = + try loop2 [line] + with End_of_file -> + failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in + + Hashtbl.replace bodies name body + with Not_found -> ()); + + loop () + in + (try loop () with End_of_file -> ()); + + close chan; + + (* Make sure we got all the mandatory structures. *) + List.iter ( + fun (name, (_, _, mandatory, _)) -> + if mandatory && not (Hashtbl.mem bodies name) then + failwith (sprintf "%s: structure %s not found in this kernel" basename name) + ) what; + + (basename, version, bodies) ) infos in + (* Now parse each structure body. + * XXX This would be better as a proper lex/yacc parser. + * XXX Even better would be to have a proper interface to libdwarves. + *) + let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in + let re_intfield = Pcre.regexp "int\\s+(\\w+);" in + let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in + let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in + let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in + let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in + + (* 'basename' is the source file, and second parameter ('body') is + * the list of text lines which covers this structure (minus the + * opener line). Result is the list of parsed fields from this + * structure. + *) + let rec parse basename = function + | [] -> assert false + | [_] -> [] (* Just the closer line, finished. *) + | line :: lines when Pcre.pmatch ~rex:re_structopener line -> + (* Recursively parse a sub-structure. First search for the + * corresponding closer line. + *) + let rec loop depth acc = function + | [] -> + eprintf "%s: %S has no matching close structure line\n%!" + basename line; + assert false + | line :: lines when Pcre.pmatch ~rex:re_structopener line -> + loop (depth+1) (line :: acc) lines + | line :: lines + when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> + (line :: acc), lines + | line :: lines + when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> + loop (depth-1) (line :: acc) lines + | line :: lines -> loop depth (line :: acc) lines + in + let nested_body, rest = loop 0 [] lines in + + (* Then parse the sub-structure. *) + let struct_name, nested_body = + match nested_body with + | [] -> assert false + | closer :: _ -> + let subs = Pcre.exec ~rex:re_structcloser closer in + let struct_name = + try Some (Pcre.get_substring subs 1) with Not_found -> None in + struct_name, List.rev nested_body in + let nested_fields = parse basename nested_body in + + (* Prefix the sub-fields with the name of the structure. *) + let nested_fields = + match struct_name with + | None -> nested_fields + | Some prefix -> + List.map ( + fun (name, details) -> (prefix ^ "." ^ name, details) + ) nested_fields in + + (* Parse the rest. *) + nested_fields @ parse basename rest + + | line :: lines when Pcre.pmatch ~rex:re_intfield line -> + (* An int field. *) + let subs = Pcre.exec ~rex:re_intfield line in + let name = Pcre.get_substring subs 1 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + (name, (`Int, offset, size)) :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> + (* A pointer-to-struct field. *) + let subs = Pcre.exec ~rex:re_ptrfield line in + let struct_name = Pcre.get_substring subs 1 in + let name = Pcre.get_substring subs 2 in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + (name, (`Ptr struct_name, offset, size)) :: parse basename lines + with + Not_found -> parse basename lines + ); + + | line :: lines when Pcre.pmatch ~rex:re_strfield line -> + (* A string (char array) field. *) + let subs = Pcre.exec ~rex:re_strfield line in + let name = Pcre.get_substring subs 1 in + let width = int_of_string (Pcre.get_substring subs 2) in + (try + let subs = Pcre.exec ~rex:re_offsetsize line in + let offset = int_of_string (Pcre.get_substring subs 1) in + let size = int_of_string (Pcre.get_substring subs 2) in + (name, (`Str width, offset, size)) :: parse basename lines + with + Not_found -> parse basename lines + ); + + | _ :: lines -> + (* Just ignore any other field we can't parse. *) + parse basename lines + + in + + let datas = List.map ( + fun (basename, version, bodies) -> + let structures = List.filter_map ( + fun (name, (_, _, _, wanted_fields)) -> + let body = + try Some (Hashtbl.find bodies name) with Not_found -> None in + match body with + | None -> None + | Some body -> + let body = List.tl body in (* Don't care about opener line. *) + let fields = parse basename body in + + (* That got us all the fields, but we only care about + * the wanted_fields. + *) + let fields = List.filter ( + fun (name, _) -> List.mem name wanted_fields + ) fields in + + (* Also check we have all the wanted fields. *) + List.iter ( + fun wanted_field -> + if not (List.mem_assoc wanted_field fields) then + failwith (sprintf "%s: structure %s is missing required field %s" basename name wanted_field) + ) wanted_fields; + + Some (name, fields) + ) what in + + (basename, version, structures) + ) datas in + + (* If you're debugging, uncomment this to print out the parsed + * structures. + *) +(* + List.iter ( + fun (basename, version, structures) -> + printf "%s (version: %s):\n" basename version; + List.iter ( + fun (struct_name, fields) -> + printf " struct %s {\n" struct_name; + List.iter ( + fun (field_name, (typ, offset, size)) -> + (match typ with + | `Int -> printf " int %s; " field_name + | `Ptr struct_name -> + printf " struct %s *%s; " struct_name field_name + | `Str width -> + printf " char %s[%d]; " field_name width + ); + printf " /* offset = %d, size = %d */\n" offset size + ) fields; + printf " }\n\n"; + ) structures; + ) datas; +*) + + (* Let's generate some code! *) + + + () -- 1.8.3.1