X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=extract%2Fcodegen%2Fkerneldb_to_parser.ml;h=177d607a5d18fb36ee3fa0ac9d617d65b8996dcb;hb=c15c1624692d506eefe5f2cb2d775a6fb9127589;hp=f94de2f72b3457324fc004a91ef79b945ce53ed1;hpb=e49f9de0ae3883b7dc1f3905972f98a44417bf8f;p=virt-mem.git diff --git a/extract/codegen/kerneldb_to_parser.ml b/extract/codegen/kerneldb_to_parser.ml index f94de2f..177d607 100644 --- a/extract/codegen/kerneldb_to_parser.ml +++ b/extract/codegen/kerneldb_to_parser.ml @@ -36,22 +36,35 @@ type struct_t = { } and field_t = { mandatory_field : bool; (* Is this field mandatory? *) + list_head_adjustment : bool; (* Only applies if the field points to a + * struct list_head: If true, then we do the + * list_head adjustment, so the field points + * to the start of the structure. If false, + * leave the pointer intact. The list_head + * adjustment only works if the list_head + * is in the same type of structure. + *) } +let ordinary_field = { mandatory_field = true; list_head_adjustment = true; } + +(*---------------------------------------------------------------------- + * This controls what structures & fields we will parse out. + *----------------------------------------------------------------------*) let structs = [ "task_struct", { opener = "struct task_struct {"; closer = "};"; mandatory_struct = true; fields = [ - "state", { mandatory_field = true }; - "prio", { mandatory_field = true }; - "normal_prio", { mandatory_field = true }; - "static_prio", { mandatory_field = true }; - "tasks'prev", { mandatory_field = true }; - "tasks'next", { mandatory_field = true }; - "mm", { mandatory_field = true }; - "active_mm", { mandatory_field = true }; - "comm", { mandatory_field = true }; - "pid", { mandatory_field = true }; + "state", ordinary_field; + "prio", ordinary_field; + "normal_prio", ordinary_field; + "static_prio", ordinary_field; + "tasks'prev", ordinary_field; + "tasks'next", ordinary_field; + "mm", ordinary_field; + "active_mm", ordinary_field; + "comm", ordinary_field; + "pid", ordinary_field; ] }; (* @@ -63,22 +76,62 @@ let structs = [ "net_device", { opener = "struct net_device {"; closer = "};"; mandatory_struct = true; fields = [ - "dev_list'prev", { mandatory_field = false }; - "dev_list'next", { mandatory_field = false }; - "next", { mandatory_field = false }; - "name", { mandatory_field = true }; - "dev_addr", { mandatory_field = true }; + "dev_list'prev", { mandatory_field = false; list_head_adjustment = true }; + "dev_list'next", { mandatory_field = false; list_head_adjustment = true }; + "next", { mandatory_field = false; list_head_adjustment = true }; + "name", ordinary_field; + "flags", ordinary_field; + "operstate", ordinary_field; + "mtu", ordinary_field; + "perm_addr", ordinary_field; + "addr_len", ordinary_field; + "ip_ptr", ordinary_field; + "ip6_ptr", ordinary_field; ] }; "net", { opener = "struct net {"; closer = "};"; mandatory_struct = false; fields = [ - "dev_base_head'next", { mandatory_field = true }; + "dev_base_head'next", + (* Don't do list_head adjustment on this field, because it points + * to a net_device struct. + *) + { mandatory_field = true; list_head_adjustment = false }; ] }; + "in_device", { + opener = "struct in_device {"; closer = "};"; mandatory_struct = true; + fields = [ + "ifa_list", ordinary_field; + ]; + }; + "inet6_dev", { + opener = "struct inet6_dev {"; closer = "};"; mandatory_struct = true; + fields = [ + "addr_list", ordinary_field; + ]; + }; + "in_ifaddr", { + opener = "struct in_ifaddr {"; closer = "};"; mandatory_struct = true; + fields = [ + "ifa_next", ordinary_field; + "ifa_local", ordinary_field; + "ifa_address", ordinary_field; + "ifa_mask", ordinary_field; + "ifa_broadcast", ordinary_field; + ]; + }; + "inet6_ifaddr", { + opener = "struct inet6_ifaddr {"; closer = "};"; mandatory_struct = true; + fields = [ + (*"addr'in6_u'u6_addr8", ordinary_field;*) + "prefix_len", ordinary_field; + "lst_next", ordinary_field; + ]; + }; ] -let debug = false +let debug = true open Camlp4.PreCast open Syntax @@ -88,6 +141,8 @@ open ExtList open ExtString open Printf +module PP = Pahole_parser + let (//) = Filename.concat (* Couple of handy camlp4 construction functions which do some @@ -158,341 +213,83 @@ Example (from toplevel of virt-mem source tree): " arg0 arg0 arg0; exit 2 in - (* Get the *.info files from the kernels database. *) - let infos = Sys.readdir kernelsdir in - let infos = Array.to_list infos in - let infos = List.filter (fun name -> String.ends_with name ".info") infos in - let infos = List.map ( (//) kernelsdir) infos in - - (* Regular expressions. We really really should use ocaml-mikmatch ... *) - let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in - let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in + let kernels = PP.list_kernels kernelsdir in + let nr_kernels = List.length kernels in - (* Parse in the *.info files. These have historically had a few different - * formats that we need to support. - *) - let infos = List.map ( - fun filename -> - (* Get the basename (for getting the .data file later on). *) - let basename = Filename.chop_suffix filename ".info" in - - let chan = open_in filename in - let line = input_line chan in - - (* Kernel version string. *) - let version, arch = - if Pcre.pmatch ~rex:re_oldformat line then ( - (* If the file starts with "RPM: \d+: ..." then it's the - * original Fedora format. Everything in one line. - *) - let subs = Pcre.exec ~rex:re_oldformat line in - (* let name = Pcre.get_substring subs 1 in *) - let version = Pcre.get_substring subs 2 in - let release = Pcre.get_substring subs 3 in - let arch = Pcre.get_substring subs 4 in - close_in chan; - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) else ( - (* New-style "key: value" entries, up to end of file or the first - * blank line. - *) - let (*name,*) version, release, arch = - (*ref "",*) ref "", ref "", ref "" in - let rec loop line = - try - let subs = Pcre.exec ~rex:re_keyvalue line in - let key = Pcre.get_substring subs 1 in - let value = Pcre.get_substring subs 2 in - (*if key = "Name" then name := value - else*) if key = "Version" then version := value - else if key = "Release" then release := value - else if key = "Architecture" then arch := value; - let line = input_line chan in - loop line - with - Not_found | End_of_file -> - close_in chan - in - loop line; - let (*name,*) version, release, arch = - (*!name,*) !version, !release, !arch in - if (*name = "" ||*) version = "" || release = "" || arch = "" then - failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); - (* XXX Map name -> PAE, hugemem etc. *) - (* name, *) sprintf "%s-%s.%s" version release arch, arch - ) in - - (*printf "%s -> %s %s\n%!" basename version arch;*) - - (basename, version, arch) - ) infos in - - let nr_kernels = List.length infos in - - (* For quick access to the opener strings, build a hash. *) - let openers = Hashtbl.create 13 in - List.iter ( - fun (name, { opener = opener; closer = closer }) -> - Hashtbl.add openers opener (closer, name) - ) structs; - - (* Now read the data files and parse out the structures of interest. *) let kernels = List.mapi ( - fun i (basename, version, arch) -> + fun i info -> printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels; - let file_exists name = - try Unix.access name [Unix.F_OK]; true - with Unix.Unix_error _ -> false - in - let close_process_in cmd chan = - match Unix.close_process_in chan with - | Unix.WEXITED 0 -> () - | Unix.WEXITED i -> - eprintf "%s: command exited with code %d\n" cmd i; exit i - | Unix.WSIGNALED i -> - eprintf "%s: command exited with signal %d\n" cmd i; exit 1 - | Unix.WSTOPPED i -> - eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 - in - - (* Open the data file, uncompressing it on the fly if necessary. *) - let chan, close = - if file_exists (basename ^ ".data") then - open_in (basename ^ ".data"), close_in - else if file_exists (basename ^ ".data.gz") then ( - let cmd = - sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in - Unix.open_process_in cmd, close_process_in cmd - ) - else if file_exists (basename ^ ".data.bz2") then ( - let cmd = - sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in - Unix.open_process_in cmd, close_process_in cmd - ) else - failwith - (sprintf "%s: cannot find corresponding data file" basename) in - - (* Read the data file in, looking for structures of interest to us. *) - let bodies = Hashtbl.create 13 in - let rec loop () = - let line = input_line chan in - - (* If the line is an opener for one of the structures we - * are looking for, then for now just save all the text until - * we get to the closer line. - *) - (try - let closer, name = Hashtbl.find openers line in - let rec loop2 lines = - let line = input_line chan in - let lines = line :: lines in - if String.starts_with line closer then List.rev lines - else loop2 lines - in - - let body = - try loop2 [line] - with End_of_file -> - failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in - - Hashtbl.replace bodies name body - with Not_found -> ()); + let struct_names = List.map fst structs in + let structures = PP.load_structures info struct_names in - loop () - in - (try loop () with End_of_file -> ()); - - close chan; - - (* Make sure we got all the mandatory structures. *) + (* Make sure we got all the mandatory structures & fields. *) List.iter ( - fun (name, { mandatory_struct = mandatory }) -> - if mandatory && not (Hashtbl.mem bodies name) then - failwith (sprintf "%s: structure %s not found in this kernel" basename name) - ) structs; - - (basename, version, arch, bodies) - ) infos in - - (* Now parse each structure body. - * XXX This would be better as a proper lex/yacc parser. - * XXX Even better would be to have a proper interface to libdwarves. - *) - let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in - let re_intfield = Pcre.regexp "int\\s+(\\w+);" in - let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in - let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in - let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in - let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in - - (* 'basename' is the source file, and second parameter ('body') is - * the list of text lines which covers this structure (minus the - * opener line). Result is the list of parsed fields from this - * structure. - *) - let rec parse basename = function - | [] -> assert false - | [_] -> [] (* Just the closer line, finished. *) - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - (* Recursively parse a sub-structure. First search for the - * corresponding closer line. - *) - let rec loop depth acc = function - | [] -> - eprintf "%s: %S has no matching close structure line\n%!" - basename line; - assert false - | line :: lines when Pcre.pmatch ~rex:re_structopener line -> - loop (depth+1) (line :: acc) lines - | line :: lines - when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> - (line :: acc), lines - | line :: lines - when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> - loop (depth-1) (line :: acc) lines - | line :: lines -> loop depth (line :: acc) lines - in - let nested_body, rest = loop 0 [] lines in - - (* Then parse the sub-structure. *) - let struct_name, nested_body = - match nested_body with - | [] -> assert false - | closer :: _ -> - let subs = Pcre.exec ~rex:re_structcloser closer in - let struct_name = - try Some (Pcre.get_substring subs 1) with Not_found -> None in - struct_name, List.rev nested_body in - let nested_fields = parse basename nested_body in - - (* Prefix the sub-fields with the name of the structure. *) - let nested_fields = - match struct_name with - | None -> nested_fields - | Some prefix -> - List.map ( - fun (name, details) -> (prefix ^ "'" ^ name, details) - ) nested_fields in - - (* Parse the rest. *) - nested_fields @ parse basename rest - - | line :: lines when Pcre.pmatch ~rex:re_intfield line -> - (* An int field. *) - let subs = Pcre.exec ~rex:re_intfield line in - let name = Pcre.get_substring subs 1 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Int, offset, size)) :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> - (* A pointer-to-struct field. *) - let subs = Pcre.exec ~rex:re_ptrfield line in - let struct_name = Pcre.get_substring subs 1 in - let name = Pcre.get_substring subs 2 in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Ptr struct_name, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | line :: lines when Pcre.pmatch ~rex:re_strfield line -> - (* A string (char array) field. *) - let subs = Pcre.exec ~rex:re_strfield line in - let name = Pcre.get_substring subs 1 in - let width = int_of_string (Pcre.get_substring subs 2) in - (try - let subs = Pcre.exec ~rex:re_offsetsize line in - let offset = int_of_string (Pcre.get_substring subs 1) in - let size = int_of_string (Pcre.get_substring subs 2) in - (name, (`Str width, offset, size)) - :: parse basename lines - with - Not_found -> parse basename lines - ); - - | _ :: lines -> - (* Just ignore any other field we can't parse. *) - parse basename lines - - in + fun (struct_name, + { mandatory_struct = mandatory; fields = wanted_fields }) -> + try + let s = + List.find (fun s -> struct_name = s.PP.struct_name) + structures in + + (* Check we have all the mandatory fields. *) + let all_fields = s.PP.struct_fields in + List.iter ( + fun (wanted_field, { mandatory_field = mandatory }) -> + let got_it = + List.exists ( + fun { PP.field_name = name } -> name = wanted_field + ) all_fields in + if mandatory && not got_it then ( + eprintf "%s: structure %s is missing required field %s\n" + info.PP.basename struct_name wanted_field; + eprintf "fields found in this structure:\n"; + List.iter ( + fun { PP.field_name = name } -> eprintf "\t%s\n" name + ) all_fields; + exit 1 + ); + ) wanted_fields - let kernels = List.map ( - fun (basename, version, arch, bodies) -> - let structures = List.filter_map ( - fun (struct_name, { fields = wanted_fields }) -> - let body = - try Some (Hashtbl.find bodies struct_name) - with Not_found -> None in - match body with - | None -> None - | Some body -> - let body = List.tl body in (* Don't care about opener line. *) - let fields = parse basename body in - - (* Compute total size of the structure. *) - let total_size = - let fields = List.map ( - fun (_, (_, offset, size)) -> offset + size - ) fields in - List.fold_left max 0 fields in + with Not_found -> + if mandatory then + failwith (sprintf "%s: structure %s not found in this kernel" + info.PP.basename struct_name) + ) structs; - (* That got us all the fields, but we only care about - * the wanted_fields. - *) - let fields = List.filter ( - fun (name, _) -> List.mem_assoc name wanted_fields + let structures = + List.map ( + fun ({ PP.struct_name = struct_name; PP.struct_fields = fields } + as structure) -> + let { fields = wanted_fields } = List.assoc struct_name structs in + + (* That got us all the fields, but we only care about + * the wanted_fields. + *) + let fields = List.filter ( + fun { PP.field_name = name } -> List.mem_assoc name wanted_fields + ) fields in + + (* Prefix all the field names with the structure name. *) + let fields = + List.map ( + fun ({ PP.field_name = name } as field) -> + let name = struct_name ^ "_" ^ name in + { field with PP.field_name = name } ) fields in + { structure with PP.struct_fields = fields } + ) structures in - (* Also check we have all the mandatory fields. *) - List.iter ( - fun (wanted_field, { mandatory_field = mandatory }) -> - if mandatory && not (List.mem_assoc wanted_field fields) then - failwith (sprintf "%s: structure %s is missing required field %s" basename struct_name wanted_field) - ) wanted_fields; - - (* Prefix all the field names with the structure name. *) - let fields = - List.map (fun (name, details) -> - struct_name ^ "_" ^ name, details) fields in - - Some (struct_name, (fields, total_size)) - ) structs in - - (basename, version, arch, structures) + (info, structures) ) kernels in if debug then List.iter ( - fun (basename, version, arch, structures) -> - printf "%s (version: %s, arch: %s):\n" basename version arch; + fun (info, structures) -> + printf "%s ----------\n" (PP.string_of_info info); List.iter ( - fun (struct_name, (fields, total_size)) -> - printf " struct %s {\n" struct_name; - List.iter ( - fun (field_name, (typ, offset, size)) -> - (match typ with - | `Int -> - printf " int %s; " field_name - | `Ptr struct_name -> - printf " struct %s *%s; " struct_name field_name - | `Str width -> - printf " char %s[%d]; " field_name width - ); - printf " /* offset = %d, size = %d */\n" offset size - ) fields; - printf " } /* %d bytes */\n\n" total_size; + fun structure -> + printf "%s\n\n" (PP.string_of_structure structure); ) structures; ) kernels; @@ -502,7 +299,9 @@ Example (from toplevel of virt-mem source tree): let () = let _loc = Loc.ghost in - let versions = List.map (fun (_, version, _, _) -> version) kernels in + let versions = List.map ( + fun ({ PP.kernel_version = version }, _) -> version + ) kernels in (* Sort them in reverse because we are going to generate the * final list in reverse. @@ -529,12 +328,18 @@ Example (from toplevel of virt-mem source tree): *) let files = List.map ( - fun (name, _) -> + fun (struct_name, _) -> let kernels = List.filter_map ( - fun (basename, version, arch, structures) -> - try Some (basename, version, arch, List.assoc name structures) - with Not_found -> None + fun (info, structures) -> + try + let structure = + List.find ( + fun { PP.struct_name = name } -> name = struct_name + ) structures in + Some (info, structure) + with Not_found -> + None ) kernels in (* Sort the kernels, which makes the generated output more stable @@ -542,11 +347,12 @@ Example (from toplevel of virt-mem source tree): *) let kernels = List.sort kernels in - name, kernels + struct_name, kernels ) structs in let kernels = () in ignore kernels; (* garbage collect *) +(* (* Get just the field types. * * It's plausible that a field with the same name has a different @@ -585,16 +391,16 @@ Example (from toplevel of virt-mem source tree): (* Now get a type for each structure field. *) List.filter_map ( - fun (field_name, { mandatory_field = mandatory }) -> + fun (field_name, ft) -> try let field_name = struct_name ^ "_" ^ field_name in let typ = Hashtbl.find hash field_name in - Some (field_name, (typ, mandatory)) + Some (field_name, (typ, ft)) with Not_found -> let msg = sprintf "%s.%s: this field was not found in any kernel version" struct_name field_name in - if mandatory then failwith msg else prerr_endline msg; + if ft.mandatory_field then failwith msg else prerr_endline msg; None ) struct_fields in (struct_name, kernels, field_types) @@ -687,17 +493,17 @@ Example (from toplevel of virt-mem source tree): let struct_type, struct_sig = let fields = List.map ( function - | (name, (`Int, true)) -> + | (name, (`Int, { mandatory_field = true })) -> <:ctyp< $lid:name$ : int64 >> - | (name, (`Int, false)) -> + | (name, (`Int, { mandatory_field = false })) -> <:ctyp< $lid:name$ : int64 option >> - | (name, (`Ptr _, true)) -> + | (name, ((`VoidPtr|`Ptr _), { mandatory_field = true })) -> <:ctyp< $lid:name$ : Virt_mem_mmap.addr >> - | (name, (`Ptr _, false)) -> + | (name, ((`VoidPtr|`Ptr _), { mandatory_field = false })) -> <:ctyp< $lid:name$ : Virt_mem_mmap.addr option >> - | (name, (`Str _, true)) -> + | (name, (`Str _, { mandatory_field = true })) -> <:ctyp< $lid:name$ : string >> - | (name, (`Str _, false)) -> + | (name, (`Str _, { mandatory_field = false })) -> <:ctyp< $lid:name$ : string option >> ) field_types in let fields = concat_record_fields _loc fields in @@ -790,8 +596,7 @@ Example (from toplevel of virt-mem source tree): String.concat ";\n " ( List.map ( function - | (field_name, (`Int, offset, size)) - | (field_name, (`Ptr _, offset, size)) -> + | (field_name, ((`Int|`Ptr _|`VoidPtr), offset, size)) -> (* 'zero+' is a hack to force the type to int64. *) sprintf "%s : zero+%d : offset(%d), %s" field_name (size*8) (offset*8) endian @@ -803,21 +608,22 @@ Example (from toplevel of virt-mem source tree): let assignments = List.map ( fun (field_name, typ) -> - let (_, mandatory) = + let (_, { mandatory_field = mandatory; + list_head_adjustment = list_head_adjustment }) = try List.assoc field_name field_types with Not_found -> failwith (sprintf "%s: not found in field_types" field_name) in - match typ, mandatory with - | (`Ptr "list_head", offset, size), true -> + match typ, mandatory, list_head_adjustment with + | (`Ptr "list_head", offset, size), true, true -> sprintf "%s = Int64.sub %s %dL" field_name field_name offset - | (`Ptr "list_head", offset, size), false -> + | (`Ptr "list_head", offset, size), false, true -> sprintf "%s = Some (Int64.sub %s %dL)" field_name field_name offset - | _, true -> + | _, true, _ -> sprintf "%s = %s" field_name field_name - | _, false -> + | _, false, _ -> sprintf "%s = Some %s" field_name field_name ) fields in let assignments_not_present = @@ -985,3 +791,4 @@ Example (from toplevel of virt-mem source tree): Unix.unlink new_output_file ) files +*)