(* Memory info for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *) (* This program takes the kernel database (in kernels/ in toplevel directory) and generates parsing code for the various structures in the kernel that we are interested in. The output programs -- *.ml, *.mli files of generated code -- go into lib/ at the toplevel, eg. lib/kernel_task_struct.ml The stuff at the top of this file determine what structures and fields we try to parse. *) let what = [ "task_struct", ( "struct task_struct {", "};", true, [ "state"; "prio"; "normal_prio"; "static_prio"; "tasks'prev"; "tasks'next"; "mm"; "active_mm"; "comm"; "pid" ] ); (* "mm_struct", ( "struct mm_struct {", "};", true, [ ] ); *) "net_device", ( "struct net_device {", "};", true, [ "name"; "dev_addr" ] ); ] let debug = false open Camlp4.PreCast open Syntax (*open Ast*) open ExtList open ExtString open Printf let (//) = Filename.concat let () = let args = Array.to_list Sys.argv in let kernelsdir, outputdir = match args with | [_;kd;od] -> kd,od | _ -> let arg0 = Filename.basename Sys.executable_name in eprintf "%s - Turn kernels database into code modules. Usage: %s Example (from toplevel of virt-mem source tree): %s kernels/ lib/ " arg0 arg0 arg0; exit 2 in (* Get the *.info files from the kernels database. *) let infos = Sys.readdir kernelsdir in let infos = Array.to_list infos in let infos = List.filter (fun name -> String.ends_with name ".info") infos in let infos = List.map ( (//) kernelsdir) infos in (* Regular expressions. We really really should use ocaml-mikmatch ... *) let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in (* Parse in the *.info files. These have historically had a few different * formats that we need to support. *) let infos = List.map ( fun filename -> (* Get the basename (for getting the .data file later on). *) let basename = Filename.chop_suffix filename ".info" in let chan = open_in filename in let line = input_line chan in (* Kernel version string. *) let version, arch = if Pcre.pmatch ~rex:re_oldformat line then ( (* If the file starts with "RPM: \d+: ..." then it's the * original Fedora format. Everything in one line. *) let subs = Pcre.exec ~rex:re_oldformat line in (* let name = Pcre.get_substring subs 1 in *) let version = Pcre.get_substring subs 2 in let release = Pcre.get_substring subs 3 in let arch = Pcre.get_substring subs 4 in close_in chan; (* XXX Map name -> PAE, hugemem etc. *) (* name, *) sprintf "%s-%s.%s" version release arch, arch ) else ( (* New-style "key: value" entries, up to end of file or the first * blank line. *) let (*name,*) version, release, arch = (*ref "",*) ref "", ref "", ref "" in let rec loop line = try let subs = Pcre.exec ~rex:re_keyvalue line in let key = Pcre.get_substring subs 1 in let value = Pcre.get_substring subs 2 in (*if key = "Name" then name := value else*) if key = "Version" then version := value else if key = "Release" then release := value else if key = "Architecture" then arch := value; let line = input_line chan in loop line with Not_found | End_of_file -> close_in chan in loop line; let (*name,*) version, release, arch = (*!name,*) !version, !release, !arch in if (*name = "" ||*) version = "" || release = "" || arch = "" then failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); (* XXX Map name -> PAE, hugemem etc. *) (* name, *) sprintf "%s-%s.%s" version release arch, arch ) in (*printf "%s -> %s %s\n%!" basename version arch;*) (basename, version, arch) ) infos in (* For quick access to the opener strings, build a hash. *) let openers = Hashtbl.create 13 in List.iter ( fun (name, (opener, closer, _, _)) -> Hashtbl.add openers opener (closer, name) ) what; (* Now read the data files and parse out the structures of interest. *) let datas = List.map ( fun (basename, version, arch) -> let file_exists name = try Unix.access name [Unix.F_OK]; true with Unix.Unix_error _ -> false in let close_process_in cmd chan = match Unix.close_process_in chan with | Unix.WEXITED 0 -> () | Unix.WEXITED i -> eprintf "%s: command exited with code %d\n" cmd i; exit i | Unix.WSIGNALED i -> eprintf "%s: command exited with signal %d\n" cmd i; exit 1 | Unix.WSTOPPED i -> eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 in (* Open the data file, uncompressing it on the fly if necessary. *) let chan, close = if file_exists (basename ^ ".data") then open_in (basename ^ ".data"), close_in else if file_exists (basename ^ ".data.gz") then ( let cmd = sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in Unix.open_process_in cmd, close_process_in cmd ) else if file_exists (basename ^ ".data.bz2") then ( let cmd = sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in Unix.open_process_in cmd, close_process_in cmd ) else failwith (sprintf "%s: cannot find corresponding data file" basename) in (* Read the data file in, looking for structures of interest to us. *) let bodies = Hashtbl.create 13 in let rec loop () = let line = input_line chan in (* If the line is an opener for one of the structures we * are looking for, then for now just save all the text until * we get to the closer line. *) (try let closer, name = Hashtbl.find openers line in let rec loop2 lines = let line = input_line chan in let lines = line :: lines in if String.starts_with line closer then List.rev lines else loop2 lines in let body = try loop2 [line] with End_of_file -> failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in Hashtbl.replace bodies name body with Not_found -> ()); loop () in (try loop () with End_of_file -> ()); close chan; (* Make sure we got all the mandatory structures. *) List.iter ( fun (name, (_, _, mandatory, _)) -> if mandatory && not (Hashtbl.mem bodies name) then failwith (sprintf "%s: structure %s not found in this kernel" basename name) ) what; (basename, version, arch, bodies) ) infos in (* Now parse each structure body. * XXX This would be better as a proper lex/yacc parser. * XXX Even better would be to have a proper interface to libdwarves. *) let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in let re_intfield = Pcre.regexp "int\\s+(\\w+);" in let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in (* 'basename' is the source file, and second parameter ('body') is * the list of text lines which covers this structure (minus the * opener line). Result is the list of parsed fields from this * structure. *) let rec parse basename = function | [] -> assert false | [_] -> [] (* Just the closer line, finished. *) | line :: lines when Pcre.pmatch ~rex:re_structopener line -> (* Recursively parse a sub-structure. First search for the * corresponding closer line. *) let rec loop depth acc = function | [] -> eprintf "%s: %S has no matching close structure line\n%!" basename line; assert false | line :: lines when Pcre.pmatch ~rex:re_structopener line -> loop (depth+1) (line :: acc) lines | line :: lines when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> (line :: acc), lines | line :: lines when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> loop (depth-1) (line :: acc) lines | line :: lines -> loop depth (line :: acc) lines in let nested_body, rest = loop 0 [] lines in (* Then parse the sub-structure. *) let struct_name, nested_body = match nested_body with | [] -> assert false | closer :: _ -> let subs = Pcre.exec ~rex:re_structcloser closer in let struct_name = try Some (Pcre.get_substring subs 1) with Not_found -> None in struct_name, List.rev nested_body in let nested_fields = parse basename nested_body in (* Prefix the sub-fields with the name of the structure. *) let nested_fields = match struct_name with | None -> nested_fields | Some prefix -> List.map ( fun (name, details) -> (prefix ^ "'" ^ name, details) ) nested_fields in (* Parse the rest. *) nested_fields @ parse basename rest | line :: lines when Pcre.pmatch ~rex:re_intfield line -> (* An int field. *) let subs = Pcre.exec ~rex:re_intfield line in let name = Pcre.get_substring subs 1 in (try let subs = Pcre.exec ~rex:re_offsetsize line in let offset = int_of_string (Pcre.get_substring subs 1) in let size = int_of_string (Pcre.get_substring subs 2) in (name, (`Int, offset, size)) :: parse basename lines with Not_found -> parse basename lines ); | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> (* A pointer-to-struct field. *) let subs = Pcre.exec ~rex:re_ptrfield line in let struct_name = Pcre.get_substring subs 1 in let name = Pcre.get_substring subs 2 in (try let subs = Pcre.exec ~rex:re_offsetsize line in let offset = int_of_string (Pcre.get_substring subs 1) in let size = int_of_string (Pcre.get_substring subs 2) in (name, (`Ptr struct_name, offset, size)) :: parse basename lines with Not_found -> parse basename lines ); | line :: lines when Pcre.pmatch ~rex:re_strfield line -> (* A string (char array) field. *) let subs = Pcre.exec ~rex:re_strfield line in let name = Pcre.get_substring subs 1 in let width = int_of_string (Pcre.get_substring subs 2) in (try let subs = Pcre.exec ~rex:re_offsetsize line in let offset = int_of_string (Pcre.get_substring subs 1) in let size = int_of_string (Pcre.get_substring subs 2) in (name, (`Str width, offset, size)) :: parse basename lines with Not_found -> parse basename lines ); | _ :: lines -> (* Just ignore any other field we can't parse. *) parse basename lines in let datas = List.map ( fun (basename, version, arch, bodies) -> let structures = List.filter_map ( fun (struct_name, (_, _, _, wanted_fields)) -> let body = try Some (Hashtbl.find bodies struct_name) with Not_found -> None in match body with | None -> None | Some body -> let body = List.tl body in (* Don't care about opener line. *) let fields = parse basename body in (* Compute total size of the structure. *) let total_size = let fields = List.map ( fun (_, (_, offset, size)) -> offset + size ) fields in List.fold_left max 0 fields in (* That got us all the fields, but we only care about * the wanted_fields. *) let fields = List.filter ( fun (name, _) -> List.mem name wanted_fields ) fields in (* Also check we have all the wanted fields. *) List.iter ( fun wanted_field -> if not (List.mem_assoc wanted_field fields) then failwith (sprintf "%s: structure %s is missing required field %s" basename struct_name wanted_field) ) wanted_fields; (* Prefix all the field names with the structure name. *) let fields = List.map (fun (name, details) -> struct_name ^ "_" ^ name, details) fields in Some (struct_name, (fields, total_size)) ) what in (basename, version, arch, structures) ) datas in if debug then List.iter ( fun (basename, version, arch, structures) -> printf "%s (version: %s, arch: %s):\n" basename version arch; List.iter ( fun (struct_name, (fields, total_size)) -> printf " struct %s {\n" struct_name; List.iter ( fun (field_name, (typ, offset, size)) -> (match typ with | `Int -> printf " int %s; " field_name | `Ptr struct_name -> printf " struct %s *%s; " struct_name field_name | `Str width -> printf " char %s[%d]; " field_name width ); printf " /* offset = %d, size = %d */\n" offset size ) fields; printf " } /* %d bytes */\n\n" total_size; ) structures; ) datas; (* We'll generate a code file for each structure type (eg. task_struct * across all kernel versions), so rearrange 'datas' for that purpose. * * XXX This loop is O(n^3), luckily n is small! *) let files = List.map ( fun (name, _) -> let kernels = List.filter_map ( fun (basename, version, arch, structures) -> try Some (basename, version, arch, List.assoc name structures) with Not_found -> None ) datas in (* Sort the kernels, which makes the generated output more stable * and makes patches more useful. *) let kernels = List.sort kernels in name, kernels ) what in let datas = () in ignore datas; (* garbage collect *) (* Get just the field types. It's plausible that a field with the * same name has a different type between kernel versions, so we must * check that didn't happen. *) let files = List.map ( fun (struct_name, kernels) -> let field_types = match kernels with | [] -> [] | (_, _, _, (fields, _)) :: kernels -> let field_types_of_fields fields = List.sort ( List.map ( fun (field_name, (typ, _, _)) -> field_name, typ ) fields ) in let field_types = field_types_of_fields fields in List.iter ( fun (_, _, _, (fields, _)) -> if field_types <> field_types_of_fields fields then failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name) ) kernels; field_types in (struct_name, kernels, field_types) ) files in (* To minimize generated code size, we want to fold together all * structures where the particulars (eg. offsets, sizes, endianness) * of the fields we care about are the same -- eg. between kernel * versions which are very similar. *) let endian_of_architecture arch = if String.starts_with arch "i386" || String.starts_with arch "i486" || String.starts_with arch "i586" || String.starts_with arch "i686" || String.starts_with arch "x86_64" || String.starts_with arch "x86-64" then Bitstring.LittleEndian else if String.starts_with arch "ia64" then Bitstring.LittleEndian (* XXX usually? *) else if String.starts_with arch "ppc" then Bitstring.BigEndian else if String.starts_with arch "sparc" then Bitstring.BigEndian else failwith (sprintf "endian_of_architecture: cannot parse %S" arch) in let files = List.map ( fun (struct_name, kernels, field_types) -> let hash = Hashtbl.create 13 in let i = ref 0 in let xs = ref [] in let kernels = List.map ( fun (basename, version, arch, (fields, total_size)) -> let key = endian_of_architecture arch, fields in let j = try Hashtbl.find hash key with Not_found -> incr i; xs := (!i, key) :: !xs; Hashtbl.add hash key !i; !i in (basename, version, arch, total_size, j) ) kernels in let parsers = List.rev !xs in struct_name, kernels, field_types, parsers ) files in (* How much did we save by sharing? *) if debug then List.iter ( fun (struct_name, kernels, _, parsers) -> printf "struct %s:\n" struct_name; printf " number of kernel versions: %d\n" (List.length kernels); printf " number of parser functions needed after sharing: %d\n" (List.length parsers) ) files; (* Let's generate some code! *) let files = List.map ( fun (struct_name, kernels, field_types, parsers) -> (* Dummy location required - there are no real locations for * output files. *) let _loc = Loc.ghost in (* The structure type. *) let struct_type, struct_sig = let fields = List.map ( function | (name, `Int) -> <:ctyp< $lid:name$ : int64 >> | (name, `Ptr _) -> <:ctyp< $lid:name$ : Virt_mem_mmap.addr >> | (name, `Str _) -> <:ctyp< $lid:name$ : string >> ) field_types in let f, fs = match fields with | [] -> failwith (sprintf "%s: structure has no fields" struct_name) | f :: fs -> f, fs in let fields = List.fold_left ( fun fs f -> <:ctyp< $fs$ ; $f$ >> ) f fs in let struct_type = <:str_item< type t = { $fields$ } >> in let struct_sig = <:sig_item< type t = { $fields$ } >> in struct_type, struct_sig in (* The shared parser functions. * * We could include bitmatch statements directly in here, but * what happens is that the macros get expanded here, resulting * in (even more) unreadable generated code. So instead just * do a textual substitution later by post-processing the * generated files. Not type-safe, but we can't have * everything. *) let parser_stmts, parser_subs = let parser_stmts = List.map ( fun (i, _) -> let fnname = sprintf "parser_%d" i in <:str_item< let $lid:fnname$ bits = $str:fnname$ >> ) parsers in let parser_stmts = match parser_stmts with | [] -> <:str_item< >> | p :: ps -> List.fold_left (fun ps p -> <:str_item< $ps$ $p$ >>) p ps in (* What gets substituted for "parser_NN" ... *) let parser_subs = List.map ( fun (i, (endian, fields)) -> let fnname = sprintf "parser_%d" i in let endian = match endian with | Bitstring.LittleEndian -> "littleendian" | Bitstring.BigEndian -> "bigendian" | _ -> assert false in let patterns = (* Fields must be sorted by offset, otherwise bitmatch * will complain. *) let cmp (_, (_, o1, _)) (_, (_, o2, _)) = compare o1 o2 in let fields = List.sort ~cmp fields in String.concat ";\n " ( List.map ( function | (field_name, (`Int, offset, size)) | (field_name, (`Ptr _, offset, size)) -> (* 'zero+' is a hack to force the type to int64. *) sprintf "%s : zero+%d : offset(%d), %s" field_name (size*8) (offset*8) endian | (field_name, (`Str width, offset, size)) -> sprintf "%s : %d : offset(%d), string" field_name (width*8) (offset*8) ) fields ) in let assignments = String.concat ";\n " ( List.map ( function | (field_name, (`Ptr "list_head", offset, size)) -> sprintf "%s = Int64.sub %s %dL" field_name field_name offset | (field_name, _) -> sprintf "%s = %s" field_name field_name ) fields ) in let sub = sprintf " bitmatch bits with | { %s } -> { %s } | { _ } -> raise (ParseError (struct_name, %S, match_err))" patterns assignments fnname in fnname, sub ) parsers in parser_stmts, parser_subs in (* Define a map from kernel versions to parsing functions. *) let version_map = let stmts = List.fold_left ( fun stmts (_, version, arch, total_size, i) -> let parserfn = sprintf "parser_%d" i in <:str_item< $stmts$ let v = ($lid:parserfn$, $`int:total_size$) let map = StringMap.add $str:version$ v map >> ) <:str_item< let map = StringMap.empty >> kernels in <:str_item< module StringMap = Map.Make (String) ;; $stmts$ >> in (* Code (.ml file). *) let code = <:str_item< let warning = "This code is automatically generated from the kernel database by kerneldb-to-parser program. Any edits you make will be lost." let zero = 0 let struct_name = $str:struct_name$ let match_err = "failed to match kernel structure" exception ParseError of string * string * string;; $struct_type$ $parser_stmts$ $version_map$ type kernel_version = string let $lid:struct_name^"_known"$ version = StringMap.mem version map let $lid:struct_name^"_size"$ version = let _, size = StringMap.find version map in size let $lid:struct_name^"_of_bits"$ version bits = let parsefn, _ = StringMap.find version map in parsefn bits let $lid:"get_"^struct_name$ version mem addr = let parsefn, size = StringMap.find version map in let bytes = Virt_mem_mmap.get_bytes mem addr size in let bits = Bitstring.bitstring_of_string bytes in parsefn bits >> in (* Interface (.mli file). *) let interface = <:sig_item< exception ParseError of string * string * string;; $struct_sig$ val struct_name : string type kernel_version = string val $lid:struct_name^"_known"$ : kernel_version -> bool val $lid:struct_name^"_size"$ : kernel_version -> int val $lid:struct_name^"_of_bits"$ : kernel_version -> Bitstring.bitstring -> t val $lid:"get_"^struct_name$ : kernel_version -> ('a, 'b, [`HasMapping]) Virt_mem_mmap.t -> Virt_mem_mmap.addr -> t >> in (struct_name, code, interface, parser_subs) ) files in (* Finally generate the output files. *) let re_subst = Pcre.regexp "^(.*)\"(parser_\\d+)\"(.*)$" in List.iter ( fun (struct_name, code, interface, parser_subs) -> (* Interface (.mli file). *) let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in printf "Writing %s interface to %s ...\n%!" struct_name output_file; Printers.OCaml.print_interf ~output_file interface; (* Implementation (.ml file). *) let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in printf "Writing %s implementation to %s ...\n%!" struct_name output_file; let new_output_file = output_file ^ ".new" in Printers.OCaml.print_implem ~output_file:new_output_file code; (* Substitute the parser bodies in the output file. *) let ichan = open_in new_output_file in let ochan = open_out output_file in let rec loop () = let line = input_line ichan in let line = if Pcre.pmatch ~rex:re_subst line then ( let subs = Pcre.exec ~rex:re_subst line in let start = Pcre.get_substring subs 1 in let template = Pcre.get_substring subs 2 in let rest = Pcre.get_substring subs 3 in let sub = List.assoc template parser_subs in start ^ sub ^ rest ) else line in output_string ochan line; output_char ochan '\n'; loop () in (try loop () with End_of_file -> ()); close_out ochan; close_in ichan; Unix.unlink new_output_file ) files