(* Memory info for virtual domains. (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *) (* This program takes the kernel database (in kernels/ in toplevel directory) and generates parsing code for the various structures in the kernel that we are interested in. The output programs -- *.ml, *.mli files of generated code -- go into lib/ at the toplevel, eg. lib/kernel_task_struct.ml The stuff at the top of this file determine what structures and fields we try to parse. *) let what = [ "task_struct", ( "struct task_struct {", "};", true, [ "state"; "prio"; "normal_prio"; "static_prio"; "tasks'prev"; "tasks'next"; "comm"] ); (* "mm_struct", ( "struct mm_struct {", "};", true, [ ] ); *) "net_device", ( "struct net_device {", "};", true, [ "name"; "dev_addr" ] ); ] let debug = true open Camlp4.PreCast open Syntax (*open Ast*) open ExtList open ExtString open Printf let (//) = Filename.concat let () = let args = Array.to_list Sys.argv in let kernelsdir, outputdir = match args with | [_;kd;od] -> kd,od | _ -> let arg0 = Filename.basename Sys.executable_name in eprintf "%s - Turn kernels database into code modules. Usage: %s Example (from toplevel of virt-mem source tree): %s kernels/ lib/ " arg0 arg0 arg0; exit 2 in (* Get the *.info files from the kernels database. *) let infos = Sys.readdir kernelsdir in let infos = Array.to_list infos in let infos = List.filter (fun name -> String.ends_with name ".info") infos in let infos = List.map ((//) kernelsdir) infos in (* Regular expressions. We really really should use ocaml-mikmatch ... *) let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in (* Parse in the *.info files. These have historically had a few different * formats that we need to support. *) let infos = List.map ( fun filename -> (* Get the basename (for getting the .data file later on). *) let basename = Filename.chop_suffix filename ".info" in let chan = open_in filename in let line = input_line chan in (* Kernel version string. *) let version, arch = if Pcre.pmatch ~rex:re_oldformat line then ( (* If the file starts with "RPM: \d+: ..." then it's the * original Fedora format. Everything in one line. *) let subs = Pcre.exec ~rex:re_oldformat line in (* let name = Pcre.get_substring subs 1 in *) let version = Pcre.get_substring subs 2 in let release = Pcre.get_substring subs 3 in let arch = Pcre.get_substring subs 4 in close_in chan; (* XXX Map name -> PAE, hugemem etc. *) (* name, *) sprintf "%s-%s.%s" version release arch, arch ) else ( (* New-style "key: value" entries, up to end of file or the first * blank line. *) let (*name,*) version, release, arch = (*ref "",*) ref "", ref "", ref "" in let rec loop line = try let subs = Pcre.exec ~rex:re_keyvalue line in let key = Pcre.get_substring subs 1 in let value = Pcre.get_substring subs 2 in (*if key = "Name" then name := value else*) if key = "Version" then version := value else if key = "Release" then release := value else if key = "Architecture" then arch := value; let line = input_line chan in loop line with Not_found | End_of_file -> close_in chan in loop line; let (*name,*) version, release, arch = (*!name,*) !version, !release, !arch in if (*name = "" ||*) version = "" || release = "" || arch = "" then failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename); (* XXX Map name -> PAE, hugemem etc. *) (* name, *) sprintf "%s-%s.%s" version release arch, arch ) in (*printf "%s -> %s %s\n%!" basename version arch;*) (basename, version, arch) ) infos in (* For quick access to the opener strings, build a hash. *) let openers = Hashtbl.create 13 in List.iter ( fun (name, (opener, closer, _, _)) -> Hashtbl.add openers opener (closer, name) ) what; (* Now read the data files and parse out the structures of interest. *) let datas = List.map ( fun (basename, version, arch) -> let file_exists name = try Unix.access name [Unix.F_OK]; true with Unix.Unix_error _ -> false in let close_process_in cmd chan = match Unix.close_process_in chan with | Unix.WEXITED 0 -> () | Unix.WEXITED i -> eprintf "%s: command exited with code %d\n" cmd i; exit i | Unix.WSIGNALED i -> eprintf "%s: command exited with signal %d\n" cmd i; exit 1 | Unix.WSTOPPED i -> eprintf "%s: command stopped by signal %d\n" cmd i; exit 1 in (* Open the data file, uncompressing it on the fly if necessary. *) let chan, close = if file_exists (basename ^ ".data") then open_in (basename ^ ".data"), close_in else if file_exists (basename ^ ".data.gz") then ( let cmd = sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in Unix.open_process_in cmd, close_process_in cmd ) else if file_exists (basename ^ ".data.bz2") then ( let cmd = sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in Unix.open_process_in cmd, close_process_in cmd ) else failwith (sprintf "%s: cannot find corresponding data file" basename) in (* Read the data file in, looking for structures of interest to us. *) let bodies = Hashtbl.create 13 in let rec loop () = let line = input_line chan in (* If the line is an opener for one of the structures we * are looking for, then for now just save all the text until * we get to the closer line. *) (try let closer, name = Hashtbl.find openers line in let rec loop2 lines = let line = input_line chan in let lines = line :: lines in if String.starts_with line closer then List.rev lines else loop2 lines in let body = try loop2 [line] with End_of_file -> failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in Hashtbl.replace bodies name body with Not_found -> ()); loop () in (try loop () with End_of_file -> ()); close chan; (* Make sure we got all the mandatory structures. *) List.iter ( fun (name, (_, _, mandatory, _)) -> if mandatory && not (Hashtbl.mem bodies name) then failwith (sprintf "%s: structure %s not found in this kernel" basename name) ) what; (basename, version, arch, bodies) ) infos in (* Now parse each structure body. * XXX This would be better as a proper lex/yacc parser. * XXX Even better would be to have a proper interface to libdwarves. *) let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in let re_intfield = Pcre.regexp "int\\s+(\\w+);" in let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in (* 'basename' is the source file, and second parameter ('body') is * the list of text lines which covers this structure (minus the * opener line). Result is the list of parsed fields from this * structure. *) let rec parse basename = function | [] -> assert false | [_] -> [] (* Just the closer line, finished. *) | line :: lines when Pcre.pmatch ~rex:re_structopener line -> (* Recursively parse a sub-structure. First search for the * corresponding closer line. *) let rec loop depth acc = function | [] -> eprintf "%s: %S has no matching close structure line\n%!" basename line; assert false | line :: lines when Pcre.pmatch ~rex:re_structopener line -> loop (depth+1) (line :: acc) lines | line :: lines when depth = 0 && Pcre.pmatch ~rex:re_structcloser line -> (line :: acc), lines | line :: lines when depth > 0 && Pcre.pmatch ~rex:re_structcloser line -> loop (depth-1) (line :: acc) lines | line :: lines -> loop depth (line :: acc) lines in let nested_body, rest = loop 0 [] lines in (* Then parse the sub-structure. *) let struct_name, nested_body = match nested_body with | [] -> assert false | closer :: _ -> let subs = Pcre.exec ~rex:re_structcloser closer in let struct_name = try Some (Pcre.get_substring subs 1) with Not_found -> None in struct_name, List.rev nested_body in let nested_fields = parse basename nested_body in (* Prefix the sub-fields with the name of the structure. *) let nested_fields = match struct_name with | None -> nested_fields | Some prefix -> List.map ( fun (name, details) -> (prefix ^ "'" ^ name, details) ) nested_fields in (* Parse the rest. *) nested_fields @ parse basename rest | line :: lines when Pcre.pmatch ~rex:re_intfield line -> (* An int field. *) let subs = Pcre.exec ~rex:re_intfield line in let name = Pcre.get_substring subs 1 in (try let subs = Pcre.exec ~rex:re_offsetsize line in let offset = int_of_string (Pcre.get_substring subs 1) in let size = int_of_string (Pcre.get_substring subs 2) in (name, (`Int, offset, size)) :: parse basename lines with Not_found -> parse basename lines ); | line :: lines when Pcre.pmatch ~rex:re_ptrfield line -> (* A pointer-to-struct field. *) let subs = Pcre.exec ~rex:re_ptrfield line in let struct_name = Pcre.get_substring subs 1 in let name = Pcre.get_substring subs 2 in (try let subs = Pcre.exec ~rex:re_offsetsize line in let offset = int_of_string (Pcre.get_substring subs 1) in let size = int_of_string (Pcre.get_substring subs 2) in (name, (`Ptr struct_name, offset, size)) :: parse basename lines with Not_found -> parse basename lines ); | line :: lines when Pcre.pmatch ~rex:re_strfield line -> (* A string (char array) field. *) let subs = Pcre.exec ~rex:re_strfield line in let name = Pcre.get_substring subs 1 in let width = int_of_string (Pcre.get_substring subs 2) in (try let subs = Pcre.exec ~rex:re_offsetsize line in let offset = int_of_string (Pcre.get_substring subs 1) in let size = int_of_string (Pcre.get_substring subs 2) in (name, (`Str width, offset, size)) :: parse basename lines with Not_found -> parse basename lines ); | _ :: lines -> (* Just ignore any other field we can't parse. *) parse basename lines in let datas = List.map ( fun (basename, version, arch, bodies) -> let structures = List.filter_map ( fun (name, (_, _, _, wanted_fields)) -> let body = try Some (Hashtbl.find bodies name) with Not_found -> None in match body with | None -> None | Some body -> let body = List.tl body in (* Don't care about opener line. *) let fields = parse basename body in (* That got us all the fields, but we only care about * the wanted_fields. *) let fields = List.filter ( fun (name, _) -> List.mem name wanted_fields ) fields in (* Also check we have all the wanted fields. *) List.iter ( fun wanted_field -> if not (List.mem_assoc wanted_field fields) then failwith (sprintf "%s: structure %s is missing required field %s" basename name wanted_field) ) wanted_fields; Some (name, fields) ) what in (basename, version, arch, structures) ) datas in if debug then List.iter ( fun (basename, version, arch, structures) -> printf "%s (version: %s, arch: %s):\n" basename version arch; List.iter ( fun (struct_name, fields) -> printf " struct %s {\n" struct_name; List.iter ( fun (field_name, (typ, offset, size)) -> (match typ with | `Int -> printf " int %s; " field_name | `Ptr struct_name -> printf " struct %s *%s; " struct_name field_name | `Str width -> printf " char %s[%d]; " field_name width ); printf " /* offset = %d, size = %d */\n" offset size ) fields; printf " }\n\n"; ) structures; ) datas; (* We'll generate a code file for each structure type (eg. task_struct * across all kernel versions), so rearrange 'datas' for that purpose. * * XXX This loop is O(n^3), luckily n is small! *) let files = List.map ( fun (name, _) -> name, List.filter_map ( fun (basename, version, arch, structures) -> try Some (basename, version, arch, List.assoc name structures) with Not_found -> None ) datas ) what in let datas = () in ignore datas; (* garbage collect *) (* Get just the field types. It's plausible that a field with the * same name has a different type between kernel versions, so we must * check that didn't happen. *) let files = List.map ( fun (struct_name, kernels) -> let field_types = match kernels with | [] -> [] | (_, _, _, fields) :: kernels -> let field_types_of_fields fields = List.sort ( List.map ( fun (field_name, (typ, _, _)) -> field_name, typ ) fields ) in let field_types = field_types_of_fields fields in List.iter ( fun (_, _, _, fields) -> if field_types <> field_types_of_fields fields then failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name) ) kernels; field_types in (struct_name, kernels, field_types) ) files in (* To minimize generated code size, we want to fold together all * structures where the particulars (eg. offsets, sizes, endianness) * of the fields we care about are the same -- eg. between kernel * versions which are very similar. *) let endian_of_architecture arch = if String.starts_with arch "i386" || String.starts_with arch "i486" || String.starts_with arch "i586" || String.starts_with arch "i686" || String.starts_with arch "x86_64" || String.starts_with arch "x86-64" then Bitstring.LittleEndian else if String.starts_with arch "ia64" then Bitstring.LittleEndian (* XXX usually? *) else if String.starts_with arch "ppc" then Bitstring.BigEndian else if String.starts_with arch "sparc" then Bitstring.BigEndian else failwith (sprintf "endian_of_architecture: cannot parse %S" arch) in let files = List.map ( fun (struct_name, kernels, field_types) -> let hash = Hashtbl.create 13 in let i = ref 0 in let xs = ref [] in let kernels = List.map ( fun (basename, version, arch, fields) -> let key = endian_of_architecture arch, fields in let j = try Hashtbl.find hash key with Not_found -> incr i; xs := (!i, key) :: !xs; Hashtbl.add hash key !i; !i in (basename, version, arch, j) ) kernels in struct_name, kernels, field_types, List.rev !xs ) files in (* How much did we save by sharing? *) if debug then List.iter ( fun (struct_name, kernels, _, parsers) -> printf "struct %s:\n" struct_name; printf " number of kernel versions: %d\n" (List.length kernels); printf " number of parser functions needed after sharing: %d\n" (List.length parsers) ) files; (* Let's generate some code! *) let files = List.map ( fun (struct_name, kernels, field_types, parsers) -> (* Dummy location required - there are no real locations for * output files. *) let _loc = Loc.ghost in (* The structure type. *) let struct_type, struct_sig = let fields = List.map ( function | (name, `Int) -> <:ctyp< $lid:name$ : int >> | (name, `Ptr struct_name) -> <:ctyp< $lid:name$ : (*`$str:struct_name$*) int64 >> | (name, `Str _) -> <:ctyp< $lid:name$ : string >> ) field_types in let f, fs = match fields with | [] -> failwith (sprintf "%s: structure has no fields" struct_name) | f :: fs -> f, fs in let fields = List.fold_left ( fun fs f -> <:ctyp< $fs$ ; $f$ >> ) f fs in let struct_type = <:str_item< type t = { $fields$ } >> in let struct_sig = <:sig_item< type t = { $fields$ } >> in struct_type, struct_sig in let code = <:str_item< $struct_type$ >> in let interface = <:sig_item< $struct_sig$ >> in (struct_name, code, interface) ) files in (* Finally generate the output files. *) List.iter ( fun (struct_name, code, interface) -> let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in Printers.OCaml.print_implem ~output_file code; let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in Printers.OCaml.print_interf ~output_file interface ) files