1 (* Memory info for virtual domains.
2 (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 (* This program takes the kernel database (in kernels/ in toplevel
21 directory) and generates parsing code for the various structures
22 in the kernel that we are interested in.
24 The output programs -- *.ml, *.mli files of generated code -- go
25 into lib/ at the toplevel, eg. lib/kernel_task_struct.ml
27 The stuff at the top of this file determine what structures
28 and fields we try to parse.
33 "struct task_struct {", "};", true,
34 [ "state"; "prio"; "normal_prio"; "static_prio";
35 "tasks'prev"; "tasks'next"; "mm"; "active_mm"; "comm"; "pid" ]
39 "struct mm_struct {", "};", true,
44 "struct net_device {", "};", true,
45 [ "name"; "dev_addr" ]
59 let (//) = Filename.concat
62 let args = Array.to_list Sys.argv in
64 let kernelsdir, outputdir =
68 let arg0 = Filename.basename Sys.executable_name in
69 eprintf "%s - Turn kernels database into code modules.
72 %s <kernelsdir> <outputdir>
74 Example (from toplevel of virt-mem source tree):
79 (* Get the *.info files from the kernels database. *)
80 let infos = Sys.readdir kernelsdir in
81 let infos = Array.to_list infos in
82 let infos = List.filter (fun name -> String.ends_with name ".info") infos in
83 let infos = List.map ( (//) kernelsdir) infos in
85 (* Regular expressions. We really really should use ocaml-mikmatch ... *)
86 let re_oldformat = Pcre.regexp "^RPM: \\d+: \\(build \\d+\\) ([-\\w]+) ([\\w.]+) ([\\w.]+) \\(.*?\\) (\\w+)" in
87 let re_keyvalue = Pcre.regexp "^(\\w+): (.*)" in
89 (* Parse in the *.info files. These have historically had a few different
90 * formats that we need to support.
92 let infos = List.map (
94 (* Get the basename (for getting the .data file later on). *)
95 let basename = Filename.chop_suffix filename ".info" in
97 let chan = open_in filename in
98 let line = input_line chan in
100 (* Kernel version string. *)
102 if Pcre.pmatch ~rex:re_oldformat line then (
103 (* If the file starts with "RPM: \d+: ..." then it's the
104 * original Fedora format. Everything in one line.
106 let subs = Pcre.exec ~rex:re_oldformat line in
107 (* let name = Pcre.get_substring subs 1 in *)
108 let version = Pcre.get_substring subs 2 in
109 let release = Pcre.get_substring subs 3 in
110 let arch = Pcre.get_substring subs 4 in
112 (* XXX Map name -> PAE, hugemem etc. *)
113 (* name, *) sprintf "%s-%s.%s" version release arch, arch
115 (* New-style "key: value" entries, up to end of file or the first
118 let (*name,*) version, release, arch =
119 (*ref "",*) ref "", ref "", ref "" in
122 let subs = Pcre.exec ~rex:re_keyvalue line in
123 let key = Pcre.get_substring subs 1 in
124 let value = Pcre.get_substring subs 2 in
125 (*if key = "Name" then name := value
126 else*) if key = "Version" then version := value
127 else if key = "Release" then release := value
128 else if key = "Architecture" then arch := value;
129 let line = input_line chan in
132 Not_found | End_of_file ->
136 let (*name,*) version, release, arch =
137 (*!name,*) !version, !release, !arch in
138 if (*name = "" ||*) version = "" || release = "" || arch = "" then
139 failwith (sprintf "%s: missing Name, Version, Release or Architecture key" filename);
140 (* XXX Map name -> PAE, hugemem etc. *)
141 (* name, *) sprintf "%s-%s.%s" version release arch, arch
144 (*printf "%s -> %s %s\n%!" basename version arch;*)
146 (basename, version, arch)
149 let nr_kernels = List.length infos in
151 (* For quick access to the opener strings, build a hash. *)
152 let openers = Hashtbl.create 13 in
154 fun (name, (opener, closer, _, _)) ->
155 Hashtbl.add openers opener (closer, name)
158 (* Now read the data files and parse out the structures of interest. *)
159 let kernels = List.mapi (
160 fun i (basename, version, arch) ->
161 printf "Loading kernel data file %d/%d\r%!" (i+1) nr_kernels;
163 let file_exists name =
164 try Unix.access name [Unix.F_OK]; true
165 with Unix.Unix_error _ -> false
167 let close_process_in cmd chan =
168 match Unix.close_process_in chan with
169 | Unix.WEXITED 0 -> ()
171 eprintf "%s: command exited with code %d\n" cmd i; exit i
172 | Unix.WSIGNALED i ->
173 eprintf "%s: command exited with signal %d\n" cmd i; exit 1
175 eprintf "%s: command stopped by signal %d\n" cmd i; exit 1
178 (* Open the data file, uncompressing it on the fly if necessary. *)
180 if file_exists (basename ^ ".data") then
181 open_in (basename ^ ".data"), close_in
182 else if file_exists (basename ^ ".data.gz") then (
184 sprintf "gzip -cd %s" (Filename.quote (basename ^ ".data.gz")) in
185 Unix.open_process_in cmd, close_process_in cmd
187 else if file_exists (basename ^ ".data.bz2") then (
189 sprintf "bzip2 -cd %s" (Filename.quote (basename ^ ".data.bz2")) in
190 Unix.open_process_in cmd, close_process_in cmd
193 (sprintf "%s: cannot find corresponding data file" basename) in
195 (* Read the data file in, looking for structures of interest to us. *)
196 let bodies = Hashtbl.create 13 in
198 let line = input_line chan in
200 (* If the line is an opener for one of the structures we
201 * are looking for, then for now just save all the text until
202 * we get to the closer line.
205 let closer, name = Hashtbl.find openers line in
206 let rec loop2 lines =
207 let line = input_line chan in
208 let lines = line :: lines in
209 if String.starts_with line closer then List.rev lines
216 failwith (sprintf "%s: %s: %S not matched by closing %S" basename name line closer) in
218 Hashtbl.replace bodies name body
219 with Not_found -> ());
223 (try loop () with End_of_file -> ());
227 (* Make sure we got all the mandatory structures. *)
229 fun (name, (_, _, mandatory, _)) ->
230 if mandatory && not (Hashtbl.mem bodies name) then
231 failwith (sprintf "%s: structure %s not found in this kernel" basename name)
234 (basename, version, arch, bodies)
237 (* Now parse each structure body.
238 * XXX This would be better as a proper lex/yacc parser.
239 * XXX Even better would be to have a proper interface to libdwarves.
241 let re_offsetsize = Pcre.regexp "/\\*\\s+(\\d+)\\s+(\\d+)\\s+\\*/" in
242 let re_intfield = Pcre.regexp "int\\s+(\\w+);" in
243 let re_ptrfield = Pcre.regexp "struct\\s+(\\w+)\\s*\\*\\s*(\\w+);" in
244 let re_strfield = Pcre.regexp "char\\s+(\\w+)\\[(\\d+)\\];" in
245 let re_structopener = Pcre.regexp "(struct|union)\\s+.*{$" in
246 let re_structcloser = Pcre.regexp "}\\s*(\\w+)?(\\[\\d+\\])?;" in
248 (* 'basename' is the source file, and second parameter ('body') is
249 * the list of text lines which covers this structure (minus the
250 * opener line). Result is the list of parsed fields from this
253 let rec parse basename = function
255 | [_] -> [] (* Just the closer line, finished. *)
256 | line :: lines when Pcre.pmatch ~rex:re_structopener line ->
257 (* Recursively parse a sub-structure. First search for the
258 * corresponding closer line.
260 let rec loop depth acc = function
262 eprintf "%s: %S has no matching close structure line\n%!"
265 | line :: lines when Pcre.pmatch ~rex:re_structopener line ->
266 loop (depth+1) (line :: acc) lines
268 when depth = 0 && Pcre.pmatch ~rex:re_structcloser line ->
271 when depth > 0 && Pcre.pmatch ~rex:re_structcloser line ->
272 loop (depth-1) (line :: acc) lines
273 | line :: lines -> loop depth (line :: acc) lines
275 let nested_body, rest = loop 0 [] lines in
277 (* Then parse the sub-structure. *)
278 let struct_name, nested_body =
279 match nested_body with
282 let subs = Pcre.exec ~rex:re_structcloser closer in
284 try Some (Pcre.get_substring subs 1) with Not_found -> None in
285 struct_name, List.rev nested_body in
286 let nested_fields = parse basename nested_body in
288 (* Prefix the sub-fields with the name of the structure. *)
290 match struct_name with
291 | None -> nested_fields
294 fun (name, details) -> (prefix ^ "'" ^ name, details)
297 (* Parse the rest. *)
298 nested_fields @ parse basename rest
300 | line :: lines when Pcre.pmatch ~rex:re_intfield line ->
302 let subs = Pcre.exec ~rex:re_intfield line in
303 let name = Pcre.get_substring subs 1 in
305 let subs = Pcre.exec ~rex:re_offsetsize line in
306 let offset = int_of_string (Pcre.get_substring subs 1) in
307 let size = int_of_string (Pcre.get_substring subs 2) in
308 (name, (`Int, offset, size)) :: parse basename lines
310 Not_found -> parse basename lines
313 | line :: lines when Pcre.pmatch ~rex:re_ptrfield line ->
314 (* A pointer-to-struct field. *)
315 let subs = Pcre.exec ~rex:re_ptrfield line in
316 let struct_name = Pcre.get_substring subs 1 in
317 let name = Pcre.get_substring subs 2 in
319 let subs = Pcre.exec ~rex:re_offsetsize line in
320 let offset = int_of_string (Pcre.get_substring subs 1) in
321 let size = int_of_string (Pcre.get_substring subs 2) in
322 (name, (`Ptr struct_name, offset, size))
323 :: parse basename lines
325 Not_found -> parse basename lines
328 | line :: lines when Pcre.pmatch ~rex:re_strfield line ->
329 (* A string (char array) field. *)
330 let subs = Pcre.exec ~rex:re_strfield line in
331 let name = Pcre.get_substring subs 1 in
332 let width = int_of_string (Pcre.get_substring subs 2) in
334 let subs = Pcre.exec ~rex:re_offsetsize line in
335 let offset = int_of_string (Pcre.get_substring subs 1) in
336 let size = int_of_string (Pcre.get_substring subs 2) in
337 (name, (`Str width, offset, size))
338 :: parse basename lines
340 Not_found -> parse basename lines
344 (* Just ignore any other field we can't parse. *)
349 let kernels = List.map (
350 fun (basename, version, arch, bodies) ->
351 let structures = List.filter_map (
352 fun (struct_name, (_, _, _, wanted_fields)) ->
354 try Some (Hashtbl.find bodies struct_name)
355 with Not_found -> None in
359 let body = List.tl body in (* Don't care about opener line. *)
360 let fields = parse basename body in
362 (* Compute total size of the structure. *)
364 let fields = List.map (
365 fun (_, (_, offset, size)) -> offset + size
367 List.fold_left max 0 fields in
369 (* That got us all the fields, but we only care about
372 let fields = List.filter (
373 fun (name, _) -> List.mem name wanted_fields
376 (* Also check we have all the wanted fields. *)
379 if not (List.mem_assoc wanted_field fields) then
380 failwith (sprintf "%s: structure %s is missing required field %s" basename struct_name wanted_field)
383 (* Prefix all the field names with the structure name. *)
385 List.map (fun (name, details) ->
386 struct_name ^ "_" ^ name, details) fields in
388 Some (struct_name, (fields, total_size))
391 (basename, version, arch, structures)
396 fun (basename, version, arch, structures) ->
397 printf "%s (version: %s, arch: %s):\n" basename version arch;
399 fun (struct_name, (fields, total_size)) ->
400 printf " struct %s {\n" struct_name;
402 fun (field_name, (typ, offset, size)) ->
405 printf " int %s; " field_name
406 | `Ptr struct_name ->
407 printf " struct %s *%s; " struct_name field_name
409 printf " char %s[%d]; " field_name width
411 printf " /* offset = %d, size = %d */\n" offset size
413 printf " } /* %d bytes */\n\n" total_size;
417 (* First output file is a simple list of kernels, to support the
418 * 'virt-mem --list-kernels' option.
421 let _loc = Loc.ghost in
423 let versions = List.map (fun (_, version, _, _) -> version) kernels in
425 (* Sort them in reverse because we are going to generate the
426 * final list in reverse.
428 let cmp a b = compare b a in
429 let versions = List.sort ~cmp versions in
432 List.fold_left (fun xs version -> <:expr< $str:version$ :: $xs$ >>)
433 <:expr< [] >> versions in
435 let code = <:str_item<
439 let output_file = outputdir // "virt_mem_kernels.ml" in
440 printf "Writing list of kernels to %s ...\n%!" output_file;
441 Printers.OCaml.print_implem ~output_file code in
443 (* We'll generate a code file for each structure type (eg. task_struct
444 * across all kernel versions), so rearrange 'kernels' for that purpose.
446 * XXX This loop is O(n^3), luckily n is small!
453 fun (basename, version, arch, structures) ->
454 try Some (basename, version, arch, List.assoc name structures)
455 with Not_found -> None
458 (* Sort the kernels, which makes the generated output more stable
459 * and makes patches more useful.
461 let kernels = List.sort kernels in
466 let kernels = () in ignore kernels; (* garbage collect *)
468 (* Get just the field types. It's plausible that a field with the
469 * same name has a different type between kernel versions, so we must
470 * check that didn't happen.
472 let files = List.map (
473 fun (struct_name, kernels) ->
477 | (_, _, _, (fields, _)) :: kernels ->
478 let field_types_of_fields fields =
481 fun (field_name, (typ, _, _)) -> field_name, typ
485 let field_types = field_types_of_fields fields in
487 fun (_, _, _, (fields, _)) ->
488 if field_types <> field_types_of_fields fields then
489 failwith (sprintf "%s: one of the structure fields changed type between kernel versions" struct_name)
492 (struct_name, kernels, field_types)
495 (* To minimize generated code size, we want to fold together all
496 * structures where the particulars (eg. offsets, sizes, endianness)
497 * of the fields we care about are the same -- eg. between kernel
498 * versions which are very similar.
500 let endian_of_architecture arch =
501 if String.starts_with arch "i386" ||
502 String.starts_with arch "i486" ||
503 String.starts_with arch "i586" ||
504 String.starts_with arch "i686" ||
505 String.starts_with arch "x86_64" ||
506 String.starts_with arch "x86-64" then
507 Bitstring.LittleEndian
508 else if String.starts_with arch "ia64" then
509 Bitstring.LittleEndian (* XXX usually? *)
510 else if String.starts_with arch "ppc" then
512 else if String.starts_with arch "sparc" then
515 failwith (sprintf "endian_of_architecture: cannot parse %S" arch)
520 fun (struct_name, kernels, field_types) ->
521 let hash = Hashtbl.create 13 in
526 fun (basename, version, arch, (fields, total_size)) ->
527 let key = endian_of_architecture arch, fields in
529 try Hashtbl.find hash key
532 xs := (!i, key) :: !xs; Hashtbl.add hash key !i;
534 (basename, version, arch, total_size, j)
536 let parsers = List.rev !xs in
537 struct_name, kernels, field_types, parsers
540 (* How much did we save by sharing? *)
543 fun (struct_name, kernels, _, parsers) ->
544 printf "struct %s:\n" struct_name;
545 printf " number of kernel versions: %d\n" (List.length kernels);
546 printf " number of parser functions needed after sharing: %d\n"
547 (List.length parsers)
550 (* Let's generate some code! *)
553 fun (struct_name, kernels, field_types, parsers) ->
554 (* Dummy location required - there are no real locations for
557 let _loc = Loc.ghost in
559 (* The structure type. *)
560 let struct_type, struct_sig =
561 let fields = List.map (
564 <:ctyp< $lid:name$ : int64 >>
566 <:ctyp< $lid:name$ : Virt_mem_mmap.addr >>
568 <:ctyp< $lid:name$ : string >>
570 let f, fs = match fields with
571 | [] -> failwith (sprintf "%s: structure has no fields" struct_name)
572 | f :: fs -> f, fs in
573 let fields = List.fold_left (
574 fun fs f -> <:ctyp< $fs$ ; $f$ >>
577 let struct_type = <:str_item< type t = { $fields$ } >> in
578 let struct_sig = <:sig_item< type t = { $fields$ } >> in
579 struct_type, struct_sig in
581 (* The shared parser functions.
583 * We could include bitmatch statements directly in here, but
584 * what happens is that the macros get expanded here, resulting
585 * in (even more) unreadable generated code. So instead just
586 * do a textual substitution later by post-processing the
587 * generated files. Not type-safe, but we can't have
590 let parser_stmts, parser_subs =
591 let parser_stmts = List.map (
593 let fnname = sprintf "parser_%d" i in
595 let $lid:fnname$ bits = $str:fnname$
600 match parser_stmts with
601 | [] -> <:str_item< >>
603 List.fold_left (fun ps p -> <:str_item< $ps$ $p$ >>) p ps in
605 (* What gets substituted for "parser_NN" ... *)
606 let parser_subs = List.map (
607 fun (i, (endian, fields)) ->
608 let fnname = sprintf "parser_%d" i in
611 | Bitstring.LittleEndian -> "littleendian"
612 | Bitstring.BigEndian -> "bigendian"
613 | _ -> assert false in
615 (* Fields must be sorted by offset, otherwise bitmatch
618 let cmp (_, (_, o1, _)) (_, (_, o2, _)) = compare o1 o2 in
619 let fields = List.sort ~cmp fields in
620 String.concat ";\n " (
623 | (field_name, (`Int, offset, size))
624 | (field_name, (`Ptr _, offset, size)) ->
625 (* 'zero+' is a hack to force the type to int64. *)
626 sprintf "%s : zero+%d : offset(%d), %s"
627 field_name (size*8) (offset*8) endian
628 | (field_name, (`Str width, offset, size)) ->
629 sprintf "%s : %d : offset(%d), string"
630 field_name (width*8) (offset*8)
634 String.concat ";\n " (
637 | (field_name, (`Ptr "list_head", offset, size)) ->
638 sprintf "%s = Int64.sub %s %dL" field_name field_name offset
640 sprintf "%s = %s" field_name field_name
650 raise (ParseError (struct_name, %S, match_err))"
651 patterns assignments fnname in
656 parser_stmts, parser_subs in
658 (* Define a map from kernel versions to parsing functions. *)
660 let stmts = List.fold_left (
661 fun stmts (_, version, arch, total_size, i) ->
662 let parserfn = sprintf "parser_%d" i in
665 let v = ($lid:parserfn$, $`int:total_size$)
666 let map = StringMap.add $str:version$ v map
668 ) <:str_item< let map = StringMap.empty >> kernels in
671 module StringMap = Map.Make (String) ;;
675 (* Code (.ml file). *)
676 let code = <:str_item<
678 let struct_name = $str:struct_name$
679 let match_err = "failed to match kernel structure"
680 exception ParseError of string * string * string;;
685 type kernel_version = string
686 let $lid:struct_name^"_known"$ version = StringMap.mem version map
687 let $lid:struct_name^"_size"$ version =
688 let _, size = StringMap.find version map in
690 let $lid:struct_name^"_of_bits"$ version bits =
691 let parsefn, _ = StringMap.find version map in
693 let $lid:"get_"^struct_name$ version mem addr =
694 let parsefn, size = StringMap.find version map in
695 let bytes = Virt_mem_mmap.get_bytes mem addr size in
696 let bits = Bitstring.bitstring_of_string bytes in
700 (* Interface (.mli file). *)
701 let interface = <:sig_item<
702 exception ParseError of string * string * string;;
705 val struct_name : string
706 type kernel_version = string
707 val $lid:struct_name^"_known"$ : kernel_version -> bool
708 val $lid:struct_name^"_size"$ : kernel_version -> int
709 val $lid:struct_name^"_of_bits"$ :
710 kernel_version -> Bitstring.bitstring -> t
711 val $lid:"get_"^struct_name$ : kernel_version ->
712 ('a, 'b, [`HasMapping]) Virt_mem_mmap.t -> Virt_mem_mmap.addr -> t
715 (struct_name, code, interface, parser_subs)
718 (* Finally generate the output files. *)
719 let re_subst = Pcre.regexp "^(.*)\"(parser_\\d+)\"(.*)$" in
722 fun (struct_name, code, interface, parser_subs) ->
723 (* Interface (.mli file). *)
724 let output_file = outputdir // "kernel_" ^ struct_name ^ ".mli" in
725 printf "Writing %s interface to %s ...\n%!" struct_name output_file;
726 Printers.OCaml.print_interf ~output_file interface;
728 (* Implementation (.ml file). *)
729 let output_file = outputdir // "kernel_" ^ struct_name ^ ".ml" in
730 printf "Writing %s implementation to %s ...\n%!" struct_name output_file;
732 let new_output_file = output_file ^ ".new" in
733 Printers.OCaml.print_implem ~output_file:new_output_file code;
735 (* Substitute the parser bodies in the output file. *)
736 let ichan = open_in new_output_file in
737 let ochan = open_out output_file in
739 output_string ochan "\
740 (* WARNING: This file and the corresponding mli (interface) are
741 * automatically generated by the extract/codegen/kerneldb_to_parser.ml
744 * Any edits you make to this file will be lost.
746 * To update this file from the latest kernel database, it is recommended
747 * that you do 'make update-kernel-structs'.
752 let line = input_line ichan in
754 if Pcre.pmatch ~rex:re_subst line then (
755 let subs = Pcre.exec ~rex:re_subst line in
756 let start = Pcre.get_substring subs 1 in
757 let template = Pcre.get_substring subs 2 in
758 let rest = Pcre.get_substring subs 3 in
759 let sub = List.assoc template parser_subs in
762 output_string ochan line; output_char ochan '\n';
765 (try loop () with End_of_file -> ());
770 Unix.unlink new_output_file