X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=extract%2Fcodegen%2Fcode_generation.ml;h=d45134996c17125f5b0003b1abfce0265d5f07ab;hb=fea04ba6838a7fb1f7f6df9ea5b9603603205f3d;hp=2dba82012c289d4e3bc7dcff5deffb835f8e7580;hpb=9a4e42524fac9afd50fca18f2124f6df91716d4c;p=virt-mem.git diff --git a/extract/codegen/code_generation.ml b/extract/codegen/code_generation.ml index 2dba820..d451349 100644 --- a/extract/codegen/code_generation.ml +++ b/extract/codegen/code_generation.ml @@ -26,7 +26,20 @@ open ExtString open Printf module PP = Pahole_parser -module SC = Struct_classify +module MM = Minimizer + +let rec uniq ?(cmp = Pervasives.compare) = function + [] -> [] + | [x] -> [x] + | x :: y :: xs when cmp x y = 0 -> + uniq (x :: xs) + | x :: y :: xs -> + x :: uniq (y :: xs) + +let sort_uniq ?cmp xs = + let xs = List.sort ?cmp xs in + let xs = uniq ?cmp xs in + xs (* We don't care about locations when generating code, so it's * useful to just have a single global _loc. @@ -61,17 +74,35 @@ let concat_sig_items items = | x :: xs -> List.fold_left (fun xs x -> <:sig_item< $xs$ $x$ >>) x xs +let concat_exprs exprs = + match exprs with + | [] -> assert false + | x :: xs -> + List.fold_left (fun xs x -> <:expr< $xs$ ; $x$ >>) x xs + let concat_record_fields fields = match fields with - | [] -> assert false - | f :: fs -> - List.fold_left (fun fs f -> <:ctyp< $fs$ ; $f$ >>) f fs + | [] -> assert false + | f :: fs -> + List.fold_left (fun fs f -> <:ctyp< $fs$ ; $f$ >>) f fs let concat_record_bindings rbs = match rbs with - | [] -> assert false - | rb :: rbs -> - List.fold_left (fun rbs rb -> <:rec_binding< $rbs$ ; $rb$ >>) rb rbs + | [] -> assert false + | rb :: rbs -> + List.fold_left (fun rbs rb -> <:rec_binding< $rbs$ ; $rb$ >>) rb rbs + +let concat_bindings bs = + match bs with + | [] -> assert false + | b :: bs -> + List.fold_left (fun bs b -> <:binding< $bs$ and $b$ >>) b bs + +let concat_sum_types ts = + match ts with + | [] -> assert false + | t :: ts -> + List.fold_left (fun ts t -> <:ctyp< $ts$ | $t$ >>) t ts let build_record rbs = Ast.ExRec (_loc, rbs, Ast.ExNil _loc) @@ -83,76 +114,647 @@ let build_tuple_from_exprs exprs = Ast.ExTup (_loc, List.fold_left (fun xs x -> Ast.ExCom (_loc, x, xs)) x xs) +let build_tuple_from_patts patts = + match patts with + | [] | [_] -> assert false + | x :: xs -> + Ast.PaTup (_loc, + List.fold_left (fun xs x -> Ast.PaCom (_loc, x, xs)) x xs) + +(* Helper functions to store things in a fixed-length tuple very efficiently. + * Note that the tuple length must be >= 2. + *) +type tuple = string list + +let tuple_create fields : tuple = fields + +(* Generates 'let _, _, resultpatt, _ = tupleexpr in body'. *) +let tuple_generate_extract fields field resultpatt tupleexpr body = + let patts = List.map ( + fun name -> if name = field then resultpatt else <:patt< _ >> + ) fields in + let result = build_tuple_from_patts patts in + <:expr< let $result$ = $tupleexpr$ in $body$ >> + +(* Generates '(fieldexpr1, fieldexpr2, ...)'. *) +let tuple_generate_construct fieldexprs = + build_tuple_from_exprs fieldexprs + +type code = Ast.str_item * Ast.sig_item + let ocaml_type_of_field_type = function - | PP.FInteger -> <:ctyp< int64 >> - | PP.FString _ -> <:ctyp< string >> - | PP.FStructPointer _ | PP.FVoidPointer - | PP.FAnonListHeadPointer | PP.FListHeadPointer _ -> + | PP.FInteger, true -> <:ctyp< int64 >> + | PP.FInteger, false -> <:ctyp< int64 option >> + | PP.FString _, true -> <:ctyp< string >> + | PP.FString _, false -> <:ctyp< string option >> + | PP.FStructPointer _, true | PP.FVoidPointer, true + | PP.FAnonListHeadPointer, true | PP.FListHeadPointer _, true -> <:ctyp< Virt_mem_mmap.addr >> + | PP.FStructPointer _, false | PP.FVoidPointer, false + | PP.FAnonListHeadPointer, false | PP.FListHeadPointer _, false -> + <:ctyp< Virt_mem_mmap.addr option >> let generate_types xs = - let strs = List.map ( - fun (struct_name, sflist, cflist) -> - let sflist = List.map ( - fun { SC.sf_name = name; sf_fields = fields } -> - if fields <> [] then ( - let fields = List.map ( - fun { PP.field_name = name; PP.field_type = t } -> - let t = ocaml_type_of_field_type t in - <:ctyp< $lid:name$ : $t$ >> - ) fields in - let fields = concat_record_fields fields in - - <:str_item< - type $lid:name$ = { $fields$ } - >> - ) else - <:str_item< type $lid:name$ = unit >> - ) sflist in - let sflist = concat_str_items sflist in - - let cflist = List.map ( - fun { SC.cf_name = name; cf_fields = fields } -> - if fields <> [] then ( - let fields = List.map ( - fun { PP.field_name = name; PP.field_type = t } -> - let t = ocaml_type_of_field_type t in - <:ctyp< $lid:name$ : $t$ >> - ) fields in - let fields = concat_record_fields fields in - - <:str_item< - type $lid:name$ = { $fields$ } - >> - ) else - <:str_item< type $lid:name$ = unit >> - ) cflist in - let cflist = concat_str_items cflist in + let types = List.map ( + fun (struct_name, all_fields) -> + let fields = List.map ( + fun (name, (typ, always_available)) -> + match typ with + | PP.FListHeadPointer _ -> + (* A list head turns into three fields, the pointer, + * the offset within current struct, and the adjustment + * (offset within destination struct). + *) + let t = ocaml_type_of_field_type (typ, always_available) in + [ <:ctyp< $lid:struct_name^"_"^name$ : $t$ >>; + <:ctyp< $lid:struct_name^"_"^name^"_offset"$ : int >>; + <:ctyp< $lid:struct_name^"_"^name^"_adjustment"$ : int >> ] + | _ -> + let t = ocaml_type_of_field_type (typ, always_available) in + [ <:ctyp< $lid:struct_name^"_"^name$ : $t$ >> ] + ) all_fields in + let fields = List.concat fields in + let fields = concat_record_fields fields in <:str_item< - type ('a, 'b) $lid:struct_name$ = { - $lid:struct_name^"_shape"$ : 'a; - $lid:struct_name^"_content"$ : 'b; - } - $sflist$ - $cflist$ + type $lid:struct_name$ = { $fields$ } + >>, + <:sig_item< + type $lid:struct_name$ = { $fields$ } >> ) xs in - let sigs = + (* Generate a sum-type which can use to store any type of kernel + * structure, ie. Task_struct | Net_device | ... + *) + let types = types @ [ + let constrs = + List.map ( + fun (struct_name, _) -> + let struct_name_uc = String.capitalize struct_name in + <:ctyp< $uid:struct_name_uc$ of $lid:struct_name$ >> + ) xs in + let constrs = concat_sum_types constrs in + <:str_item< + type kernel_struct = $constrs$ + >>, + <:sig_item< + type kernel_struct = $constrs$ + >> + ] in + + let strs, sigs = List.split types in + concat_str_items strs, concat_sig_items sigs + +let generate_offsets xs = + (* Only need to generate the offset_of_* functions for fields + * which are cross-referenced from another field. Which + * ones are those? + *) + let fields = + List.concat ( + List.map ( + fun (_, (_, all_fields)) -> + List.filter_map ( + function + | (_, + (PP.FListHeadPointer + ((Some (struct_name, field_name)) as f), + _)) -> + f + | _ -> + None + ) all_fields + ) xs + ) in + + let fields = sort_uniq fields in + + let strs = List.map ( - fun (struct_name, _, _) -> - <:sig_item< - type ('a, 'b) $lid:struct_name$ - >> + fun (struct_name, field_name) -> + let kernels, _ = + try List.assoc struct_name xs + with Not_found -> + failwith ( + sprintf "generate_offsets: structure %s not found. This is probably a list_head-related bug." + struct_name + ) in + (* Find the offset of this field in each kernel version. *) + let offsets = + List.filter_map ( + fun ({ PP.kernel_version = version }, + { PP.struct_fields = fields }) -> + try + let field = + List.find (fun { PP.field_name = name } -> field_name = name) + fields in + let offset = field.PP.field_offset in + Some (version, offset) + with Not_found -> None + ) kernels in + + if offsets = [] then + failwith ( + sprintf "generate_offsets: field %s.%s not found in any kernel. This is probably a list_head-related bug." + struct_name field_name + ); + + (* Generate a map of kernel version to offset. *) + let map = List.fold_left ( + fun map (version, offset) -> + <:expr< StringMap.add $str:version$ $`int:offset$ $map$ >> + ) <:expr< StringMap.empty >> offsets in + + let code = + <:str_item< + let $lid:"offset_of_"^struct_name^"_"^field_name$ = + let map = $map$ in + fun kernel_version -> StringMap.find kernel_version map + >> in + code + ) fields in + + let strs = concat_str_items strs in + + strs, <:sig_item< >> + +let generate_parsers xs = + let strs = + List.map ( + fun (struct_name, (all_fields, palist)) -> + let palist = + List.map ( + fun { MM.pa_name = pa_name } -> + <:str_item< + let $lid:pa_name$ kernel_version bits = $str:pa_name$ + >> + ) palist in + concat_str_items palist ) xs in - concat_str_items strs, concat_sig_items sigs + let strs = concat_str_items strs in + + (* The shared parser functions. + * + * We could include bitmatch statements directly in here, but + * what happens is that the macros get expanded here, resulting + * in (even more) unreadable generated code. So instead just + * do a textual substitution later by post-processing the + * generated files. Not type-safe, but we can't have + * everything. + *) + let subs = Hashtbl.create 13 in + List.iter ( + fun (struct_name, (all_fields, palist)) -> + List.iter ( + fun ({ MM.pa_name = pa_name; + pa_endian = endian; pa_structure = structure }) -> + (* Generate the code to match this structure. *) + let endian = + match endian with + | Bitstring.LittleEndian -> "littleendian" + | Bitstring.BigEndian -> "bigendian" + | _ -> assert false in + let patterns = + String.concat ";\n " ( + List.map ( + function + | { PP.field_name = field_name; + field_type = PP.FInteger; + field_offset = offset; + field_size = size } -> + (* 'zero+' is a hack to force the type to int64. *) + sprintf "%s : zero+%d : offset(%d), %s" + field_name (size*8) (offset*8) endian + + | { PP.field_name = field_name; + field_type = (PP.FStructPointer _ + | PP.FVoidPointer + | PP.FAnonListHeadPointer + | PP.FListHeadPointer _); + field_offset = offset; + field_size = size } -> + sprintf "%s : zero+%d : offset(%d), %s" + field_name (size*8) (offset*8) endian + + | { PP.field_name = field_name; + field_type = PP.FString width; + field_offset = offset; + field_size = size } -> + sprintf "%s : %d : offset(%d), string" + field_name (width*8) (offset*8) + ) structure.PP.struct_fields + ) in + + let assignments = + List.map ( + fun (field_name, (field_type, always_available)) -> + if always_available then ( + (* Go and look up the field offset in the correct kernel. *) + let { PP.field_offset = offset } = + List.find (fun { PP.field_name = name } -> + field_name = name) + structure.PP.struct_fields in + + (* Generate assignment code. List_heads are treated + * specially because they have an implicit adjustment. + *) + match field_type with + | PP.FListHeadPointer None -> + sprintf "%s_%s = %s; + %s_%s_offset = %d; + %s_%s_adjustment = %d" + struct_name field_name field_name + struct_name field_name offset + struct_name field_name offset + + | PP.FListHeadPointer (Some (other_struct_name, + other_field_name)) -> + (* A reference to a field in another structure. We don't + * know the offset until runtime, so we have to call + * offset_of__ to find it. + *) + sprintf "%s_%s = %s; + %s_%s_offset = %d; + %s_%s_adjustment = offset_of_%s_%s kernel_version" + struct_name field_name field_name + struct_name field_name offset (* in this struct *) + struct_name field_name (* ... & in other struct*) + other_struct_name other_field_name + + | _ -> + sprintf "%s_%s = %s" struct_name field_name field_name + ) else ( + (* Field is optional. Is it available in this kernel + * version? If so, get its offset, else throw Not_found. + *) + try + let { PP.field_offset = offset } = + List.find (fun { PP.field_name = name } -> + field_name = name) + structure.PP.struct_fields in + + (* Generate assignment code. List_heads are treated + * specially because they have an implicit adjustment. + *) + match field_type with + | PP.FListHeadPointer None -> + sprintf "%s_%s = Some %s; + %s_%s_offset = %d; + %s_%s_adjustment = %d" + struct_name field_name field_name + struct_name field_name offset + struct_name field_name offset + + | PP.FListHeadPointer (Some (other_struct_name, + other_field_name)) -> + (* A reference to a field in another structure. We + * don't know the offset until runtime, so we have + * to call offset_of__ to find it. + *) + sprintf "%s_%s = Some %s; + %s_%s_offset = %d; + %s_%s_adjustment = offset_of_%s_%s kernel_version" + struct_name field_name field_name + struct_name field_name offset(*in this struct *) + struct_name field_name (*... & in other struct*) + other_struct_name other_field_name + + | _ -> + sprintf "%s_%s = Some %s" + struct_name field_name field_name + with + Not_found -> + (* Field is not available in this kernel version. *) + match field_type with + | PP.FListHeadPointer _ -> + sprintf "%s_%s = None; + %s_%s_offset = -1; + %s_%s_adjustment = -1" + struct_name field_name + struct_name field_name + struct_name field_name + | _ -> + sprintf "%s_%s = None" struct_name field_name + ) + ) all_fields in + + let assignments = String.concat ";\n " assignments in + + let code = + sprintf " + bitmatch bits with + | { %s } -> + { %s } + | { _ } -> + raise (ParseError (%S, %S, match_err))" + patterns assignments + struct_name pa_name in + + Hashtbl.add subs pa_name code + ) palist; + ) xs; + + (strs, <:sig_item< >>), subs + +let generate_version_maps xs = + (* size_of_ kernel_version *) + let strs = List.map ( + fun (struct_name, (kernels, _)) -> + let map = + List.fold_right ( + fun ({ PP.kernel_version = version }, + { PP.struct_total_size = size }) map -> + <:expr< + StringMap.add $str:version$ $`int:size$ $map$ + >> + ) kernels <:expr< StringMap.empty >> in + + <:str_item< + let $lid:"size_of_"^struct_name$ = + let map = $map$ in + fun kernel_version -> + try StringMap.find kernel_version map + with Not_found -> + unknown_kernel_version kernel_version $str:struct_name$ + >> + ) xs in + + (* parser_of_ kernel_version *) + let strs = strs @ List.map ( + fun (struct_name, (kernels, pahash)) -> + let map = + List.fold_right ( + fun ({ PP.kernel_version = version }, _) map -> + let { MM.pa_name = pa_name } = Hashtbl.find pahash version in + <:expr< + StringMap.add $str:version$ $lid:pa_name$ $map$ + >> + ) kernels <:expr< StringMap.empty >> in + + <:str_item< + let $lid:"parser_of_"^struct_name$ = + let map = $map$ in + fun kernel_version -> + try StringMap.find kernel_version map + with Not_found -> + unknown_kernel_version kernel_version $str:struct_name$ + >> + ) xs in + + concat_str_items strs, <:sig_item< >> + +let generate_followers names xs = + (* A follower function for every structure. *) + let bindings = List.map ( + fun (struct_name, all_fields) -> + let followers = List.fold_right ( + fun (name, (typ, always_available)) rest -> + let is_shape_field = + match typ with + | PP.FListHeadPointer None -> true + | PP.FListHeadPointer (Some (struct_name, _)) + | PP.FStructPointer struct_name + when List.mem struct_name names -> true + | _ -> false in + if not is_shape_field then rest + else ( + let dest_struct_name = + match typ with + | PP.FListHeadPointer None -> struct_name + | PP.FListHeadPointer (Some (struct_name, _)) -> struct_name + | PP.FStructPointer struct_name -> struct_name + | _ -> assert false in + + let body = + match typ with + | PP.FListHeadPointer _ -> + <:expr< + (* For list head pointers, add the address of the base + * of this virtual structure to the map, then adjust + * the pointer. + *) + let offset = data.$lid:struct_name^"_"^name^"_offset"$ + and adj = data.$lid:struct_name^"_"^name^"_adjustment"$ in + let offset = Int64.of_int offset + and adj = Int64.of_int adj in + + (* 'addr' is base of the virtual struct, but when + * adding it to the map make sure we don't splat + * the real structure (can happen if offset=adj). + *) + let map = + if offset <> adj then ( + let addr = Int64.sub (Int64.add addr offset) adj in + AddrMap.add addr ($str:dest_struct_name$, None) map + ) else map in + + (* 'dest_addr' is the destination address of + * this pointer. It needs the usual list_head + * adjustment applied. + *) + let dest_addr = Int64.sub dest_addr adj in + let map = + $lid:dest_struct_name^"_follower"$ + kernel_version load map dest_addr in + map + >> + + | PP.FStructPointer _ -> + <:expr< + let map = + $lid:dest_struct_name^"_follower"$ + kernel_version load map dest_addr in + map + >> + + | _ -> assert false in + + if always_available then + <:expr< + let dest_addr = data.$lid:struct_name^"_"^name$ in + let map = $body$ in + $rest$ + >> + else + <:expr< + let map = + match data.$lid:struct_name^"_"^name$ with + | None -> map + | Some dest_addr -> $body$ in + $rest$ + >> + ) + ) all_fields <:expr< map >> in + + let struct_name_uc = String.capitalize struct_name in + + <:binding< + $lid:struct_name^"_follower"$ kernel_version load map addr = + if addr <> 0L && not (AddrMap.mem addr map) then ( + let parser_ = $lid:"parser_of_"^struct_name$ kernel_version in + let total_size = $lid:"size_of_"^struct_name$ kernel_version in + let bits = load $str:struct_name$ addr total_size in + let data = parser_ kernel_version bits in + let map = AddrMap.add + addr ($str:struct_name$, + Some (total_size, bits, $uid:struct_name_uc$ data)) + map in + $followers$ + ) + else map + >> + ) xs in + let bindings = concat_bindings bindings in + let strs = <:str_item< let rec $bindings$ >> in + + (* Function signatures for the interface. *) + let sigs = List.map ( + fun (struct_name, _) -> + <:sig_item< + val $lid:struct_name^"_follower"$ : + kernel_version -> load_fn -> addrmap -> Virt_mem_mmap.addr -> + addrmap + >> + ) xs in + let sigs = concat_sig_items sigs in + + strs, sigs + +let output_interf ~output_file types offsets parsers version_maps followers = + (* Some standard code that appears at the top and bottom + * of the interface file. + *) + let prologue = + <:sig_item< + module AddrMap : sig + type key = Virt_mem_mmap.addr + type 'a t = 'a Map.Make(Int64).t + val empty : 'a t + val is_empty : 'a t -> bool + val add : key -> 'a -> 'a t -> 'a t + val find : key -> 'a t -> 'a + val remove : key -> 'a t -> 'a t + val mem : key -> 'a t -> bool + val iter : (key -> 'a -> unit) -> 'a t -> unit + val map : ('a -> 'b) -> 'a t -> 'b t + val mapi : (key -> 'a -> 'b) -> 'a t -> 'b t + val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int + val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + end ;; + exception ParseError of string * string * string ;; + + type kernel_version = string + + type load_fn = + string -> Virt_mem_mmap.addr -> int -> Bitstring.bitstring + >> + and addrmap = + <:sig_item< + type addrmap = + (string * (int * Bitstring.bitstring * kernel_struct) option) + AddrMap.t + >> in + + let sigs = + concat_sig_items [ prologue; + types; + addrmap; + offsets; + parsers; + version_maps; + followers ] in + Printers.OCaml.print_interf ~output_file sigs; + + ignore (Sys.command (sprintf "wc -l %s" (Filename.quote output_file))) + +(* Finally generate the output files. *) +let re_subst = Pcre.regexp "^(.*)\"(\\w+_parser_\\d+)\"(.*)$" + +let output_implem ~output_file types offsets parsers parser_subs + version_maps followers = + (* Some standard code that appears at the top and bottom + * of the implementation file. + *) + let prologue = + <:str_item< + open Printf ;; + module StringMap = Map.Make (String) ;; + module AddrMap = Map.Make (Int64) ;; + exception ParseError of string * string * string ;; + + let match_err = "failed to match kernel structure" + + let unknown_kernel_version version struct_name = + invalid_arg (sprintf "%s: unknown kernel version or +struct %s is not supported in this kernel. +Try a newer version of virt-mem, or if the guest is not from a +supported Linux distribution, see this page about adding support: + http://et.redhat.com/~rjones/virt-mem/faq.html\n" + version struct_name) + + type kernel_version = string + type load_fn = string -> Virt_mem_mmap.addr -> int -> Bitstring.bitstring + + let zero = 0 + >> + and addrmap = + <:str_item< + type addrmap = + (string * (int * Bitstring.bitstring * kernel_struct) option) + AddrMap.t + >> in + + let strs = + concat_str_items [ prologue; + types; + addrmap; + offsets; + parsers; + version_maps; + followers ] in + + (* Write the new implementation to .ml.new file. *) + let new_output_file = output_file ^ ".new" in + Printers.OCaml.print_implem ~output_file:new_output_file strs; + + (* Substitute the parser bodies in the output file. *) + let ichan = open_in new_output_file in + let ochan = open_out output_file in + + output_string ochan "\ +(* WARNING: This file and the corresponding mli (interface) are + * automatically generated by the extract/codegen/ program. + * + * Any edits you make to this file will be lost. + * + * To update this file from the latest kernel database, it is recommended + * that you do 'make update-kernel-structs'. + *)\n\n"; + + let rec loop () = + let line = input_line ichan in + let line = + if Pcre.pmatch ~rex:re_subst line then ( + let subs = Pcre.exec ~rex:re_subst line in + let start = Pcre.get_substring subs 1 in + let template = Pcre.get_substring subs 2 in + let rest = Pcre.get_substring subs 3 in + let sub = + try Hashtbl.find parser_subs template + with Not_found -> assert false in + start ^ sub ^ rest + ) else line in + output_string ochan line; output_char ochan '\n'; + loop () + in + (try loop () with End_of_file -> ()); + + close_out ochan; + close_in ichan; -let output_interf ~output_file types = - let sigs = concat_sig_items [ types ] in - Printers.OCaml.print_interf ~output_file sigs + Unix.unlink new_output_file; -let output_implem ~output_file types = - let strs = concat_str_items [ types ] in - Printers.OCaml.print_implem ~output_file strs + ignore (Sys.command (sprintf "wc -l %s" (Filename.quote output_file)))