From 5a960be177bdfbbb1d62f96490e355e3e3e54f12 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Thu, 1 Jan 1970 00:00:00 +0000 Subject: [PATCH] Program to extract kernels from Fedora's Koji, parse the debug info for the shape of kernel structures. --- HACKING | 10 + Makefile.in | 2 +- configure.ac | 6 + extract/README | 7 + extract/fedora-koji/Makefile.in | 52 ++++ .../fedora-koji/fedora_koji_download_kernels.ml | 317 +++++++++++++++++++++ 6 files changed, 393 insertions(+), 1 deletion(-) create mode 100644 extract/README create mode 100644 extract/fedora-koji/Makefile.in create mode 100644 extract/fedora-koji/fedora_koji_download_kernels.ml diff --git a/HACKING b/HACKING index bedcc68..2fb1179 100644 --- a/HACKING +++ b/HACKING @@ -24,6 +24,16 @@ mem/ - This brings everything together and links it into a single executable. Other than that purpose, there is almost nothing in this directory. +kernels/ + + - The database of known kernels and the layout of their structures. + +extract/ + + - Tools to extract the structure layout data from kernels. Various + subdirectories here correspond to the different Linux distributions + and methods of getting at their kernels. + General structure of lib/virt_mem.ml ------------------------------------ diff --git a/Makefile.in b/Makefile.in index 4fcf694..87b5a28 100644 --- a/Makefile.in +++ b/Makefile.in @@ -39,7 +39,7 @@ HAVE_PERLDOC = @HAVE_PERLDOC@ TOOLS = uname dmesg ps export TOOLS -SUBDIRS_NOT_PO = lib $(TOOLS) mem +SUBDIRS_NOT_PO = lib $(TOOLS) mem extract/fedora-koji SUBDIRS = $(SUBDIRS_NOT_PO) po ifeq ($(HAVE_PERLDOC),perldoc) diff --git a/configure.ac b/configure.ac index 467cd6a..336a672 100644 --- a/configure.ac +++ b/configure.ac @@ -31,6 +31,9 @@ AC_PROG_CC_C_O AC_CHECK_FUNCS([memmem]) +dnl Do we have pahole (from acme's "dwarves" library)? +AC_PATH_PROG(PAHOLE,pahole) + dnl Check for basic OCaml environment & findlib. AC_PROG_OCAML AC_PROG_FINDLIB @@ -70,6 +73,7 @@ fi dnl Check for optional OCaml packages. 
AC_CHECK_OCAML_PKG(gettext) AC_CHECK_OCAML_PKG(csv) +AC_CHECK_OCAML_PKG(xmlrpc-light) AC_SUBST(pkg_unix) AC_SUBST(pkg_extlib) @@ -78,6 +82,7 @@ AC_SUBST(pkg_xml_light) AC_SUBST(pkg_bitstring) AC_SUBST(pkg_gettext) AC_SUBST(pkg_csv) +AC_SUBST(pkg_xmlrpc_light) dnl Check for optional perldoc (for building manual pages). AC_CHECK_PROG(HAVE_PERLDOC,perldoc,perldoc) @@ -151,5 +156,6 @@ AC_CONFIG_FILES([Makefile ps/Makefile mem/Makefile po/Makefile + extract/fedora-koji/Makefile ]) AC_OUTPUT diff --git a/extract/README b/extract/README new file mode 100644 index 0000000..f19fc7c --- /dev/null +++ b/extract/README @@ -0,0 +1,7 @@ + +This directory contains distro-specific tools for downloading kernels +and extracting the information we need from them. + +fedora-koji/ + + - For recent Fedora kernels built at http://koji.fedoraproject.org/ diff --git a/extract/fedora-koji/Makefile.in b/extract/fedora-koji/Makefile.in new file mode 100644 index 0000000..5fabb01 --- /dev/null +++ b/extract/fedora-koji/Makefile.in @@ -0,0 +1,52 @@ +# virt-mem +# @configure_input@ +# Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +PACKAGE = @PACKAGE_NAME@ +VERSION = @PACKAGE_VERSION@ + +INSTALL = @INSTALL@ +MKDIR_P = @MKDIR_P@ +bindir = @bindir@ + +pkg_xmlrpc_light = @pkg_xmlrpc_light@ + +OCAMLCPACKAGES = + +ifneq ($(pkg_xmlrpc_light),no) +OCAMLCPACKAGES += -package xmlrpc-light,extlib +endif + +OCAMLCFLAGS = @OCAMLCFLAGS@ +OCAMLCLIBS = -linkpkg + +OCAMLOPTFLAGS = @OCAMLOPTFLAGS@ +OCAMLOPTPACKAGES = $(OCAMLCPACKAGES) +OCAMLOPTLIBS = -linkpkg + +TARGETS = fedora-koji-download-kernels.opt + +OBJS = fedora_koji_download_kernels.cmo +XOBJS = $(OBJS:.cmo=.cmx) + +all: $(TARGETS) + +fedora-koji-download-kernels.opt: $(XOBJS) + ocamlfind ocamlopt \ + $(OCAMLOPTFLAGS) $(OCAMLOPTPACKAGES) $(OCAMLOPTLIBS) $(XOBJS) -o $@ + +include ../../Make.rules \ No newline at end of file diff --git a/extract/fedora-koji/fedora_koji_download_kernels.ml b/extract/fedora-koji/fedora_koji_download_kernels.ml new file mode 100644 index 0000000..45afdb3 --- /dev/null +++ b/extract/fedora-koji/fedora_koji_download_kernels.ml @@ -0,0 +1,317 @@ +(* Memory info for virtual domains. + (C) Copyright 2008 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*)
+
+(* This is a script which downloads kernels from Fedora and extracts
+   the kernel layout information.
+
+   The basic plan is as follows:
+
+   (1) Use koji to list out all kernel builds, compare this to
+   what we have already got (in the kernels/ database at the top level
+   of the virt-mem source), and download any kernels we haven't
+   seen already.
+
+   (2) For each kernel, get the kernel-*debuginfo* RPMs (there will
+   be several, one for each architecture, and one for each variant
+   such as PAE).
+
+   (3) For each debuginfo RPM, extract the 'vmlinux' (kernel image)
+   from the RPM.  This contains debugging symbols.
+
+   (4) Run 'pahole -E' (from acme's dwarves library) to extract all
+   the kernel structures.
+
+   (5) Save the kernel name/version/architecture + the output of pahole
+   in the kernels/ directory (the kernels database).
+ *)
+
+open ExtList
+open ExtString
+open Printf
+
+let (//) = Filename.concat
+
+(* Wrappers around the XMLRPC calls. *)
+
+(* One build, decoded from an element of the koji listBuilds() reply. *)
+type build = {
+  package_name : string;                (* eg. "kernel" *)
+  version : string;                     (* eg. "2.6.25" *)
+  release : string;                     (* eg. "1.fc8" *)
+  build_id : int;
+}
+
+(* Human-readable one-line description of a build (for progress output). *)
+let string_of_build { package_name = package_name;
+                      version = version; release = release;
+                      build_id = build_id } =
+  sprintf "%d: %s %s %s" build_id package_name version release
+
+(* One RPM, decoded from an element of the koji listBuildRPMs() reply. *)
+type rpm = {
+  rpm_id : int;                         (* RPM ID (for downloading, etc.) *)
+  rpm_build : build;                    (* the build this RPM was listed for *)
+  rpm_name : string;                    (* eg. "kernel" *)
+  rpm_version : string;                 (* eg. "2.6.25" *)
+  rpm_release : string;                 (* eg. "1.fc8" *)
+  rpm_size : int;                       (* size in bytes of the RPM. *)
+  rpm_arch : string;                    (* architecture *)
+}
+
+(* Human-readable one-line description of an RPM (for progress output). *)
+let string_of_rpm { rpm_id = id; rpm_build = { build_id = build_id };
+                    rpm_name = name;
+                    rpm_version = version; rpm_release = release;
+                    rpm_size = size; rpm_arch = arch } =
+  sprintf "%d: (build %d) %s %s %s (%d bytes) %s"
+    id build_id name version release size arch
+
+(* [get_string_from_struct name items] returns the string stored under
+ * [name] in the XML-RPC struct fields [items].
+ *
+ * The Not_found raised by List.assoc propagates if the field is
+ * missing; Invalid_argument is raised if the field is not a string.
+ *)
+let get_string_from_struct name items =
+  match List.assoc name items with
+  | `String str -> str
+  | _ -> invalid_arg (name ^ ": expected string type")
+
+(* Same as get_string_from_struct, but for integer fields. *)
+let get_int_from_struct name items =
+  match List.assoc name items with
+  | `Int i -> i
+  | _ -> invalid_arg (name ^ ": expected int type")
+
+(* [decode_struct_array call_name decode reply] decodes [reply], the
+ * result of the koji call [call_name], which must be an XML-RPC array
+ * of structs.  [decode] is applied to the fields of each struct and
+ * the decoded values are returned in order.
+ *
+ * A malformed reply is reported on stderr and the program exits with
+ * status 1.  Exceptions other than Not_found and Invalid_argument
+ * raised by [decode] are not caught here.
+ *)
+let decode_struct_array call_name decode reply =
+  let unexpected_type suffix t =
+    prerr_endline
+      (sprintf "unexpected type from koji %s() call%s" call_name suffix);
+    prerr_endline (XmlRpc.dump t);
+    exit 1
+  in
+  match reply with
+  | `Array elements ->
+      List.map (
+        function
+        | `Struct items ->
+            (try decode items
+             with
+             | Not_found ->
+                 prerr_endline
+                   (sprintf
+                      "missing element in build structure from koji %s() calls"
+                      call_name);
+                 exit 1
+             | Invalid_argument err ->
+                 prerr_endline err;
+                 exit 1
+            )
+        | t -> unexpected_type "" t
+      ) elements
+  | t -> unexpected_type ":" t
+
+(* [koji_list_builds rpc ~prefix] calls koji's listBuilds() through
+ * [rpc], passing [prefix] as the "prefix" argument, and returns the
+ * decoded builds.  Exits with status 1 on a malformed reply.
+ *)
+let koji_list_builds rpc ~prefix =
+  let reply = rpc#call "listBuilds" [
+    `Struct [
+      (* __starstar is some weird Python thing which is needed for
+       * Python optional arguments to work.
+       *)
+      "__starstar", `Int 1;
+      "prefix", `String prefix;
+    ]
+  ] in
+
+  decode_struct_array "listBuilds" (
+    fun items ->
+      let package_name = get_string_from_struct "package_name" items in
+      let version = get_string_from_struct "version" items in
+      let release = get_string_from_struct "release" items in
+      let build_id = get_int_from_struct "build_id" items in
+      { package_name = package_name;
+        version = version; release = release;
+        build_id = build_id }
+  ) reply
+
+(* [koji_list_build_rpms rpc build] calls koji's listBuildRPMs() for
+ * [build] and returns the decoded RPMs, each pointing back at [build].
+ * Exits with status 1 on a malformed reply.  The assertion fails if an
+ * RPM reports a build_id different from the one that was asked for.
+ *)
+let koji_list_build_rpms rpc ({ build_id = build_id } as build) =
+  let reply = rpc#call "listBuildRPMs" [ `Int build_id ] in
+
+  decode_struct_array "listBuildRPMs" (
+    fun items ->
+      let name = get_string_from_struct "name" items in
+      let version = get_string_from_struct "version" items in
+      let release = get_string_from_struct "release" items in
+      let build_id' = get_int_from_struct "build_id" items in
+      let id = get_int_from_struct "id" items in
+      let size = get_int_from_struct "size" items in
+      let arch = get_string_from_struct "arch" items in
+      assert (build_id = build_id');
+      { rpm_name = name; rpm_version = version; rpm_release = release;
+        rpm_build = build; rpm_id = id; rpm_size = size;
+        rpm_arch = arch }
+  ) reply
+
+(* This gets the RPM download URL for an RPM.  I can't see a way to
+ * get this using the Koji API, but the URLs are fairly predictable
+ * anyway.
+ *
+ * Returns the pair (uri, filename) where filename is the last
+ * component of the URI.
+ *)
+let koji_rpm_download_url { rpm_build = { package_name = build_name };
+                            rpm_name = rpm_name;
+                            rpm_version = version; rpm_release = release;
+                            rpm_arch = arch } =
+  let filename = sprintf "%s-%s-%s.%s.rpm" rpm_name version release arch in
+  let uri = sprintf "http://koji.fedoraproject.org/packages/%s/%s/%s/%s/%s"
+    build_name version release arch filename in
+  uri, filename
+
+(* [contains_string substr name] is true iff [substr] occurs in [name]. *)
+let contains_string substr name =
+  try ignore (String.find name substr); true
+  with Invalid_string -> false
+
+(* Run a shell command; raise Failure if it returns a non-zero status. *)
+let run cmd =
+  let r = Sys.command cmd in
+  if r <> 0 then
+    failwith (sprintf "%s: command exited with code %d" cmd r)
+
+(* Remove the downloaded RPM [filename] and the temporary subdirectory
+ * (usr/, used for unpacking the RPM), both relative to the current
+ * directory.  Best effort: a failure to unlink (eg. the file was never
+ * downloaded) is ignored.
+ *)
+let cleanup_rpm filename () =
+  (try Unix.unlink filename with Unix.Unix_error _ -> ());
+  ignore (Sys.command "rm -rf usr/")
+
+(* Write the header of the .info file describing [rpm] to [infofile].
+ * The channel is closed even if one of the writes raises.
+ *)
+let write_info_file infofile rpm =
+  let chan = open_out infofile in
+  Std.finally (fun () -> close_out chan) (
+    fun () ->
+      fprintf chan "Source: fedora-koji\n";
+      fprintf chan "Distribution: Fedora\n";
+      fprintf chan "RPM_id: %d\n" rpm.rpm_id;
+      fprintf chan "RPM_build_id: %d\n" rpm.rpm_build.build_id;
+      fprintf chan "Name: %s\n" rpm.rpm_name;
+      fprintf chan "Version: %s\n" rpm.rpm_version;
+      fprintf chan "Release: %s\n" rpm.rpm_release;
+      fprintf chan "Architecture: %s\n" rpm.rpm_arch;
+      fprintf chan "RPM_size: %d\n" rpm.rpm_size;
+      fprintf chan "\n"
+  ) ()
+
+(* [process_rpm outputdir rpm] downloads [rpm] into the current
+ * directory, unpacks its vmlinux, and writes <rpm file>.data (pahole
+ * output) and <rpm file>.info (metadata) into [outputdir].
+ *
+ * The RPM is skipped if its .info file already exists.  The .info file
+ * is only created after pahole has succeeded, so an RPM whose
+ * processing failed earlier is retried on the next run.  A failing
+ * shell command is reported on stderr and does not abort the program.
+ *)
+let process_rpm outputdir rpm =
+  let uri, filename = koji_rpm_download_url rpm in
+  let infofile = (outputdir // filename) ^ ".info" in
+  let datafile = (outputdir // filename) ^ ".data" in
+
+  if Sys.file_exists infofile then
+    printf "Skipping %s\n%!" (string_of_rpm rpm)
+  else (
+    printf "%s\n%!" (string_of_rpm rpm);
+
+    let cleanup = cleanup_rpm filename in
+
+    (* Remove anything left over from a previous RPM first. *)
+    cleanup ();
+
+    try
+      Std.finally cleanup (
+        fun () ->
+          (* Download the RPM.
+           *
+           * Could use ocurl here (the OCaml CURL library) but
+           * using CURL as a library is generally more trouble
+           * than it's worth.  So shell out to 'wget' instead.
+           *)
+          printf "Downloading RPM ...\n%!";
+          run (sprintf "wget --quiet %s" (Filename.quote uri));
+
+          printf "Finished downloading RPM.\n%!";
+
+          (* Unpack vmlinux binary from the RPM. *)
+          run (sprintf "rpm2cpio %s | cpio -id --quiet '*/vmlinux'"
+                 (Filename.quote filename));
+
+          (* Extract the structure layouts with pahole. *)
+          run (sprintf
+                 "find usr/ -name vmlinux -print0 | xargs -0 pahole -E > %s"
+                 (Filename.quote datafile));
+
+          write_info_file infofile rpm;
+
+          (* Append the RPM's own description to the info file. *)
+          run (sprintf "rpm -qip %s >> %s"
+                 (Filename.quote filename) (Filename.quote infofile));
+      ) ()
+    with
+      Failure msg ->
+        eprintf "%s\n%!" msg (* but continue to next RPM ... *)
+  )
+
+(* Main program.
+ *
+ * [main outputdir] lists the kernel builds known to Koji and processes
+ * every debuginfo RPM of every "kernel" / "kernel-xen" build, leaving
+ * the results in [outputdir].  Temporary files are created in the
+ * current directory.
+ *)
+let main outputdir =
+  let rpc = new XmlRpc.client "http://koji.fedoraproject.org/kojihub" in
+
+  (* Grab the list of kernel builds from Koji. *)
+  printf "Downloading list of kernel builds from Koji ...\n%!";
+  let builds = koji_list_builds rpc ~prefix:"kernel" in
+
+  (* Only care about "kernel" and "kernel-xen" builds. *)
+  let builds = List.filter (
+    fun { package_name = name } ->
+      name = "kernel" || name = "kernel-xen"
+  ) builds in
+
+  let nr_builds = List.length builds in
+  printf "%d kernel builds found on Koji.\n%!" nr_builds;
+
+  (* Only care about debuginfo builds, and not debuginfo-common. *)
+  let contains_debuginfo = contains_string "debuginfo" in
+  let contains_common = contains_string "common" in
+
+  (* Every build is processed: the loop must not call 'exit', which
+   * would stop after the first build.
+   *)
+  List.iteri (
+    fun i build ->
+      printf "Build %d/%d: %s\n%!" (i+1) nr_builds (string_of_build build);
+
+      (* List the RPMs in the build. *)
+      let rpms = koji_list_build_rpms rpc build in
+
+      let rpms = List.filter (
+        fun { rpm_name = name } ->
+          contains_debuginfo name && not (contains_common name)
+      ) rpms in
+
+      List.iter (process_rpm outputdir) rpms
+  ) builds
+
+let () =
+  Random.self_init ();
+
+  (* Create a temporary work directory, chdir into there to run the
+   * main program, then ensure that the temporary directory is cleaned
+   * up when we exit.
+   *)
+  let olddir = Unix.getcwd () in
+  let tmpdir =
+    sprintf "%s/tmp%d%Ld"
+      Filename.temp_dir_name
+      (Unix.getpid ()) (Random.int64 Int64.max_int) in
+
+  Unix.mkdir tmpdir 0o700;
+  Sys.chdir tmpdir;
+
+  let cleanup () =
+    Sys.chdir olddir;
+    ignore (Sys.command (sprintf "rm -rf %s" (Filename.quote tmpdir)))
+  in
+
+  (* Register the cleanup with at_exit rather than wrapping main in an
+   * exception handler: the koji wrappers report fatal errors with
+   * 'exit 1', which does not unwind the stack, so a handler around
+   * main would be skipped and the temporary directory left behind.
+   * at_exit functions run on 'exit', on normal termination and on an
+   * uncaught exception.
+   *)
+  at_exit cleanup;
+
+  main olddir
-- 
1.8.3.1