From 071e0e59c28ebdade0fa78845535f0592d7858ee Mon Sep 17 00:00:00 2001 From: rjones Date: Wed, 25 Mar 2009 12:49:44 +0000 Subject: [PATCH 1/1] Commit before big code restructuring. --- .cvsignore | 11 +++++ Makefile.am | 49 +++++++++++++++++++ README | 32 +++++++++++++ autogen.sh | 7 +++ configure.ac | 51 ++++++++++++++++++++ repodeps.py | 100 +++++++++++++++++++++++++++++++++++++++ rpmdepsize.ml | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ rpmdepsize.pl | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 540 insertions(+) create mode 100644 .cvsignore create mode 100644 Makefile.am create mode 100644 README create mode 100755 autogen.sh create mode 100644 configure.ac create mode 100644 repodeps.py create mode 100644 rpmdepsize.ml create mode 100755 rpmdepsize.pl diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..c5b14ab --- /dev/null +++ b/.cvsignore @@ -0,0 +1,11 @@ +Makefile.in +Makefile +aclocal.m4 +autom4te.cache +config.h.in +config.h +config.log +config.status +configure +rpmdepsize +stamp-h1 diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..5e26a55 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,49 @@ +# rpmdepsize Makefile.am +# (C) Copyright 2009 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+# +# Written by Richard W.M. Jones + +bin_SCRIPTS = \ + rpmdepsize repodeps + +rpmdepsize: rpmdepsize.ml + ocamlfind ocamlopt -package sexplib,unix,extlib,sexplib.syntax -syntax camlp4o -linkpkg $< -o $@ + +repodeps: repodeps.py + rm -f $@ + sed 's,PYTHON,$(PYTHON),' < $< > $@-t + chmod 0555 $@-t + mv $@-t $@ + +man_MANS = \ + rpmdepsize.1 + +if HAVE_PERLDOC + +rpmdepsize.1: rpmdepsize.pl + pod2man \ + --section 1 \ + -c "Virtualization Support" \ + --release "$(PACKAGE_NAME)-$(PACKAGE_VERSION)" \ + $< > $@ + +endif + +EXTRA_DIST = \ + rpmdepsize.1 \ + rpmdepsize.ml rpmdepsize.pl \ + repodeps.py diff --git a/README b/README new file mode 100644 index 0000000..4e21920 --- /dev/null +++ b/README @@ -0,0 +1,32 @@ +rpmdepsize +by Richard W.M. Jones +http://et.redhat.com/~rjones/rpmdepsize +---------------------------------------------------------------------- + +This program displays the size of RPMs and their dependencies. It's +useful for shaming RPMs that have too many dependencies or pull in +large amounts of data because of indirect dependencies. + +Please read the manual page rpmdepsize(1) for full details. + +Requirements +---------------------------------------------------------------------- + + perl + + python + + yum + + perldoc + + graphviz + + repoquery + +Build +---------------------------------------------------------------------- + + ./configure + make + sudo make install diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..7218c6f --- /dev/null +++ b/autogen.sh @@ -0,0 +1,7 @@ +#!/bin/sh - + +set -e +set -v +export AUTOMAKE='automake --foreign --add-missing' +autoreconf +./configure "$@" diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..6df2e9d --- /dev/null +++ b/configure.ac @@ -0,0 +1,51 @@ +dnl rpmdepsize configure.ac +dnl (C) Copyright 2009 Red Hat Inc. 
+dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +dnl +dnl Written by Richard W.M. Jones + +AC_INIT(rpmdepsize,1.0) +AM_INIT_AUTOMAKE + +AC_PATH_PROG(PERL,[perl],[no]) +if test "x$PERL" = "xno" ; then + AC_MSG_FAILURE([perl not found]) +fi + +AC_PATH_PROG(PYTHON,[python],[no]) +if test "x$PYTHON" = "xno" ; then + AC_MSG_FAILURE([python not found]) +fi + +AC_CHECK_PROG(PERLDOC,[perldoc],[perldoc],[no]) +if test "x$PERLDOC" = "xno" ; then + AC_MSG_WARN([perldoc not found - install perl to make man pages]) +fi +AM_CONDITIONAL(HAVE_PERLDOC,[test "x$PERLDOC" != "xno"]) dnl the variable set above is PERLDOC (upper case) + +AC_CHECK_PROG(DOT,[dot],[dot],[no]) +if test "x$DOT" = "xno" ; then + AC_MSG_FAILURE([graphviz not found]) +fi + +AC_CHECK_PROG(REPOQUERY,[repoquery],[repoquery],[no]) +if test "x$REPOQUERY" = "xno" ; then + AC_MSG_FAILURE([repoquery not found]) +fi + +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT diff --git a/repodeps.py b/repodeps.py new file mode 100644 index 0000000..28e7c63 --- /dev/null +++ b/repodeps.py @@ -0,0 +1,100 @@ +#!PYTHON +# repodeps - list recursive dependencies of a package in the repo +# (C) Copyright 2009 Red Hat Inc. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# Written by Richard W.M. Jones +# Heavily derived from a script by Seth Vidal. + +import yum +import yum.misc +import sys + +yb = yum.YumBase () + +basepkg = yb.pkgSack.returnPackages (patterns=[sys.argv[1]])[0] +deps = dict ({basepkg:False}) + +# Recursively find all the dependencies. +stable = False +while not stable: + stable = True + for pkg in deps.keys(): + if deps[pkg] == False: + deps[pkg] = [] + stable = False + for r in pkg.requires: + ps = yb.whatProvides (r[0], r[1], r[2]) + best = yb._bestPackageFromList (ps.returnPackages ()) + if best.name != pkg.name: + deps[pkg].append (best) + if not deps.has_key (best): + deps[best] = False + deps[pkg] = yum.misc.unique (deps[pkg]) + +# Get the data out of python as fast as possible so we can +# use a serious language for analysis of the tree. +print "(%s (" % basepkg +for pkg in deps.keys(): + print "((nevra %s) (name %s) (epoch %s) (version %s) (release %s) (arch %s) (size %s)" % (pkg, pkg.name, pkg.epoch, pkg.version, pkg.release, pkg.arch, pkg.installedsize) + print "(deps (" + for p in deps[pkg]: + print "%s " % p, + print ")))" +sys.stdout.write ("))") # suppress trailing \n + +# # Function to get the total size of a dependency (ie. size of +# # package + size of all dependencies). 
+# def total(pkg, seen=None): +# if seen is None: +# seen = dict() +# if not seen.has_key (pkg): +# seen[pkg] = True +# sum = pkg.installedsize +# for p in deps[pkg]: +# sum = sum + total (p, seen) +# return sum +# else: +# return 0 + +# # To speed things up, calculate the total size of each package. +# totals = dict () +# for pkg in deps.keys(): +# totals[pkg] = total (pkg) + +# # Sort the lists of dependencies by total size (largest first). +# def sort_by_totals(a, b): +# if totals[a] > totals[b]: +# return -1 +# if totals[a] == totals[b]: +# return 0 +# if totals[a] < totals[b]: +# return 1 + +# for pkg in deps.keys(): +# deps[pkg].sort (cmp=sort_by_totals) + +# # Iterate over the tree and print out the package details. +# def pr(pkg, indent=0, seen=None): +# if seen is None: +# seen = dict() +# if not seen.has_key (pkg): +# seen[pkg] = True +# print '%s%s %s/%s' % (" "*indent, pkg, pkg.installedsize, totals[pkg]) +# for p in deps[pkg]: +# pr (p, indent+2, seen) + +# pr (basepkg) diff --git a/rpmdepsize.ml b/rpmdepsize.ml new file mode 100644 index 0000000..d55ec36 --- /dev/null +++ b/rpmdepsize.ml @@ -0,0 +1,148 @@ +(* rpmdepsize - visualize the size of RPM dependencies + * (C) Copyright 2009 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Written by Richard W.M. 
Jones + *) + +open Sexplib +TYPE_CONV_PATH "." + +open ExtList +open Unix +open Printf + +(* This corresponds to the sexpr that we write out from the + * Python code. OCaml will type-check it. + *) +type root_packages = string * packages +and packages = pkg list +and pkg = { + nevra : string; (* name-[epoch:]version-release.arch *) + name : string; + epoch : int; + version : string; + release : string; + arch : string; + size : int64; (* installed size, excl. dirs *) + deps : string list; +} + with sexp + +(* Full dependency representation. This is actually a graph because + * it contains dependency loops. 'deps list' is a ref because we + * update it as we are building it. + *) +type deps = Deps of pkg * deps list ref + +(* Final tree representation, loops removed, and everything we want to + * display stored in the nodes. + *) +type tree = Tree of pkg * int64 * int64 * tree list + +module StringMap = Map.Make (String) +let (+^) = Int64.add +let sum = List.fold_left (+^) 0L +let spaces n = String.make n ' ' + +let () = + (* Run the Python program and read in the generated sexpr. *) + let cmd = + sprintf "./repodeps %s" (Filename.quote Sys.argv.(1)) in + let chan = open_process_in cmd in + ignore (input_line chan); (* drop "Loaded plugins" *) + let root, pkgs = + root_packages_of_sexp (Sexp.of_string (Std.input_all chan)) in + (match close_process_in chan with + | WEXITED 0 -> () + | WEXITED i -> failwith (sprintf "command exited with status %d" i) + | WSIGNALED i | WSTOPPED i -> + failwith (sprintf "command stopped with signal %d" i) + ); + + (* Create the dependency graph, probably contains loops so beware. 
*) + let deps = List.map (fun pkg -> Deps (pkg, ref [])) pkgs in + let depsmap = + List.fold_left ( + fun map (Deps (pkg, _) as deps) -> + StringMap.add pkg.nevra deps map + ) StringMap.empty deps in + List.iter ( + fun (Deps (pkg, deps)) -> + let deps' = List.map (fun n -> StringMap.find n depsmap) pkg.deps in + deps := List.append !deps deps' + ) deps; + + (* For each package, calculate the total installed size of the package, + * which includes all subpackages pulled in. So it's what would be + * installed if you did 'yum install foo'. + *) + let total pkg = + let seen = ref StringMap.empty in + let rec _total = function + | Deps (pkg, _) when StringMap.mem pkg.nevra !seen -> 0L + | Deps (pkg, { contents = children }) -> + seen := StringMap.add pkg.nevra true !seen; + pkg.size +^ sum (List.map _total children) + in + _total (StringMap.find pkg.nevra depsmap) + in + let totalsmap = + List.fold_left ( + fun map pkg -> StringMap.add pkg.nevra (total pkg) map + ) StringMap.empty pkgs in + + (* Create the final display tree. Each node is sorted so that + * children with the largest contribution come first (on the left). + * We remove packages which are already installed by earlier + * (leftward) packages. At each node we also store total size and + * size of the additional packages. + *) + let tree = + let seen = ref StringMap.empty in + let rec build_tree = function + | Deps (pkg, _) when StringMap.mem pkg.nevra !seen -> None + | Deps (pkg, { contents = children }) -> + (* Sort children by reverse total size. 
*) + let cmp (Deps (p1, _)) (Deps (p2, _)) = + let t1 = StringMap.find p1.nevra totalsmap in + let t2 = StringMap.find p2.nevra totalsmap in + compare t2 t1 + in + let children = List.sort ~cmp children in + seen := StringMap.add pkg.nevra true !seen; + let children = List.filter_map build_tree children in + let total = StringMap.find pkg.nevra totalsmap in + let childadditional = + let rec sum_child_sizes = function + | Tree (pkg, _, _, children) -> + List.fold_left ( + fun size child -> size +^ sum_child_sizes child + ) pkg.size children + in + sum_child_sizes (Tree (pkg, 0L, 0L, children)) in + Some (Tree (pkg, total, childadditional, children)) + in + Option.get (build_tree (StringMap.find root depsmap)) in + + (* Display tree. *) + let rec display ?(indent=0) = function + | Tree (pkg, total, childadditional, children) -> + printf "%s%s %Ld/%Ld/%Ld\n" + (spaces indent) pkg.nevra pkg.size childadditional total; + List.iter (display ~indent:(indent+2)) children + in + display tree diff --git a/rpmdepsize.pl b/rpmdepsize.pl new file mode 100755 index 0000000..0241a60 --- /dev/null +++ b/rpmdepsize.pl @@ -0,0 +1,142 @@ +#!PERL -w +# rpmdepsize - visualize the size of RPM dependencies +# (C) Copyright 2009 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# Written by Richard W.M. 
Jones + +use strict; + +use Getopt::Long; +use Pod::Usage; + +my $man = 0; +my $help = 0; + +GetOptions ('help|?' => \$help, + 'man' => \$man) + or pod2usage (2); +pod2usage (-exitstatus => 0, -verbose => 2) if $man; # test --man first: it is given without package args +pod2usage (1) if $help || @ARGV == 0; + +# Recurse through dependencies until all deps have been found. +my %deps; + +foreach (@ARGV) { + $deps{$_} = [] +} + +my $stable = 0; +while (!$stable) { + $stable = 1; + foreach my $name (sort keys %deps) { + if (@{$deps{$name}} == 0) { + $stable = 0; + add_deps ($name); + } + } +} + +sub add_deps # run ./repodeps on one package and record its output lines in %deps +{ + my $name = shift; + + print "resolving deps in $name ...\n"; + +# repoquery is incredibly slow. Unfortunately python has a +# privileged position into the yum databases, and a python +# script to access this information runs quickly, so this +# is what the alternate implementation below uses. +# my $cmd = +# "repoquery --recursive --resolve -R $name | +# sort -u | awk -F- '{print \$1}'"; + + my @cmd = ("./repodeps", $name); # list form: $name is never parsed by a shell + open RQ, "-|", @cmd or die "@cmd: $!"; + my $n = 0; + while (<RQ>) { + next if /^Loaded plugins:/; # yum banner line, previously dropped with grep -v + chomp; + push @{$deps{$name}}, $_; + $n++; + $deps{$_} = [] unless exists $deps{$_}; + } + close RQ; + push @{$deps{$name}}, $name if $n == 0; # keep the list non-empty so the outer loop terminates +} + +__END__ + +=head1 NAME + + rpmdepsize - Visualize the size of RPM dependencies + +=head1 SYNOPSIS + + rpmdepsize [--options] package [package ...] + +=head1 OPTIONS + +=over 4 + +=item B<--help> + +Display short usage message and exit. + +=item B<--man> + +Display manual page and exit. + +=back + +=head1 DESCRIPTION + + + + + +=head1 HOME PAGE + +L<http://et.redhat.com/~rjones/rpmdepsize> + +=head1 SEE ALSO + +L, L, L. + +=head1 AUTHORS + +Richard W.M. Jones + +=head1 COPYRIGHT + +(C) Copyright 2009 Red Hat Inc., +L. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +=cut -- 1.8.3.1