From: Richard W.M. Jones Date: Thu, 13 Oct 2011 14:37:56 +0000 (+0100) Subject: virt-sysprep: Switch to using guestmount, add more features. X-Git-Tag: 1.13.21~15 X-Git-Url: http://git.annexia.org/?p=libguestfs.git;a=commitdiff_plain;h=9e382f1ae33ed25f012596b801e9cc4a440cb74b virt-sysprep: Switch to using guestmount, add more features. This switches virt-sysprep to use guestmount instead of guestfish. This makes the script a little bit easier to modify for sysadmins. This commit also adds: - dhcp-client-state - dhcp-server-state - logfiles - random-seed - smolt-uuid - yum-uuid SELinux relabelling, and a section on security in the manual page. --- diff --git a/Makefile.am b/Makefile.am index 3d44c76..5170ec8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -38,9 +38,6 @@ SUBDIRS += fish # virt-tools in C. SUBDIRS += align cat df edit inspector rescue -# virt-tools in shell. -SUBDIRS += clone - # Language bindings. if HAVE_PERL SUBDIRS += perl perl/examples @@ -88,6 +85,11 @@ if HAVE_FUSE SUBDIRS += fuse endif +# virt-tools in shell. This uses guestmount. +if HAVE_FUSE +SUBDIRS += clone +endif + # po-docs must come after tools, inspector. if HAVE_PO4A SUBDIRS += po-docs diff --git a/README b/README index 396c8bb..9d18eeb 100644 --- a/README +++ b/README @@ -134,6 +134,12 @@ To build the Perl tools: - perl-libintl for translating perl code (optional) +To run virt-sysprep: + +- xmlstarlet (optional) + +- virt-sysprep also requires FUSE support since it uses guestmount + Building ---------------------------------------------------------------------- diff --git a/TODO b/TODO index bb9af78..a82ad57 100644 --- a/TODO +++ b/TODO @@ -537,7 +537,6 @@ virt-sysprep ideas ------------------ - touch /.unconfigured ? - - smolt ID - Spacewalk / RHN ID - Kerberos keys - Puppet registration @@ -549,11 +548,8 @@ virt-sysprep ideas . install a firstboot script virt-sysprep --script=/tmp/foo.sh . run an external shell script . 
run external guestfish script virt-sysprep --fish=/tmp/foo.fish - . rm /var/cache/apt/archives/*deb - - log files (thanks Steve Grubb) - . as well as the obvious log files, also - utmp/wtmp/btmp/tallylog and pam_faillock's data files - - RNG seed (Steve Grubb) + . rm /var/cache/apt/archives/* + - /var/run/* and pam_faillock's data files - homedirs/.ssh directory, especially /root/.ssh (Steve Grubb) - if drives are encrypted, then dm-crypt key should be changed and drives all re-encrypted @@ -564,20 +560,13 @@ virt-sysprep ideas may have picked up some certificates or things. This is an area that you would want to look into. - secure erase of inodes etc using scrub (Steve Grubb) - - touch /.autorelabel if we create any new files (thanks Dan Berrange) - - should we use guestmount instead of guestfish - and would that make it easier to run the tool inside VMs? - other directories that could require cleaning include: - /var/lib/dhcpd/* - /var/lib/dhclient/* /var/cache/gdm/* /var/lib/fprint/* /var/run/* /var/spool/mail/* /var/spool/cron/* /var/lib/AccountService/users/* - /var/cache/yum/* - /var/lib/yum/* (only /var/lib/yum/uuid) /var/lib/sss/db/* /var/lib/samba/* /var/lib/samba/*/* diff --git a/clone/virt-sysprep.in b/clone/virt-sysprep.in index cf4ad96..981ed96 100644 --- a/clone/virt-sysprep.in +++ b/clone/virt-sysprep.in @@ -21,9 +21,12 @@ unset CDPATH program="virt-sysprep" version="@PACKAGE_VERSION@" +# Uncomment this to see every shell command that is executed. +#set -x + TEMP=`getopt \ -o a:c:d:vVx \ - --long help,add:,connect:,domain:,enable:,format::,hostname:,list-operations,verbose,version \ + --long help,add:,connect:,domain:,enable:,format::,hostname:,list-operations,selinux-relabel,no-selinux-relabel,verbose,version \ -n $program -- "$@"` if [ $? != 0 ]; then echo "$program: problem parsing the command line arguments" @@ -31,18 +34,15 @@ if [ $? != 0 ]; then fi eval set -- "$TEMP" -# This array accumulates the arguments we pass through to guestfish. 
-declare -a guestfish -guestfish[0]="guestfish" -guestfish[1]="--rw" -guestfish[2]="--listen" -guestfish[3]="-i" -i=4 +# This array accumulates the arguments we pass through to guestmount. +declare -a params +i=0 verbose= add_params=0 enable= hostname_param=localhost.localdomain +selinux_relabel=auto usage () { @@ -59,17 +59,17 @@ usage () while true; do case "$1" in -a|--add) - guestfish[i++]="-a" - guestfish[i++]="$2" + params[i++]="-a" + params[i++]="$2" ((add_params++)) shift 2;; -c|--connect) - guestfish[i++]="-c" - guestfish[i++]="$2" + params[i++]="-c" + params[i++]="$2" shift 2;; -d|--domain) - guestfish[i++]="-d" - guestfish[i++]="$2" + params[i++]="-d" + params[i++]="$2" ((add_params++)) shift 2;; --enable) @@ -81,9 +81,9 @@ while true; do shift 2;; --format) if [ -n "$2" ]; then - guestfish[i++]="--format=$2" + params[i++]="--format=$2" else - guestfish[i++]="--format" + params[i++]="--format" fi shift 2;; --help) @@ -94,15 +94,29 @@ while true; do --list-operations) enable=list shift;; + --selinux-relabel) + selinux_relabel=yes + shift;; + --no-selinux-relabel) + selinux_relabel=no + shift;; -v|--verbose) - guestfish[i++]="-v" + params[i++]="-v" verbose=yes shift;; -V|--version) echo "$program $version" exit 0;; -x) - guestfish[i++]="-x" + # Can't pass the -x option directly to guestmount because + # that stops guestmount from forking, which means we can't + # coordinate with guestmount when it has finished + # initializing. So instead set just the underlying option + # in libguestfs by exporting LIBGUESTFS_TRACE. + # Unfortunately (a) this omits FUSE calls, but don't worry + # about that for now, and more importantly (b) trace + # messages disappear into never-never land after the fork. + export LIBGUESTFS_TRACE=1 shift;; --) shift @@ -117,23 +131,41 @@ done # enable all of these, although some of them are only done on certain # guest types (see details below). 
if [ -z "$enable" ]; then + dhcp_client_state=yes + dhcp_server_state=yes hostname=yes + logfiles=yes net_hwaddr=yes + random_seed=yes + smolt_uuid=yes ssh_hostkeys=yes udev_persistent_net=yes + yum_uuid=yes elif [ "$enable" = "list" ]; then + echo "dhcp-client-state" + echo "dhcp-server-state" echo "hostname" + echo "logfiles" echo "net-hwaddr" + echo "random-seed" + echo "smolt-uuid" echo "ssh-hostkeys" echo "udev-persistent-net" + echo "yum-uuid" exit 0 else for opt in $(echo "$enable" | sed 's/,/ /g'); do case "$opt" in + dhcp-client-state) dhcp_client_state=yes ;; + dhcp-server-state) dhcp_server_state=yes ;; hostname) hostname=yes ;; + logfiles) logfiles=yes ;; net-hwaddr) net_hwaddr=yes ;; + random-seed) random_seed=yes ;; + smolt-uuid) smolt_uuid=yes ;; ssh-hostkeys) ssh_hostkeys=yes ;; udev-persistent-net) udev_persistent_net=yes ;; + yum-uuid) yum_uuid=yes ;; *) echo "error: unknown --enable feature: $opt" exit 1 @@ -161,126 +193,99 @@ fi set -e if [ "$verbose" = "yes" ]; then - echo command: "${guestfish[@]}" + echo params: "${params[@]}" fi # Create a temporary directory for general purpose use during operations. tmpdir="$(mktemp -d)" -# Increase the amount of memory allocated to the appliance because -# we're using augeas. The user can override this by setting -# $LIBGUESTFS_MEMSIZE before running the script. -export LIBGUESTFS_MEMSIZE=${LIBGUESTFS_MEMSIZE:-2048} - -# Call guestfish. -GUESTFISH_PID= -eval $("${guestfish[@]}") -if [ -z "$GUESTFISH_PID" ]; then - echo "$program: guestfish didn't start up, see error messages above" - exit 1 -fi - -# Helper. -gf="guestfish --remote --" - cleanup () { - $gf exit >/dev/null 2>&1 ||: - rm -rf "$tmpdir" ||: + if [ -d $tmpdir/mnt ]; then + fusermount -u $tmpdir/mnt >/dev/null 2>&1 ||: + fi + rm -rf $tmpdir ||: } trap cleanup EXIT ERR -# Launch back-end, inspect for operating systems, and get the guest -# root disk. 
-root=$($gf inspect-get-roots) - -if [ "$root" = "" ]; then - echo "$program: no operating system was found on this disk" - exit 1 -fi - -if [ "$verbose" = "yes" ]; then - echo root: "$root" -fi - -# Get the guest type. -type="$($gf -inspect-get-type $root)" +# Run virt-inspector and grab inspection information about this guest. +virt-inspector "${params[@]}" > $tmpdir/xml +xmlstarlet sel -t -c \ + "string(/operatingsystems/operatingsystem[position()=1]/name)" \ + $tmpdir/xml > $tmpdir/type +xmlstarlet sel -t -c \ + "string(/operatingsystems/operatingsystem[position()=1]/distro)" \ + $tmpdir/xml > $tmpdir/distro ||: +xmlstarlet sel -t -c \ + "string(/operatingsystems/operatingsystem[position()=1]/package_format)" \ + $tmpdir/xml > $tmpdir/package_format ||: +xmlstarlet sel -t -c \ + "string(/operatingsystems/operatingsystem[position()=1]/package_management)" \ + $tmpdir/xml > $tmpdir/package_management ||: + +type="$(cat $tmpdir/type)" +distro="$(cat $tmpdir/distro)" +package_format="$(cat $tmpdir/package_format)" +package_management="$(cat $tmpdir/package_management)" + +# Mount the disk. +mkdir $tmpdir/mnt +guestmount --rw -i "${params[@]}" $tmpdir/mnt + +mnt="$tmpdir/mnt" -if [ "$type" = "linux" ]; then - distro="$($gf -inspect-get-distro $root)" -fi +#---------------------------------------------------------------------- +# The sysprep operations. -if [ "$type" = "windows" ]; then - systemroot="$($gf -inspect-get-windows-systemroot $root)" +if [ "$dhcp_client_state" = "yes" ]; then + case "$type" in + linux) + rm -rf $mnt/var/lib/dhclient/* + # RHEL 3: + rm -rf $mnt/var/lib/dhcp/* + ;; + esac fi -# Start Augeas if it's a Linux guest. -if [ "$type" = "linux" ]; then - $gf aug-init / 0 - using_augeas=yes +if [ "$dhcp_server_state" = "yes" ]; then + case "$type" in + linux) + rm -rf $mnt/var/lib/dhcpd/* + ;; + esac fi -#---------------------------------------------------------------------- -# Useful functions. 
- -# erase_line filename regex -# -# Erase line(s) in a file that match the given regex. -erase_line () -{ - $gf download "$1" "$tmpdir/file" - sed "/$2/d" < "$tmpdir/file" > "$tmpdir/file.1" - $gf upload "$tmpdir/file.1" "$1" -} - -# prepend_line filename line -# -# Prepend a line to a file (this is better than appending, because it -# works even when the original file isn't terminated with a newline). -prepend_line () -{ - $gf download "$1" "$tmpdir/file" - echo "$2" > "$tmpdir/file.1" - cat "$tmpdir/file.1" "$tmpdir/file" >> "$tmpdir/file.2" - $gf upload "$tmpdir/file.2" "$1" -} - -# rm_files wildcard -# -# Remove files. Doesn't fail if no files exist. Note the wildcard -# parameter cannot contain spaces or characters that need special -# quoting. -rm_files () -{ - files=$($gf glob-expand "$1") - for f in $files; do - $gf rm "$f" - done -} - -# rm_file filename -# -# Remove a single file. No error if the file doesn't exist or is not -# a file. -rm_file () -{ - t=$($gf is-file "$1") - if [ "$t" = "true" ]; then - $gf rm "$1" - fi -} - -#---------------------------------------------------------------------- -# The sysprep operations. 
- if [ "$hostname" = "yes" ]; then case "$type/$distro" in linux/fedora) - $gf aug-set /files/etc/sysconfig/network/HOSTNAME "$hostname_param" - augeas_save_needed=yes + echo "HOSTNAME=$hostname_param" > $mnt/etc/sysconfig/network.new + sed '/^HOSTNAME=/d' < $mnt/etc/sysconfig/network >> $mnt/etc/sysconfig/network.new + mv -f $mnt/etc/sysconfig/network.new $mnt/etc/sysconfig/network + created_files=yes ;; linux/debian|linux/ubuntu) - $gf write /etc/hostname "$hostname_param" + echo "$hostname_param" > $mnt/etc/hostname + created_files=yes + ;; + esac +fi + +if [ "$logfiles" = "yes" ]; then + case "$type" in + linux) + rm -rf $mnt/var/log/*.log* + rm -rf $mnt/var/log/audit/* + rm -rf $mnt/var/log/btmp* + rm -rf $mnt/var/log/cron* + rm -rf $mnt/var/log/dmesg* + rm -rf $mnt/var/log/lastlog* + rm -rf $mnt/var/log/maillog* + rm -rf $mnt/var/log/mail/* + rm -rf $mnt/var/log/messages* + rm -rf $mnt/var/log/secure* + rm -rf $mnt/var/log/spooler* + rm -rf $mnt/var/log/tallylog* + rm -rf $mnt/var/log/wtmp* ;; esac fi @@ -288,35 +293,77 @@ fi if [ "$net_hwaddr" = "yes" ]; then case "$type/$distro" in linux/fedora) - # XXX these filenames can have spaces and untrusted chars in them! 
- nodes=$( $gf aug-ls /files/etc/sysconfig/network-scripts | - grep /files/etc/sysconfig/network-scripts/ifcfg- ) - for node in $nodes; do - $gf -aug-rm "$node/HWADDR" >/dev/null - augeas_save_needed=yes - done + if [ -d $mnt/etc/sysconfig/network-scripts ]; then + rm_hwaddr () + { + sed '/^HWADDR=/d' < "$1" > "$1.new" + mv -f "$1.new" "$1" + } + export -f rm_hwaddr + find $mnt/etc/sysconfig/network-scripts \ + -name 'ifcfg-*' -type f \ + -exec bash -c 'rm_hwaddr "$0"' {} \; + created_files=yes + fi ;; esac fi +if [ "$random_seed" = "yes" -a "$type" = "linux" ]; then + f= + if [ -f $mnt/var/lib/random-seed ]; then + # Fedora + f=$mnt/var/lib/random-seed + elif [ -f $mnt/var/lib/urandom/random-seed ]; then + # Debian + f=$mnt/var/lib/urandom/random-seed + fi + if [ -n "$f" ]; then + dd if=/dev/random of="$f" bs=8 count=1 conv=nocreat,notrunc 2>/dev/null + fi +fi + +if [ "$smolt_uuid" = "yes" -a "$type" = "linux" ]; then + rm -f $mnt/etc/sysconfig/hw-uuid + rm -f $mnt/etc/smolt/uuid + rm -f $mnt/etc/smolt/hw-uuid +fi + if [ "$ssh_hostkeys" = "yes" -a "$type" != "windows" ]; then - rm_files "/etc/ssh/*_host_*" + rm -rf $mnt/etc/ssh/*_host_* fi if [ "$udev_persistent_net" = "yes" -a "$type" = "linux" ]; then - rm_file /etc/udev/rules.d/70-persistent-net.rules + rm -f $mnt/etc/udev/rules.d/70-persistent-net.rules +fi + +if [ "$yum_uuid" = "yes" -a "$package_management" = "yum" ]; then + rm -f $mnt/var/lib/yum/uuid fi #---------------------------------------------------------------------- # Clean up and close down. -if [ "$using_augeas" = "yes" -a "$augeas_save_needed" = "yes" ]; then - $gf aug-save - $gf aug-close -fi +# If we created any new files and the guest uses SELinux, then we have +# to relabel the filesystem on boot. Could do with a better way to +# test "guest uses SELinux" (XXX). 
+case "$selinux_relabel/$created_files" in + yes/*) + touch $mnt/.autorelabel;; + auto/yes) + case "$type/$distro" in + linux/fedora|linux/rhel|linux/centos|linux/scientificlinux|linux/redhat-based) + touch $mnt/.autorelabel + ;; + esac + ;; +esac + +sync + +fusermount -u $tmpdir/mnt +rm -rf $tmpdir -$gf umount-all -$gf sync -$gf exit +trap - EXIT ERR exit 0 diff --git a/clone/virt-sysprep.pod b/clone/virt-sysprep.pod index cc8e44f..fa10b9e 100755 --- a/clone/virt-sysprep.pod +++ b/clone/virt-sysprep.pod @@ -26,6 +26,12 @@ must be shut down. If you want to preserve the existing contents of the guest, you I<must copy or clone the disk first>. See L</COPYING AND CLONING> below. +You do I<not> need to run virt-sysprep as root. In fact we'd +generally recommend that you don't. The time you might want to run it +as root is when you need root in order to access the disk image, but +even in this case it would be better to change the permissions on the +disk image to be writable as the non-root user running virt-sysprep. + "Sysprep" stands for "system preparation" tool. The name comes from the Microsoft program C<sysprep.exe> which is used to unconfigure Windows machines in preparation for cloning them. Having said that, @@ -119,6 +125,16 @@ If not given, defaults to C<localhost.localdomain>. List the operations supported by the virt-sysprep program. +=item B<--selinux-relabel> + +=item B<--no-selinux-relabel> + +I<--selinux-relabel> forces SELinux relabelling next time the guest +boots. I<--no-selinux-relabel> disables relabelling. + +The default is to try to detect if SELinux relabelling is required. +See L</SELINUX RELABELLING> below for more details. + =item B<-v> =item B<--verbose> @@ -151,22 +167,45 @@ Use a comma-separated list, for example: To list the operations supported by the current version of virt-sysprep, use I<--list-operations>. +=head2 dhcp-client-state + +Remove DHCP client leases. + +=head2 dhcp-server-state + +Remove DHCP server leases. 
+ =head2 hostname -This changes the hostname of the guest to the value given in the +Changes the hostname of the guest to the value given in the I<--hostname> parameter. If the I<--hostname> parameter is not given, then the hostname is changed to C<localhost.localdomain>. +=head2 logfiles + +Remove many log files. + =head2 net-hwaddr Remove HWADDR (hard-coded MAC address) configuration. For Fedora and Red Hat Enterprise Linux, this is removed from C<ifcfg-*> files. +=head2 random-seed + +Write some random bytes from the host into the random seed file of +the guest. + +See L</RANDOM SEED> below. + +=head2 smolt-uuid + +Remove the Smolt hardware UUID. + =head2 ssh-hostkeys -This erases the SSH host keys in the guest. +Remove the SSH host keys in the guest. The SSH host keys are regenerated (differently) next time the guest is booted. @@ -181,14 +220,21 @@ you a stark warning about the host key changing: =head2 udev-persistent-net -This erases udev persistent net rules which map the guest's existing -MAC address to a fixed ethernet device (eg. eth0). +Remove udev persistent net rules which map the guest's existing MAC +address to a fixed ethernet device (eg. eth0). After a guest is cloned, the MAC address usually changes. Since the old MAC address occupies the old name (eg. eth0), this means the fresh MAC address is assigned to a new name (eg. eth1) and this is usually undesirable. Erasing the udev persistent net rules avoids this. +=head2 yum-uuid + +Remove the yum UUID. + +yum creates a fresh UUID the next time it runs when it notices that +the original UUID has been erased. + =head1 COPYING AND CLONING Virt-sysprep can be used as part of a process of cloning guests, or to @@ -356,6 +402,54 @@ to pay for disk space), then instead of copying the template, you can run L<virt-resize(1)>. Virt-resize performs a copy and resize, and thus is ideal for cloning guests from a template. +=head1 SECURITY + +Although virt-sysprep removes some sensitive information from +the guest, it does not pretend to remove all of it. 
You should +examine the L</OPERATIONS> above, and the implementation of +the operations in the shell script. + +You should also examine the guest afterwards. + +Sensitive files are simply removed. The data they contained may still +exist on the disk, easily recovered with a hex editor or undelete +tool. Use L<virt-sparsify(1)> as one way to remove this content. See +also the L<scrub(1)> command to get rid of deleted content in +directory entries and inodes. + +=head2 RANDOM SEED + +I<(This section applies to Linux guests only)> + +The virt-sysprep C<random-seed> operation writes a few bytes of +randomness from the host into the guest's random seed file. + +If this is just done once and the guest is cloned from the same +template, then each guest will start with the same entropy, and things +like SSH host keys and TCP sequence numbers may be predictable. + +Therefore you should arrange to add more randomness I<after> cloning +from a template too, which can be done by just enabling the +C<random-seed> operation: + + cp template.img newguest.img + virt-sysprep --enable=random-seed -a newguest.img + +=head2 SELINUX RELABELLING + +I<(This section applies to Linux guests using SELinux only)> + +If any new files are created by virt-sysprep, then virt-sysprep +touches C</.autorelabel> so that these will be correctly labelled by +SELinux the next time the guest is booted. This process interrupts +boot and can take some time. + +You can force relabelling for all guests by supplying the +I<--selinux-relabel> option. + +You can disable relabelling entirely by supplying the +I<--no-selinux-relabel> option. + =head1 SHELL QUOTING Libvirt guest names can contain arbitrary characters, some of which @@ -376,8 +470,9 @@ L, L, L, L, -L, L, +L, +L, L, L.