#include <config.h>
+#include <errno.h>
+#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "guestfs-internal-actions.h"
#include "guestfs_protocol.h"
+/* Old-style appliance is going to be obsoleted. */
static const char *kernel_name = "vmlinuz." host_cpu;
static const char *initrd_name = "initramfs." host_cpu ".img";
static int find_path (guestfs_h *g, int (*pred) (guestfs_h *g, const char *pelem, void *data), void *data, char **pelem);
static int dir_contains_file (const char *dir, const char *file);
static int dir_contains_files (const char *dir, ...);
-static int contains_supermin_appliance (guestfs_h *g, const char *path, void *data);
static int contains_ordinary_appliance (guestfs_h *g, const char *path, void *data);
+static int contains_supermin_appliance (guestfs_h *g, const char *path, void *data);
static char *calculate_supermin_checksum (guestfs_h *g, const char *supermin_path);
-static int check_for_cached_appliance (guestfs_h *g, const char *supermin_path, const char *checksum, char **kernel, char **initrd, char **appliance);
-static int build_supermin_appliance (guestfs_h *g, const char *supermin_path, const char *checksum, char **kernel, char **initrd, char **appliance);
+static int check_for_cached_appliance (guestfs_h *g, const char *supermin_path, const char *checksum, uid_t uid, char **kernel, char **initrd, char **appliance);
+static int build_supermin_appliance (guestfs_h *g, const char *supermin_path, const char *checksum, uid_t uid, char **kernel, char **initrd, char **appliance);
+static int hard_link_to_cached_appliance (guestfs_h *g, const char *cachedir, char **kernel, char **initrd, char **appliance);
static int run_supermin_helper (guestfs_h *g, const char *supermin_path, const char *cachedir, size_t cdlen);
/* Locate or build the appliance.
* (1) Look for the first element of g->path which contains a
* supermin appliance skeleton. If no element has this, skip
* straight to step (5).
+ *
* (2) Calculate the checksum of this supermin appliance.
- * (3) Check whether $TMPDIR/$checksum/ directory exists, contains
- * a cached appliance, and passes basic security checks. If so,
- * return this appliance.
- * (4) Try to build the supermin appliance into $TMPDIR/$checksum/.
- * If this is successful, return it.
+ *
+ * (3) Check whether a cached appliance with the checksum calculated
+ * in (2) exists and passes basic security checks. If so, return
+ * this appliance.
+ *
+ * (4) Try to build the supermin appliance. If this is successful,
+ * return it.
+ *
* (5) Check each element of g->path, looking for an ordinary appliance.
* If one is found, return it.
+ *
+ * The supermin appliance cache directory lives in
+ * $TMPDIR/.guestfs-$UID/ and consists of four files:
+ *
+ * $TMPDIR/.guestfs-$UID/checksum - the checksum
+ * $TMPDIR/.guestfs-$UID/kernel - symlink to the kernel
+ * $TMPDIR/.guestfs-$UID/initrd - the febootstrap initrd
+ * $TMPDIR/.guestfs-$UID/root - the appliance
+ *
+ * Since multiple instances of libguestfs with the same UID may be
+ * racing to create an appliance, we need to be careful when building
+ * and using the appliance.
+ *
+ * If a cached appliance with checksum exists (step (3) above) then we
+ * make a hard link to it with our current PID, so that we have a copy
+ * even if the appliance is replaced by another process building an
+ * appliance afterwards:
+ *
+ * $TMPDIR/.guestfs-$UID/kernel.$PID
+ * $TMPDIR/.guestfs-$UID/initrd.$PID
+ * $TMPDIR/.guestfs-$UID/root.$PID
+ *
+ * A lock is taken on "checksum" while we perform the link.
+ *
+ * Linked files are deleted by a garbage collection sweep which can be
+ * initiated by any libguestfs process with the same UID when the
+ * corresponding PID no longer exists. (This is safe: the parent is
+ * always around in guestfs_launch() while qemu is starting up, and
+ * after that qemu will either have finished with the files or be
+ * holding them open, so we can unlink them).
+ *
+ * When building a new appliance (step (4)), it is built into a randomly
+ * named temporary directory in $TMPDIR. Then a lock is acquired on
+ * $TMPDIR/.guestfs-$UID/checksum (this file being created if
+ * necessary), the files are renamed into their final location, and
+ * the lock is released.
*/
int
guestfs___build_appliance (guestfs_h *g,
char **kernel, char **initrd, char **appliance)
{
int r;
+ uid_t uid = geteuid ();
/* Step (1). */
char *supermin_path;
char *checksum = calculate_supermin_checksum (g, supermin_path);
if (checksum) {
/* Step (3): cached appliance exists? */
- r = check_for_cached_appliance (g, supermin_path, checksum,
+ r = check_for_cached_appliance (g, supermin_path, checksum, uid,
kernel, initrd, appliance);
if (r != 0) {
free (supermin_path);
}
/* Step (4): build supermin appliance. */
- r = build_supermin_appliance (g, supermin_path, checksum,
+ r = build_supermin_appliance (g, supermin_path, checksum, uid,
kernel, initrd, appliance);
free (supermin_path);
free (checksum);
}
static int
-contains_supermin_appliance (guestfs_h *g, const char *path, void *data)
+contains_ordinary_appliance (guestfs_h *g, const char *path, void *data) /* signature matches find_path's pred callback; g and data are not used */
{
- return dir_contains_files (path, "supermin.d", "kmod.whitelist", NULL);
+ return dir_contains_files (path, kernel_name, initrd_name, NULL); /* old-style appliance: look in path for the files named by kernel_name and initrd_name */
}
static int
-contains_ordinary_appliance (guestfs_h *g, const char *path, void *data)
+contains_supermin_appliance (guestfs_h *g, const char *path, void *data) /* signature matches find_path's pred callback; g and data are not used */
{
- return dir_contains_files (path, kernel_name, initrd_name, NULL);
+ return dir_contains_files (path, "supermin.d", NULL); /* only "supermin.d" is looked for in path; the removed line also passed "kmod.whitelist" */
}
/* supermin_path is a path which is known to contain a supermin
{
size_t len = 2 * strlen (supermin_path) + 256;
char cmd[len];
- snprintf (cmd, len,
- "febootstrap-supermin-helper%s "
- "-f checksum "
- "'%s/supermin.d' "
- host_cpu,
- g->verbose ? " --verbose" : "",
- supermin_path);
+ int pass_u_g_args = getuid () != geteuid () || getgid () != getegid ();
+
+ if (!pass_u_g_args)
+ snprintf (cmd, len,
+ "febootstrap-supermin-helper%s "
+ "-f checksum "
+ "'%s/supermin.d' "
+ host_cpu,
+ g->verbose ? " --verbose" : "",
+ supermin_path);
+ else
+ snprintf (cmd, len,
+ "febootstrap-supermin-helper%s "
+ "-u %i "
+ "-g %i "
+ "-f checksum "
+ "'%s/supermin.d' "
+ host_cpu,
+ g->verbose ? " --verbose" : "",
+ geteuid (), getegid (),
+ supermin_path);
if (g->verbose)
guestfs___print_timestamped_message (g, "%s", cmd);
return safe_strndup (g, checksum, len);
}
-/* Check for cached appliance in $TMPDIR/$checksum. Check it exists
- * and passes some basic security checks.
- *
- * Returns:
- * 1 = exists, and passes
- * 0 = does not exist
- * -1 = error which should abort the whole launch process
- */
static int
-security_check_cache_file (guestfs_h *g, const char *filename,
- const struct stat *statbuf)
+process_exists (int pid) /* returns 1 = PID exists, 0 = no such process, -1 = kill failed for another reason */
{
- uid_t uid = geteuid ();
+ if (kill (pid, 0) == 0) /* signal 0 delivers nothing; only the existence and permission checks run */
+ return 1;
- if (statbuf->st_uid != uid) {
- error (g, ("libguestfs cached appliance %s is not owned by UID %d\n"),
- filename, uid);
- return -1;
- }
+ if (errno == ESRCH) /* no process with this PID */
+ return 0;
- if ((statbuf->st_mode & 0022) != 0) {
- error (g, ("libguestfs cached appliance %s is writable by group or other (mode %o)\n"),
- filename, statbuf->st_mode);
- return -1;
+ return -1; /* any other errno; garbage_collect_appliances only deletes when the result is exactly 0 */
+}
+
+/* Garbage collect appliance hard links. Files that match
+ * (kernel|initrd|root).$PID where the corresponding PID doesn't exist
+ * are deleted. Note that errors in this function don't matter.
+ * There may also be other libguestfs processes racing to do the same
+ * thing here.
+ *
+ * cachedir is the directory to scan; if it cannot be opened the
+ * function returns without doing anything. A file is removed only
+ * when process_exists() returns exactly 0, so a PID whose status
+ * cannot be determined (-1) is left alone. sscanf only checks the
+ * "name.<number>" prefix, so characters following the number do not
+ * prevent a match.
+ */
+static void
+garbage_collect_appliances (const char *cachedir)
+{
+ DIR *dir;
+ struct dirent *d;
+ int pid;
+
+ dir = opendir (cachedir);
+ if (dir == NULL)
+ return;
+
+ while ((d = readdir (dir)) != NULL) {
+ if (sscanf (d->d_name, "kernel.%d", &pid) == 1 &&
+ process_exists (pid) == 0)
+ unlinkat (dirfd (dir), d->d_name, 0); /* relative to the open directory; result deliberately ignored */
+ else if (sscanf (d->d_name, "initrd.%d", &pid) == 1 &&
+ process_exists (pid) == 0)
+ unlinkat (dirfd (dir), d->d_name, 0);
+ else if (sscanf (d->d_name, "root.%d", &pid) == 1 &&
+ process_exists (pid) == 0)
+ unlinkat (dirfd (dir), d->d_name, 0);
}
- return 0;
+ closedir (dir);
}
static int
check_for_cached_appliance (guestfs_h *g,
const char *supermin_path, const char *checksum,
+ uid_t uid,
char **kernel, char **initrd, char **appliance)
{
const char *tmpdir = guestfs_tmpdir ();
- size_t len = strlen (tmpdir) + strlen (checksum) + 2;
+ /* len must be longer than the length of any pathname we can
+ * generate in this function.
+ */
+ size_t len = strlen (tmpdir) + 128;
char cachedir[len];
- snprintf (cachedir, len, "%s/%s", tmpdir, checksum);
+ snprintf (cachedir, len, "%s/.guestfs-%d", tmpdir, uid);
+ char filename[len];
+ snprintf (filename, len, "%s/checksum", cachedir);
- /* Touch the directory to prevent it being deleting in a rare race
- * between us doing the checks and a tmp cleaner running. Note this
- * doesn't create the directory, and we ignore any error.
- */
- (void) utime (cachedir, NULL);
+ (void) mkdir (cachedir, 0755);
/* See if the cache directory exists and passes some simple checks
- * to make sure it has not been tampered with. Note that geteuid()
- * forms a part of the checksum.
+ * to make sure it has not been tampered with.
*/
struct stat statbuf;
if (lstat (cachedir, &statbuf) == -1)
return 0;
-
- if (security_check_cache_file (g, cachedir, &statbuf) == -1)
+ if (statbuf.st_uid != uid) {
+ error (g, _("security: cached appliance %s is not owned by UID %d"),
+ filename, uid);
return -1;
+ }
+ if (!S_ISDIR (statbuf.st_mode)) {
+ error (g, _("security: cached appliance %s is not a directory (mode %o)"),
+ filename, statbuf.st_mode);
+ return -1;
+ }
+ if ((statbuf.st_mode & 0022) != 0) {
+ error (g, _("security: cached appliance %s is writable by group or other (mode %o)"),
+ cachedir, statbuf.st_mode);
+ return -1;
+ }
- int ret;
-
- *kernel = safe_malloc (g, len + 8 /* / + "kernel" + \0 */);
- *initrd = safe_malloc (g, len + 8 /* / + "initrd" + \0 */);
- *appliance = safe_malloc (g, len + 6 /* / + "root" + \0 */);
- sprintf (*kernel, "%s/kernel", cachedir);
- sprintf (*initrd, "%s/initrd", cachedir);
- sprintf (*appliance, "%s/root", cachedir);
+ (void) utime (cachedir, NULL);
- /* Touch the files to prevent them being deleted, and to bring the
- * cache up to date. Note this doesn't create the files.
- */
- (void) utime (*kernel, NULL);
+ garbage_collect_appliances (cachedir);
- /* NB. *kernel is a symlink, so we want to check the kernel, not the
- * link (stat, not lstat). We don't do a security check on the
- * kernel since it's always under /boot.
- */
- if (stat (*kernel, &statbuf) == -1) {
- ret = 0;
- goto error;
+ /* Try to open and acquire a lock on the checksum file. */
+ int fd = open (filename, O_RDONLY);
+ if (fd == -1)
+ return 0;
+ (void) futimens (fd, NULL);
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 1;
+ again:
+ if (fcntl (fd, F_SETLKW, &fl) == -1) {
+ if (errno == EINTR)
+ goto again;
+ perrorf (g, "fcntl: F_SETLKW: %s", filename);
+ close (fd);
+ return -1;
}
- (void) utime (*initrd, NULL);
-
- if (lstat (*initrd, &statbuf) == -1) {
- ret = 0;
- goto error;
+ /* Read the checksum file. */
+ size_t clen = strlen (checksum);
+ char checksum_on_disk[clen];
+ ssize_t rr = read (fd, checksum_on_disk, clen);
+ if (rr == -1) {
+ perrorf (g, "read: %s", filename);
+ close (fd);
+ return -1;
}
-
- if (security_check_cache_file (g, *initrd, &statbuf) == -1) {
- ret = -1;
- goto error;
+ if ((size_t) rr != clen) {
+ close (fd);
+ return 0;
}
- (void) utime (*appliance, NULL);
+ if (memcmp (checksum, checksum_on_disk, clen) != 0) {
+ close (fd);
+ return 0;
+ }
- if (lstat (*appliance, &statbuf) == -1) {
- ret = 0;
- goto error;
+ /* At this point, cachedir exists, and checksum matches, and we have
+ * a read lock on the checksum file. Make hard links to the files.
+ */
+ if (hard_link_to_cached_appliance (g, cachedir,
+ kernel, initrd, appliance) == -1) {
+ close (fd);
+ return -1;
}
- if (security_check_cache_file (g, *appliance, &statbuf) == -1) {
- ret = -1;
- goto error;
+ /* Releases the lock on checksum. */
+ if (close (fd) == -1) {
+ perrorf (g, "close");
+ return -1;
}
/* Exists! */
return 1;
-
- error:
- free (*kernel);
- free (*initrd);
- free (*appliance);
- return ret;
}
-/* Build supermin appliance from supermin_path to $TMPDIR/$checksum.
+/* Build supermin appliance from supermin_path to $TMPDIR/.guestfs-$UID.
*
* Returns:
* 0 = built
static int
build_supermin_appliance (guestfs_h *g,
const char *supermin_path, const char *checksum,
+ uid_t uid,
char **kernel, char **initrd, char **appliance)
{
if (g->verbose)
guestfs___print_timestamped_message (g, "begin building supermin appliance");
const char *tmpdir = guestfs_tmpdir ();
- size_t cdlen = strlen (tmpdir) + strlen (checksum) + 2;
- char cachedir[cdlen];
- snprintf (cachedir, cdlen, "%s/%s", tmpdir, checksum);
-
- /* Don't worry about this failing, because the
- * febootstrap-supermin-helper command will fail if the directory
- * doesn't exist. Note the directory might already exist, eg. if a
- * tmp cleaner has removed the existing appliance but not the
- * directory itself.
+
+ /* len must be longer than the length of any pathname we can
+ * generate in this function.
*/
- (void) mkdir (cachedir, 0755);
+ size_t len = strlen (tmpdir) + 128;
+
+ /* Build the appliance into a temporary directory.
+ * NOTE(review): the error returns below do not remove tmpcd or
+ * anything built inside it; it is only rmdir'd after all three
+ * renames have succeeded. Confirm whether that leak is acceptable.
+ */
+ char tmpcd[len];
+ snprintf (tmpcd, len, "%s/guestfs.XXXXXX", tmpdir);
+
+ if (mkdtemp (tmpcd) == NULL) {
+ perrorf (g, "mkdtemp");
+ return -1;
+ }
if (g->verbose)
guestfs___print_timestamped_message (g, "run febootstrap-supermin-helper");
- int r = run_supermin_helper (g, supermin_path, cachedir, cdlen);
+ int r = run_supermin_helper (g, supermin_path, tmpcd, len);
if (r == -1)
return -1;
if (g->verbose)
guestfs___print_timestamped_message (g, "finished building supermin appliance");
- *kernel = safe_malloc (g, cdlen + 8 /* / + "kernel" + \0 */);
- *initrd = safe_malloc (g, cdlen + 8 /* / + "initrd" + \0 */);
- *appliance = safe_malloc (g, cdlen + 6 /* / + "root" + \0 */);
- sprintf (*kernel, "%s/kernel", cachedir);
- sprintf (*initrd, "%s/initrd", cachedir);
- sprintf (*appliance, "%s/root", cachedir);
+ char cachedir[len];
+ snprintf (cachedir, len, "%s/.guestfs-%d", tmpdir, uid);
+ char filename[len];
+ char filename2[len];
+ snprintf (filename, len, "%s/checksum", cachedir);
+
+ /* Open and acquire write lock on checksum file. The file might
+ * not exist, in which case we want to create it.
+ * This function does not create cachedir itself, so the open fails
+ * if the directory is missing.
+ */
+ int fd = open (filename, O_WRONLY|O_CREAT, 0755);
+ if (fd == -1) {
+ perrorf (g, "open: %s", filename);
+ return -1;
+ }
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 1;
+ again:
+ if (fcntl (fd, F_SETLKW, &fl) == -1) {
+ if (errno == EINTR)
+ goto again;
+ perrorf (g, "fcntl: F_SETLKW: %s", filename);
+ close (fd);
+ return -1;
+ }
+
+ /* At this point we have acquired a write lock on the checksum
+ * file so we go ahead and replace it with the new checksum, and
+ * rename in appliance files into this directory.
+ *
+ * NOTE(review): the checksum is written before the renames, so a
+ * failure part-way through leaves the new checksum next to old or
+ * missing appliance files. Confirm whether this needs handling.
+ */
+ size_t clen = strlen (checksum);
+ if (ftruncate (fd, clen) == -1) {
+ perrorf (g, "ftruncate: %s", filename);
+ close (fd);
+ return -1;
+ }
+
+ ssize_t rr = write (fd, checksum, clen);
+ if (rr == -1) {
+ perrorf (g, "write: %s", filename);
+ close (fd);
+ return -1;
+ }
+ if ((size_t) rr != clen) {
+ error (g, "partial write: %s", filename);
+ close (fd);
+ return -1;
+ }
+
+ snprintf (filename, len, "%s/kernel", tmpcd);
+ snprintf (filename2, len, "%s/kernel", cachedir);
+ unlink (filename2);
+ if (rename (filename, filename2) == -1) {
+ perrorf (g, "rename: %s %s", filename, filename2);
+ close (fd);
+ return -1;
+ }
+
+ snprintf (filename, len, "%s/initrd", tmpcd);
+ snprintf (filename2, len, "%s/initrd", cachedir);
+ unlink (filename2);
+ if (rename (filename, filename2) == -1) {
+ perrorf (g, "rename: %s %s", filename, filename2);
+ close (fd);
+ return -1;
+ }
+
+ snprintf (filename, len, "%s/root", tmpcd);
+ snprintf (filename2, len, "%s/root", cachedir);
+ unlink (filename2);
+ if (rename (filename, filename2) == -1) {
+ perrorf (g, "rename: %s %s", filename, filename2);
+ close (fd);
+ return -1;
+ }
+
+ rmdir (tmpcd); /* best effort: the result is not checked */
+
+ /* Now finish off by linking to the cached appliance and returning it. */
+ if (hard_link_to_cached_appliance (g, cachedir,
+ kernel, initrd, appliance) == -1) {
+ close (fd);
+ return -1;
+ }
+
+ /* Releases the lock on checksum. */
+ if (close (fd) == -1) {
+ perrorf (g, "close");
+ return -1;
+ }
return 0;
}
+/* Make per-process hard links to the cached appliance files.
+ *
+ * For each of "kernel", "initrd" and "root" in cachedir, any existing
+ * cachedir/<name>.$PID is unlinked and a fresh hard link to
+ * cachedir/<name> is created, then the link source is touched.
+ *
+ * On success returns 0, and *kernel, *initrd and *appliance point to
+ * safe_malloc'd strings holding the three per-PID path names.
+ *
+ * On failure the error is reported with perrorf, the three strings
+ * are freed, *kernel, *initrd and *appliance are reset to NULL so
+ * the caller is never left holding dangling pointers, and -1 is
+ * returned. Links already created before the failure are left in
+ * place.
+ *
+ * NB: lock on checksum file must be held when this is called.
+ */
+static int
+hard_link_to_cached_appliance (guestfs_h *g,
+                               const char *cachedir,
+                               char **kernel, char **initrd, char **appliance)
+{
+  /* pid_t has no printf conversion of its own; convert once for %d. */
+  int pid = (int) getpid ();
+  size_t len = strlen (cachedir) + 32;
+
+  *kernel = safe_malloc (g, len);
+  *initrd = safe_malloc (g, len);
+  *appliance = safe_malloc (g, len);
+  snprintf (*kernel, len, "%s/kernel.%d", cachedir, pid);
+  snprintf (*initrd, len, "%s/initrd.%d", cachedir, pid);
+  snprintf (*appliance, len, "%s/root.%d", cachedir, pid);
+
+  char filename[len];
+  snprintf (filename, len, "%s/kernel", cachedir);
+  (void) unlink (*kernel);
+  if (link (filename, *kernel) == -1) {
+    perrorf (g, "link: %s %s", filename, *kernel);
+    goto error;
+  }
+  (void) lutimes (filename, NULL); /* lutimes because it's a symlink */
+
+  snprintf (filename, len, "%s/initrd", cachedir);
+  (void) unlink (*initrd);
+  if (link (filename, *initrd) == -1) {
+    perrorf (g, "link: %s %s", filename, *initrd);
+    goto error;
+  }
+  (void) utime (filename, NULL);
+
+  snprintf (filename, len, "%s/root", cachedir);
+  (void) unlink (*appliance);
+  if (link (filename, *appliance) == -1) {
+    perrorf (g, "link: %s %s", filename, *appliance);
+    goto error;
+  }
+  (void) utime (filename, NULL);
+
+  return 0;
+
+ error:
+  free (*kernel);
+  free (*initrd);
+  free (*appliance);
+  /* Do not hand freed pointers back through the output parameters. */
+  *kernel = NULL;
+  *initrd = NULL;
+  *appliance = NULL;
+  return -1;
+}
+
/* Run febootstrap-supermin-helper and tell it to generate the
- * appliance. Note that we have to do an explicit fork/exec here.
- * 'system' goes via the shell, and on systems that have bash, bash
- * has a misfeature where it resets the euid to uid which breaks
- * virt-v2v. 'posix_spawn' was also considered but that doesn't allow
- * us to reset the umask.
+ * appliance.
*/
static int
run_supermin_helper (guestfs_h *g, const char *supermin_path,
const char *argv[30];
size_t i = 0;
+ char uid[32];
+ snprintf (uid, sizeof uid, "%i", geteuid ());
+ char gid[32];
+ snprintf (gid, sizeof gid, "%i", getegid ());
char supermin_d[pathlen + 32];
snprintf (supermin_d, pathlen + 32, "%s/supermin.d", supermin_path);
char kernel[cdlen + 32];
char root[cdlen + 32];
snprintf (root, cdlen + 32, "%s/root", cachedir);
+ int pass_u_g_args = getuid () != geteuid () || getgid () != getegid ();
+
argv[i++] = "febootstrap-supermin-helper";
if (g->verbose)
argv[i++] = "--verbose";
+ if (pass_u_g_args) {
+ argv[i++] = "-u";
+ argv[i++] = uid;
+ argv[i++] = "-g";
+ argv[i++] = gid;
+ }
argv[i++] = "-f";
argv[i++] = "ext2";
argv[i++] = supermin_d;
*/
umask (0022);
- /* Set uid/gid in the child. This is a workaround for a misfeature
- * in bash which breaks virt-v2v - see the comment at the top of
- * this function.
- */
- if (getuid () == 0) {
- int egid = getegid ();
- int euid = geteuid ();
-
- if (egid != 0 || euid != 0) {
- if (seteuid (0) == -1) {
- perror ("seteuid");
- _exit (EXIT_FAILURE);
- }
-
- if (setgid (egid) == -1) {
- perror ("setgid");
- _exit (EXIT_FAILURE);
- }
-
- if (setuid (euid) == -1) {
- perror ("setuid");
- _exit (EXIT_FAILURE);
- }
- }
- }
execvp ("febootstrap-supermin-helper", (char * const *) argv);
perror ("execvp");
_exit (EXIT_FAILURE);