2 * Copyright (C) 2009-2010 Red Hat Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #define _BSD_SOURCE /* for mkdtemp, usleep */
34 #include <sys/select.h>
39 #include <rpc/types.h>
46 #ifdef HAVE_SYS_TYPES_H
47 #include <sys/types.h>
50 #ifdef HAVE_SYS_WAIT_H
54 #ifdef HAVE_SYS_SOCKET_H
55 #include <sys/socket.h>
62 #include <arpa/inet.h>
63 #include <netinet/in.h>
66 #include "glthread/lock.h"
69 #include "guestfs-internal.h"
70 #include "guestfs-internal-actions.h"
71 #include "guestfs_protocol.h"
73 static int launch_appliance (guestfs_h *g);
74 static int qemu_supports (guestfs_h *g, const char *option);
/* incr_cmdline_size: make room for one more entry in g->cmdline.
 *
 * If g->cmdline is still NULL the array is first allocated with
 * safe_malloc (slot 0 is reserved for argv[0], as noted below).  The
 * array is resized with safe_realloc to hold g->cmdline_size pointers.
 * Callers in this file store the new entry in
 * g->cmdline[g->cmdline_size-1] right after calling this function.
 *
 * NOTE(review): the statements that update g->cmdline_size are not
 * visible in this excerpt; presumably it is incremented before the
 * realloc -- confirm against the full file.
 */
76 /* Add a string to the current command line. */
78 incr_cmdline_size (guestfs_h *g)
80 if (g->cmdline == NULL) {
81 /* g->cmdline[0] is reserved for argv[0], set in guestfs_launch. */
83 g->cmdline = safe_malloc (g, sizeof (char *));
88 g->cmdline = safe_realloc (g, g->cmdline, sizeof (char *) * g->cmdline_size);
/* add_cmdline: append a private copy of STR to the qemu command line.
 *
 * The command line may only be changed while the handle is in the CONFIG
 * state; in any other state the visible message says it cannot be
 * altered after the qemu subprocess has been launched.  Otherwise
 * incr_cmdline_size is called and safe_strdup (g, str) is stored in the
 * last slot, g->cmdline[g->cmdline_size-1].
 *
 * Callers in this file treat a non-zero result as failure.
 */
92 add_cmdline (guestfs_h *g, const char *str)
94 if (g->state != CONFIG) {
96 _("command line cannot be altered after qemu subprocess launched"));
100 incr_cmdline_size (g);
101 g->cmdline[g->cmdline_size-1] = safe_strdup (g, str);
/* guestfs___checkpoint_cmdline: return the current command line length,
 * g->cmdline_size.
 *
 * NOTE(review): presumably the value is meant to be passed later as the
 * POS argument of guestfs___rollback_cmdline -- confirm against callers.
 */
106 guestfs___checkpoint_cmdline (guestfs_h *g)
108 return g->cmdline_size;
/* guestfs___rollback_cmdline: truncate the command line back to POS
 * entries.
 *
 * Asserts that g->cmdline_size >= pos, frees every g->cmdline[i] for i
 * running from g->cmdline_size - 1 down to pos, then sets
 * g->cmdline_size to pos.
 */
112 guestfs___rollback_cmdline (guestfs_h *g, int pos)
116 assert (g->cmdline_size >= pos);
118 for (i = g->cmdline_size - 1; i >= pos; --i)
119 free (g->cmdline[i]);
121 g->cmdline_size = pos;
/* guestfs__debug_cmdline: build a freshly allocated copy of the command
 * line for the caller.
 *
 * When g->cmdline is NULL a one-pointer array is allocated (what is
 * stored in it is not visible in this excerpt).  Otherwise the result
 * has g->cmdline_size + 1 slots: slot 0 is a copy of g->qemu, because
 * g->cmdline[0] is always NULL at this point; slots 1 up to
 * g->cmdline_size - 1 are copies of the matching g->cmdline entries; the
 * final slot is NULL.
 *
 * The caller owns, and must free, the returned array.
 */
124 /* Internal command to return the command line. */
126 guestfs__debug_cmdline (guestfs_h *g)
131 if (g->cmdline == NULL) {
132 r = safe_malloc (g, sizeof (char *) * 1);
137 r = safe_malloc (g, sizeof (char *) * (g->cmdline_size + 1));
138 r[0] = safe_strdup (g, g->qemu); /* g->cmdline[0] is always NULL */
140 for (i = 1; i < g->cmdline_size; ++i)
141 r[i] = safe_strdup (g, g->cmdline[i]);
143 r[g->cmdline_size] = NULL;
145 return r; /* caller frees */
/* guestfs__config: add a raw qemu option, and optionally its value, to
 * the command line.
 *
 * qemu_param must begin with '-'; anything else is reported as an error.
 * The options -kernel, -initrd, -nographic, -serial, -full-screen,
 * -std-vga and -vnc are refused with an "isn't allowed" error.
 *
 * The parameter is then appended with add_cmdline, followed by
 * qemu_value when that is not NULL.  -1 is returned as soon as either
 * add_cmdline call reports failure.
 */
149 guestfs__config (guestfs_h *g,
150 const char *qemu_param, const char *qemu_value)
152 if (qemu_param[0] != '-') {
153 error (g, _("guestfs_config: parameter must begin with '-' character"));
157 /* A bit fascist, but the user will probably break the extra
158 * parameters that we add if they try to set any of these.
160 if (STREQ (qemu_param, "-kernel") ||
161 STREQ (qemu_param, "-initrd") ||
162 STREQ (qemu_param, "-nographic") ||
163 STREQ (qemu_param, "-serial") ||
164 STREQ (qemu_param, "-full-screen") ||
165 STREQ (qemu_param, "-std-vga") ||
166 STREQ (qemu_param, "-vnc")) {
167 error (g, _("guestfs_config: parameter '%s' isn't allowed"), qemu_param);
171 if (add_cmdline (g, qemu_param) != 0) return -1;
173 if (qemu_value != NULL) {
174 if (add_cmdline (g, qemu_value) != 0) return -1;
/* test_cache_off: probe whether FILENAME can be opened with O_DIRECT.
 *
 * The file is opened with O_RDONLY|O_DIRECT and, further down, with
 * plain O_RDONLY; perrorf is used to report a failure on FILENAME.  The
 * conditions that connect these calls are not visible in this excerpt.
 *
 * guestfs__add_drive_opts treats a result of -1 as an error and any
 * other non-zero result as permission to add cache=off.
 */
180 /* cache=off improves reliability in the event of a host crash.
182 * However this option causes qemu to try to open the file with
183 * O_DIRECT. This fails on some filesystem types (notably tmpfs).
184 * So we check if we can open the file with or without O_DIRECT,
185 * and use cache=off (or not) accordingly.
188 test_cache_off (guestfs_h *g, const char *filename)
190 int fd = open (filename, O_RDONLY|O_DIRECT);
196 fd = open (filename, O_RDONLY);
202 perrorf (g, "%s", filename);
/* valid_format_iface: validate a drive "format" or "iface" string.
 *
 * The length is taken with strlen and characters are tested against
 * '-', '_' and c_isalnum.  Callers in this file treat a zero result as
 * "empty or contains disallowed characters".
 */
206 /* Check string parameter matches ^[-_[:alnum:]]+$ (in C locale). */
208 valid_format_iface (const char *str)
210 size_t len = strlen (str);
218 if (c != '-' && c != '_' && !c_isalnum (c))
/* guestfs__add_drive_opts: add a disk image to the qemu command line as
 * a -drive option.
 *
 * - FILENAME must not contain a comma: it is embedded in the
 *   comma-separated -drive string built below.
 * - readonly, format and iface are read from OPTARGS only when the
 *   matching bit is set in optargs->bitmask; otherwise they default to
 *   0, NULL and DRIVE_IF respectively.
 * - format (when given) and iface must pass valid_format_iface.
 * - For writable drives test_cache_off decides whether ",cache=off" is
 *   added (-1 from it is an error).  Read-only drives get ",snapshot=on"
 *   and the file's existence is checked with access (F_OK).
 *
 * The result is whatever guestfs__config (g, "-drive", buf) returns.
 *
 * NOTE(review): the declaration of buf and the full argument list of
 * the snprintf call are not visible in this excerpt.
 */
225 guestfs__add_drive_opts (guestfs_h *g, const char *filename,
226 const struct guestfs_add_drive_opts_argv *optargs)
232 if (strchr (filename, ',') != NULL) {
233 error (g, _("filename cannot contain ',' (comma) character"));
237 readonly = optargs->bitmask & GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK
238 ? optargs->readonly : 0;
239 format = optargs->bitmask & GUESTFS_ADD_DRIVE_OPTS_FORMAT_BITMASK
240 ? optargs->format : NULL;
241 iface = optargs->bitmask & GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK
242 ? optargs->iface : DRIVE_IF;
244 if (format && !valid_format_iface (format)) {
245 error (g, _("%s parameter is empty or contains disallowed characters"),
249 if (!valid_format_iface (iface)) {
250 error (g, _("%s parameter is empty or contains disallowed characters"),
255 /* For writable files, see if we can use cache=off. This also
256 * checks for the existence of the file. For readonly we have
257 * to do the check explicitly.
259 int use_cache_off = readonly ? 0 : test_cache_off (g, filename);
260 if (use_cache_off == -1)
264 if (access (filename, F_OK) == -1) {
265 perrorf (g, "%s", filename);
270 /* Construct the final -drive parameter. */
271 size_t len = 64 + strlen (filename) + strlen (iface);
272 if (format) len += strlen (format);
275 snprintf (buf, len, "file=%s%s%s%s%s,if=%s",
277 readonly ? ",snapshot=on" : "",
278 use_cache_off ? ",cache=off" : "",
279 format ? ",format=" : "",
280 format ? format : "",
283 return guestfs__config (g, "-drive", buf);
/* guestfs__add_drive: convenience wrapper around
 * guestfs__add_drive_opts.  It builds a local optargs structure (its
 * initializer is not visible in this excerpt) and returns the result of
 * guestfs__add_drive_opts (g, filename, &optargs).
 */
287 guestfs__add_drive (guestfs_h *g, const char *filename)
289 struct guestfs_add_drive_opts_argv optargs = {
293 return guestfs__add_drive_opts (g, filename, &optargs);
/* guestfs__add_drive_ro: wrapper around guestfs__add_drive_opts that
 * marks the readonly option as present by setting
 * GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK in optargs.bitmask.
 *
 * NOTE(review): the value given to the readonly field is not visible in
 * this excerpt; presumably it is non-zero -- confirm.
 */
297 guestfs__add_drive_ro (guestfs_h *g, const char *filename)
299 struct guestfs_add_drive_opts_argv optargs = {
300 .bitmask = GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK,
304 return guestfs__add_drive_opts (g, filename, &optargs);
/* guestfs__add_drive_with_if: wrapper around guestfs__add_drive_opts
 * that marks the iface option as present by setting
 * GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK in optargs.bitmask.
 *
 * NOTE(review): the remaining parameter(s) and the field that carries
 * the interface name are not visible in this excerpt.
 */
308 guestfs__add_drive_with_if (guestfs_h *g, const char *filename,
311 struct guestfs_add_drive_opts_argv optargs = {
312 .bitmask = GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK,
316 return guestfs__add_drive_opts (g, filename, &optargs);
/* guestfs__add_drive_ro_with_if: wrapper around guestfs__add_drive_opts
 * that marks both the iface and the readonly options as present
 * (GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK combined with
 * GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK).
 *
 * NOTE(review): the remaining parameter(s) and the option field values
 * are not visible in this excerpt.
 */
320 guestfs__add_drive_ro_with_if (guestfs_h *g, const char *filename,
323 struct guestfs_add_drive_opts_argv optargs = {
324 .bitmask = GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK
325 | GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK,
330 return guestfs__add_drive_opts (g, filename, &optargs);
/* guestfs__add_cdrom: add FILENAME to the qemu command line as a
 * -cdrom option.
 *
 * A filename containing a comma is rejected with an error, and the file
 * must exist (access with F_OK; failure is reported through perrorf).
 * The result is whatever guestfs__config (g, "-cdrom", filename)
 * returns.
 */
334 guestfs__add_cdrom (guestfs_h *g, const char *filename)
336 if (strchr (filename, ',') != NULL) {
337 error (g, _("filename cannot contain ',' (comma) character"));
341 if (access (filename, F_OK) == -1) {
342 perrorf (g, "%s", filename);
346 return guestfs__config (g, "-cdrom", filename);
349 static int is_openable (guestfs_h *g, const char *path, int flags);
/* guestfs__launch: public entry point that starts the appliance.
 *
 * - Fails with "already been launched" unless the handle is in the
 *   CONFIG state.
 * - Creates the per-handle temporary directory: the template is copied
 *   into g->tmpdir and mkdtemp turns it into a real directory; failure
 *   is reported through perrorf.
 * - Makes that directory mode 0755.  A chmod failure is only printed to
 *   stderr and otherwise ignored.
 * - Hands over to launch_appliance and returns its result.
 */
352 guestfs__launch (guestfs_h *g)
355 if (g->state != CONFIG) {
356 error (g, _("the libguestfs handle has already been launched"));
360 /* Make the temporary directory. */
362 TMP_TEMPLATE_ON_STACK (dir_template);
363 g->tmpdir = safe_strdup (g, dir_template);
364 if (mkdtemp (g->tmpdir) == NULL) {
365 perrorf (g, _("%s: cannot create temporary directory"), dir_template);
370 /* Allow anyone to read the temporary directory. The socket in this
371 * directory won't be readable but anyone can see it exists if they
372 * want. (RHBZ#610880).
374 if (chmod (g->tmpdir, 0755) == -1)
375 fprintf (stderr, "chmod: %s: %m (ignored)\n", g->tmpdir);
377 return launch_appliance (g);
/* launch_appliance: start qemu with the appliance and wait until the
 * daemon inside it reports in.
 *
 * Steps visible in this excerpt, in order:
 *  1. Report an error telling the caller to add a drive first (the
 *     condition that triggers it is not visible here).
 *  2. Record the start time in g->launch_t and locate or build the
 *     kernel, initrd and appliance with guestfs___build_appliance.
 *  3. Prime the qemu feature cache via qemu_supports (g, NULL).
 *  4. Create a non-blocking AF_UNIX listening socket at "<tmpdir>/sock"
 *     (socket, fcntl O_NONBLOCK, bind, listen) and a pair of pipes.
 *  5. In the child (r == 0): set g->cmdline[0] to g->qemu, append the
 *     qemu options (feature-dependent ones only when qemu_supports says
 *     so), the kernel command line and the appliance drive, terminate
 *     the array with NULL, set up stdin and stdout with dup, set
 *     LC_ALL=C and execv qemu.  The child calls _exit (EXIT_FAILURE) on
 *     the failure paths shown.
 *  6. In the parent: when g->recovery_proc is set, run a recovery
 *     process that polls with kill (pid, 0) and exits once qemu or the
 *     parent has gone away.
 *  7. Store the pipe ends (or /dev/null descriptors) in g->fd, move to
 *     state LAUNCHING, accept the daemon connection, make it
 *     non-blocking and expect GUESTFS_LAUNCH_FLAG from
 *     guestfs___recv_from_daemon; finally require state READY.
 *  8. Cleanup code sends signal 9 to g->pid and g->recoverypid, waits
 *     for both and zeroes g->launch_t.
 *
 * NOTE(review): the fork calls, most conditions, the labels for the
 * cleanup code and the return statements are not visible in this
 * excerpt, so the exact control flow between these steps must be
 * confirmed against the full file.  The add_cmdline calls in the child
 * do not check their return values.
 */
381 launch_appliance (guestfs_h *g)
386 struct sockaddr_un addr;
388 /* At present you must add drives before starting the appliance. In
389 * future when we enable hotplugging you won't need to do this.
392 error (g, _("you must call guestfs_add_drive before guestfs_launch"));
396 /* Start the clock ... */
397 gettimeofday (&g->launch_t, NULL);
399 /* Locate and/or build the appliance. */
400 char *kernel = NULL, *initrd = NULL, *appliance = NULL;
401 if (guestfs___build_appliance (g, &kernel, &initrd, &appliance) == -1)
405 guestfs___print_timestamped_message (g, "begin testing qemu features");
407 /* Get qemu help text and version. */
408 if (qemu_supports (g, NULL) == -1)
411 /* Using virtio-serial, we need to create a local Unix domain socket
412 * for qemu to connect to.
414 snprintf (unixsock, sizeof unixsock, "%s/sock", g->tmpdir);
417 g->sock = socket (AF_UNIX, SOCK_STREAM, 0);
419 perrorf (g, "socket");
423 if (fcntl (g->sock, F_SETFL, O_NONBLOCK) == -1) {
424 perrorf (g, "fcntl");
428 addr.sun_family = AF_UNIX;
429 strncpy (addr.sun_path, unixsock, UNIX_PATH_MAX);
430 addr.sun_path[UNIX_PATH_MAX-1] = '\0';
432 if (bind (g->sock, &addr, sizeof addr) == -1) {
437 if (listen (g->sock, 1) == -1) {
438 perrorf (g, "listen");
443 if (pipe (wfd) == -1 || pipe (rfd) == -1) {
450 guestfs___print_timestamped_message (g, "finished testing qemu features");
464 if (r == 0) { /* Child (qemu). */
467 /* Set up the full command line. Do this in the subprocess so we
468 * don't need to worry about cleaning up.
470 g->cmdline[0] = g->qemu;
472 if (qemu_supports (g, "-nodefconfig"))
473 add_cmdline (g, "-nodefconfig");
475 /* qemu sometimes needs this option to enable hardware
476 * virtualization, but some versions of 'qemu-kvm' will use KVM
477 * regardless (even where this option appears in the help text).
478 * It is rumoured that there are versions of qemu where supplying
479 * this option when hardware virtualization is not available will
480 * cause qemu to fail, so we have to check at least that
481 * /dev/kvm is openable. That's not reliable, since /dev/kvm
482 * might be openable by qemu but not by us (think: SELinux) in
483 * which case the user would not get hardware virtualization,
484 * although at least shouldn't fail. A giant clusterfuck with the
485 * qemu command line, again.
487 if (qemu_supports (g, "-enable-kvm") &&
488 is_openable (g, "/dev/kvm", O_RDWR))
489 add_cmdline (g, "-enable-kvm");
491 /* Newer versions of qemu (from around 2009/12) changed the
492 * behaviour of monitors so that an implicit '-monitor stdio' is
493 * assumed if we are in -nographic mode and there is no other
494 * -monitor option. Only a single stdio device is allowed, so
495 * this broke the '-serial stdio' option. There is a new flag
496 * called -nodefaults which gets rid of all this default crud, so
497 * let's use that to avoid this and any future surprises.
499 if (qemu_supports (g, "-nodefaults"))
500 add_cmdline (g, "-nodefaults");
502 add_cmdline (g, "-nographic");
504 snprintf (buf, sizeof buf, "%d", g->memsize);
505 add_cmdline (g, "-m");
506 add_cmdline (g, buf);
508 /* Force exit instead of reboot on panic */
509 add_cmdline (g, "-no-reboot");
511 /* These options recommended by KVM developers to improve reliability. */
512 if (qemu_supports (g, "-no-hpet"))
513 add_cmdline (g, "-no-hpet");
515 if (qemu_supports (g, "-rtc-td-hack"))
516 add_cmdline (g, "-rtc-td-hack");
518 /* Create the virtio serial bus. */
519 add_cmdline (g, "-device");
520 add_cmdline (g, "virtio-serial");
523 /* Use virtio-console (a variant form of virtio-serial) for the
524 * guest's serial console.
526 add_cmdline (g, "-chardev");
527 add_cmdline (g, "stdio,id=console");
528 add_cmdline (g, "-device");
529 add_cmdline (g, "virtconsole,chardev=console,name=org.libguestfs.console.0");
531 /* When the above works ... until then: */
532 add_cmdline (g, "-serial");
533 add_cmdline (g, "stdio");
536 /* Set up virtio-serial for the communications channel. */
537 add_cmdline (g, "-chardev");
538 snprintf (buf, sizeof buf, "socket,path=%s,id=channel0", unixsock);
539 add_cmdline (g, buf);
540 add_cmdline (g, "-device");
541 add_cmdline (g, "virtserialport,chardev=channel0,name=org.libguestfs.channel.0");
543 /* Enable user networking. */
544 if (g->enable_network) {
545 add_cmdline (g, "-netdev");
546 add_cmdline (g, "user,id=usernet,net=169.254.0.0/16");
547 add_cmdline (g, "-device");
548 add_cmdline (g, NET_IF ",netdev=usernet");
551 #define LINUX_CMDLINE \
552 "panic=1 " /* force kernel to panic if daemon exits */ \
553 "console=ttyS0 " /* serial console */ \
554 "udevtimeout=300 " /* good for very slow systems (RHBZ#480319) */ \
555 "noapic " /* workaround for RHBZ#502058 - ok if not SMP */ \
556 "acpi=off " /* we don't need ACPI, turn it off */ \
557 "printk.time=1 " /* display timestamp before kernel messages */ \
558 "cgroup_disable=memory " /* saves us about 5 MB of RAM */
560 /* Linux kernel command line. */
561 snprintf (buf, sizeof buf,
563 "%s " /* (selinux) */
564 "%s " /* (verbose) */
565 "TERM=%s " /* (TERM environment variable) */
567 g->selinux ? "selinux=1 enforcing=0" : "selinux=0",
568 g->verbose ? "guestfs_verbose=1" : "",
569 getenv ("TERM") ? : "linux",
570 g->append ? g->append : "");
572 add_cmdline (g, "-kernel");
573 add_cmdline (g, kernel);
574 add_cmdline (g, "-initrd");
575 add_cmdline (g, initrd);
576 add_cmdline (g, "-append");
577 add_cmdline (g, buf);
579 /* Add the ext2 appliance drive (last of all). */
581 const char *cachemode = "";
582 if (qemu_supports (g, "cache=")) {
583 if (qemu_supports (g, "unsafe"))
584 cachemode = ",cache=unsafe";
585 else if (qemu_supports (g, "writeback"))
586 cachemode = ",cache=writeback";
589 char buf2[PATH_MAX + 64];
590 add_cmdline (g, "-drive");
591 snprintf (buf2, sizeof buf2, "file=%s,snapshot=on,if=" DRIVE_IF "%s",
592 appliance, cachemode);
593 add_cmdline (g, buf2);
596 /* Finish off the command line. */
597 incr_cmdline_size (g);
598 g->cmdline[g->cmdline_size-1] = NULL;
601 guestfs___print_timestamped_argv (g, (const char **)g->cmdline);
604 /* Set up stdin, stdout. */
610 if (dup (wfd[0]) == -1) {
612 perror ("dup failed");
613 _exit (EXIT_FAILURE);
615 if (dup (rfd[1]) == -1)
623 /* Set up a new process group, so we can signal this process
624 * and all subprocesses (eg. if qemu is really a shell script).
629 setenv ("LC_ALL", "C", 1);
631 execv (g->qemu, g->cmdline); /* Run qemu. */
633 _exit (EXIT_FAILURE);
636 /* Parent (library). */
646 /* Fork the recovery process off which will kill qemu if the parent
647 * process fails to do so (eg. if the parent segfaults).
650 if (g->recovery_proc) {
653 pid_t qemu_pid = g->pid;
654 pid_t parent_pid = getppid ();
656 /* Writing to argv is hideously complicated and error prone. See:
657 * http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/utils/misc/ps_status.c?rev=1.33.2.1;content-type=text%2Fplain
660 /* Loop around waiting for one or both of the other processes to
661 * disappear. It's fair to say this is very hairy. The PIDs that
662 * we are looking at might be reused by another process. We are
663 * effectively polling. Is the cure worse than the disease?
666 if (kill (qemu_pid, 0) == -1) /* qemu's gone away, we aren't needed */
667 _exit (EXIT_SUCCESS);
668 if (kill (parent_pid, 0) == -1) {
669 /* Parent's gone away, qemu still around, so kill qemu. */
671 _exit (EXIT_SUCCESS);
677 /* Don't worry, if the fork failed, this will be -1. The recovery
678 * process isn't essential.
684 /* Close the other ends of the pipe. */
688 if (fcntl (wfd[1], F_SETFL, O_NONBLOCK) == -1 ||
689 fcntl (rfd[0], F_SETFL, O_NONBLOCK) == -1) {
690 perrorf (g, "fcntl");
694 g->fd[0] = wfd[1]; /* stdin of child */
695 g->fd[1] = rfd[0]; /* stdout of child */
697 g->fd[0] = open ("/dev/null", O_RDWR);
698 if (g->fd[0] == -1) {
699 perrorf (g, "open /dev/null");
702 g->fd[1] = dup (g->fd[0]);
703 if (g->fd[1] == -1) {
710 g->state = LAUNCHING;
712 /* Wait for qemu to start and to connect back to us via
713 * virtio-serial and send the GUESTFS_LAUNCH_FLAG message.
715 r = guestfs___accept_from_daemon (g);
719 close (g->sock); /* Close the listening socket. */
720 g->sock = r; /* This is the accepted data socket. */
722 if (fcntl (g->sock, F_SETFL, O_NONBLOCK) == -1) {
723 perrorf (g, "fcntl");
729 r = guestfs___recv_from_daemon (g, &size, &buf);
732 if (r == -1) return -1;
734 if (size != GUESTFS_LAUNCH_FLAG) {
735 error (g, _("guestfs_launch failed, see earlier error messages"));
740 guestfs___print_timestamped_message (g, "appliance is up");
742 /* This is possible in some really strange situations, such as
743 * guestfsd starts up OK but then qemu immediately exits. Check for
744 * it because the caller is probably expecting to be able to send
745 * commands after this function returns.
747 if (g->state != READY) {
748 error (g, _("qemu launched and contacted daemon, but state != READY"));
759 if (g->pid > 0) kill (g->pid, 9);
760 if (g->recoverypid > 0) kill (g->recoverypid, 9);
761 if (g->pid > 0) waitpid (g->pid, NULL, 0);
762 if (g->recoverypid > 0) waitpid (g->recoverypid, NULL, 0);
767 memset (&g->launch_t, 0, sizeof g->launch_t);
/* guestfs_tmpdir: look up the directory to use for temporary files.
 *
 * The TMPDIR environment variable is read with getenv.
 *
 * NOTE(review): how the value is used, and the fallback when TMPDIR is
 * unset, are not visible in this excerpt.
 */
781 /* Return the location of the tmpdir (eg. "/tmp") and allow users
782 * to override it at runtime using $TMPDIR.
785 guestfs_tmpdir (void)
795 const char *t = getenv ("TMPDIR");
/* timeval_diff: elapsed time from X to Y in milliseconds.
 *
 * The seconds difference is multiplied by 1000 and the microseconds
 * difference, divided by 1000 with integer division, is added to it, so
 * sub-millisecond precision is discarded.  Callers in this file print
 * the result with PRIi64.
 */
801 /* Compute Y - X and return the result in milliseconds.
802 * Approximately the same as this code:
803 * http://www.mpp.mpg.de/~huber/util/timevaldiff.c
806 timeval_diff (const struct timeval *x, const struct timeval *y)
810 msec = (y->tv_sec - x->tv_sec) * 1000;
811 msec += (y->tv_usec - x->tv_usec) / 1000;
/* guestfs___print_timestamped_argv: print a command line to stderr with
 * a launch-relative timestamp.
 *
 * Output starts with "[NNNNNms] ", the time since g->launch_t.  Each
 * argument that begins with '-' is preceded by a backslash-newline so
 * that every option starts a new line; arguments after the first are
 * separated by a space; an argument containing a space is wrapped in
 * single quotes (no other shell quoting is attempted).  A final newline
 * ends the output.
 *
 * NOTE(review): the loop header is not visible in this excerpt;
 * presumably it runs until a NULL entry in argv -- confirm.
 */
816 guestfs___print_timestamped_argv (guestfs_h *g, const char * argv[])
822 gettimeofday (&tv, NULL);
823 fprintf (stderr, "[%05" PRIi64 "ms] ", timeval_diff (&g->launch_t, &tv));
826 if (argv[i][0] == '-') /* -option starts a new line */
827 fprintf (stderr, " \\\n ");
829 if (i > 0) fputc (' ', stderr);
831 /* Does it need shell quoting? This only deals with simple cases. */
832 needs_quote = strcspn (argv[i], " ") != strlen (argv[i]);
834 if (needs_quote) fputc ('\'', stderr);
835 fprintf (stderr, "%s", argv[i]);
836 if (needs_quote) fputc ('\'', stderr);
840 fputc ('\n', stderr);
/* guestfs___print_timestamped_message: printf-style message to stderr,
 * prefixed with the time elapsed since g->launch_t.
 *
 * The message is formatted into a temporary string with vasprintf and
 * printed as "[NNNNNms] message" followed by a newline.
 *
 * NOTE(review): the handling of a vasprintf failure and the release of
 * msg are not visible in this excerpt.
 */
844 guestfs___print_timestamped_message (guestfs_h *g, const char *fs, ...)
852 err = vasprintf (&msg, fs, args);
857 gettimeofday (&tv, NULL);
859 fprintf (stderr, "[%05" PRIi64 "ms] %s\n",
860 timeval_diff (&g->launch_t, &tv), msg);
865 static int read_all (guestfs_h *g, FILE *fp, char **ret);
/* test_qemu: run the qemu binary to capture its help and version text.
 *
 * First runs "LC_ALL=C '<g->qemu>' -nographic -help" through popen and
 * stores the output in g->qemu_help with read_all; a read_all result of
 * -1 and a pclose result of -1 are both checked.  Then runs the same
 * binary with "-nographic -version 2>/dev/null" and stores the output
 * in g->qemu_version, deliberately ignoring errors.
 *
 * NOTE(review): g->qemu is placed between single quotes in a shell
 * command line, so a path that itself contains a single quote would
 * break the quoting.
 */
867 /* Test qemu binary (or wrapper) runs, and do 'qemu -help' and
868 * 'qemu -version' so we know what options this qemu supports and
872 test_qemu (guestfs_h *g)
877 snprintf (cmd, sizeof cmd, "LC_ALL=C '%s' -nographic -help", g->qemu);
879 fp = popen (cmd, "r");
880 /* qemu -help should always work (qemu -version OTOH wasn't
881 * supported by qemu 0.9). If this command doesn't work then it
882 * probably indicates that the qemu binary is missing.
885 /* XXX This error is never printed, even if the qemu binary
886 * doesn't exist. Why?
889 perrorf (g, _("%s: command failed: If qemu is located on a non-standard path, try setting the LIBGUESTFS_QEMU environment variable."), cmd);
893 if (read_all (g, fp, &g->qemu_help) == -1)
896 if (pclose (fp) == -1)
899 snprintf (cmd, sizeof cmd, "LC_ALL=C '%s' -nographic -version 2>/dev/null",
902 fp = popen (cmd, "r");
904 /* Intentionally ignore errors. */
905 read_all (g, fp, &g->qemu_version);
/* read_all: read the contents of FP into the heap buffer *RET.
 *
 * The buffer is grown with safe_realloc to n + BUFSIZ bytes before up
 * to BUFSIZ bytes are read with fread, and resized to n + 1 bytes on
 * another path.  test_qemu treats a result of -1 as failure.
 *
 * NOTE(review): the loop structure, the end-of-file test, the way n is
 * updated and the terminating NUL are not visible in this excerpt.
 */
913 read_all (guestfs_h *g, FILE *fp, char **ret)
920 *ret = safe_realloc (g, *ret, n + 1);
925 *ret = safe_realloc (g, *ret, n + BUFSIZ);
927 r = fread (p, 1, BUFSIZ, fp);
/* qemu_supports: report whether OPTION appears in the qemu help text.
 *
 * test_qemu is called to obtain the help text and its -1 result is
 * checked.  The answer is then a plain substring search:
 * strstr (g->qemu_help, option) != NULL.  The test is textual, so any
 * occurrence of the string anywhere in the help output counts.
 *
 * NOTE(review): the condition guarding the test_qemu call and the
 * handling of option == NULL are not visible in this excerpt; the
 * existing comment below describes the intended contract.
 */
936 /* Test if option is supported by qemu command line (just by grepping
939 * The first time this is used, it has to run the external qemu
940 * binary. If that fails, it returns -1.
942 * To just do the first-time run of the qemu binary, call this with
943 * option == NULL, in which case it will return -1 if there was an
947 qemu_supports (guestfs_h *g, const char *option)
950 if (test_qemu (g) == -1)
957 return strstr (g->qemu_help, option) != NULL;
/* is_openable: try to open PATH with the given open(2) FLAGS.
 * launch_appliance uses the result as a boolean when deciding whether
 * to pass -enable-kvm.
 *
 * NOTE(review): the handling of the descriptor after the open call is
 * not visible in this excerpt.
 */
960 /* Check if a file can be opened. */
962 is_openable (guestfs_h *g, const char *path, int flags)
964 int fd = open (path, flags);
/* guestfs__wait_ready: compatibility call kept for old callers.
 *
 * The only visible work is a state check: unless the handle is in the
 * READY state, the error "qemu has not been launched yet" is reported.
 */
974 /* You had to call this function after launch in versions <= 1.0.70,
975 * but it is now a no-op.
978 guestfs__wait_ready (guestfs_h *g)
980 if (g->state != READY) {
981 error (g, _("qemu has not been launched yet"));
/* guestfs__kill_subprocess: ask the qemu subprocess to terminate.
 *
 * In the CONFIG state there is nothing to kill and an error is
 * reported.  Otherwise SIGTERM is sent to g->pid and signal 9 to
 * g->recoverypid, each only when the stored pid is greater than zero.
 * A diagnostic line naming g->pid is written to stderr (the condition
 * guarding it is not visible in this excerpt).
 */
989 guestfs__kill_subprocess (guestfs_h *g)
991 if (g->state == CONFIG) {
992 error (g, _("no subprocess to kill"));
997 fprintf (stderr, "sending SIGTERM to process %d\n", g->pid);
999 if (g->pid > 0) kill (g->pid, SIGTERM);
1000 if (g->recoverypid > 0) kill (g->recoverypid, 9);
/* guestfs__is_config: non-zero when the handle is in the CONFIG state,
 * zero otherwise.
 */
1005 /* Access current state. */
1007 guestfs__is_config (guestfs_h *g)
1009 return g->state == CONFIG;
/* guestfs__is_launching: non-zero when the handle is in the LAUNCHING
 * state, zero otherwise.
 */
1013 guestfs__is_launching (guestfs_h *g)
1015 return g->state == LAUNCHING;
/* guestfs__is_ready: non-zero when the handle is in the READY state,
 * zero otherwise.
 */
1019 guestfs__is_ready (guestfs_h *g)
1021 return g->state == READY;
/* guestfs__is_busy: non-zero when the handle is in the BUSY state,
 * zero otherwise.
 */
1025 guestfs__is_busy (guestfs_h *g)
1027 return g->state == BUSY;
1031 guestfs__get_state (guestfs_h *g)