2 * Copyright (C) 2009-2011 Red Hat Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #define _BSD_SOURCE /* for mkdtemp, usleep */
34 #include <sys/select.h>
39 #include <rpc/types.h>
46 #ifdef HAVE_SYS_TYPES_H
47 #include <sys/types.h>
50 #ifdef HAVE_SYS_WAIT_H
54 #ifdef HAVE_SYS_SOCKET_H
55 #include <sys/socket.h>
62 #include <arpa/inet.h>
63 #include <netinet/in.h>
66 #include "glthread/lock.h"
69 #include "guestfs-internal.h"
70 #include "guestfs-internal-actions.h"
71 #include "guestfs_protocol.h"
73 static int launch_appliance (guestfs_h *g);
74 static int64_t timeval_diff (const struct timeval *x, const struct timeval *y);
75 static int qemu_supports (guestfs_h *g, const char *option);
77 /* Add a string to the current command line. */
79 incr_cmdline_size (guestfs_h *g)
81 if (g->cmdline == NULL) {
82 /* g->cmdline[0] is reserved for argv[0], set in guestfs_launch. */
84 g->cmdline = safe_malloc (g, sizeof (char *));
89 g->cmdline = safe_realloc (g, g->cmdline, sizeof (char *) * g->cmdline_size);
93 add_cmdline (guestfs_h *g, const char *str)
95 if (g->state != CONFIG) {
97 _("command line cannot be altered after qemu subprocess launched"));
101 incr_cmdline_size (g);
102 g->cmdline[g->cmdline_size-1] = safe_strdup (g, str);
107 guestfs___checkpoint_cmdline (guestfs_h *g)
109 return g->cmdline_size;
113 guestfs___rollback_cmdline (guestfs_h *g, int pos)
117 assert (g->cmdline_size >= pos);
119 for (i = g->cmdline_size - 1; i >= pos; --i)
120 free (g->cmdline[i]);
122 g->cmdline_size = pos;
125 /* Internal command to return the command line. */
127 guestfs__debug_cmdline (guestfs_h *g)
132 if (g->cmdline == NULL) {
133 r = safe_malloc (g, sizeof (char *) * 1);
138 r = safe_malloc (g, sizeof (char *) * (g->cmdline_size + 1));
139 r[0] = safe_strdup (g, g->qemu); /* g->cmdline[0] is always NULL */
141 for (i = 1; i < g->cmdline_size; ++i)
142 r[i] = safe_strdup (g, g->cmdline[i]);
144 r[g->cmdline_size] = NULL;
146 return r; /* caller frees */
150 guestfs__config (guestfs_h *g,
151 const char *qemu_param, const char *qemu_value)
153 if (qemu_param[0] != '-') {
154 error (g, _("guestfs_config: parameter must begin with '-' character"));
158 /* A bit fascist, but the user will probably break the extra
159 * parameters that we add if they try to set any of these.
161 if (STREQ (qemu_param, "-kernel") ||
162 STREQ (qemu_param, "-initrd") ||
163 STREQ (qemu_param, "-nographic") ||
164 STREQ (qemu_param, "-serial") ||
165 STREQ (qemu_param, "-full-screen") ||
166 STREQ (qemu_param, "-std-vga") ||
167 STREQ (qemu_param, "-vnc")) {
168 error (g, _("guestfs_config: parameter '%s' isn't allowed"), qemu_param);
172 if (add_cmdline (g, qemu_param) != 0) return -1;
174 if (qemu_value != NULL) {
175 if (add_cmdline (g, qemu_value) != 0) return -1;
181 /* cache=off improves reliability in the event of a host crash.
183 * However this option causes qemu to try to open the file with
184 * O_DIRECT. This fails on some filesystem types (notably tmpfs).
185 * So we check if we can open the file with or without O_DIRECT,
186 * and use cache=off (or not) accordingly.
189 test_cache_off (guestfs_h *g, const char *filename)
191 int fd = open (filename, O_RDONLY|O_DIRECT);
197 fd = open (filename, O_RDONLY);
203 perrorf (g, "%s", filename);
/* Check string parameter matches ^[-_[:alnum:]]+$ (in C locale).
 *
 * Returns 1 if STR is non-empty and every character is '-', '_' or
 * accepted by c_isalnum; returns 0 otherwise.
 */
static int
valid_format_iface (const char *str)
{
  const char *p;

  if (*str == '\0')
    return 0;

  for (p = str; *p != '\0'; p++) {
    if (*p == '-' || *p == '_' || c_isalnum (*p))
      continue;
    return 0;
  }

  return 1;
}
226 guestfs__add_drive_opts (guestfs_h *g, const char *filename,
227 const struct guestfs_add_drive_opts_argv *optargs)
233 if (strchr (filename, ',') != NULL) {
234 error (g, _("filename cannot contain ',' (comma) character"));
238 readonly = optargs->bitmask & GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK
239 ? optargs->readonly : 0;
240 format = optargs->bitmask & GUESTFS_ADD_DRIVE_OPTS_FORMAT_BITMASK
241 ? optargs->format : NULL;
242 iface = optargs->bitmask & GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK
243 ? optargs->iface : DRIVE_IF;
245 if (format && !valid_format_iface (format)) {
246 error (g, _("%s parameter is empty or contains disallowed characters"),
250 if (!valid_format_iface (iface)) {
251 error (g, _("%s parameter is empty or contains disallowed characters"),
256 /* For writable files, see if we can use cache=off. This also
257 * checks for the existence of the file. For readonly we have
258 * to do the check explicitly.
260 int use_cache_off = readonly ? 0 : test_cache_off (g, filename);
261 if (use_cache_off == -1)
265 if (access (filename, F_OK) == -1) {
266 perrorf (g, "%s", filename);
271 /* Construct the final -drive parameter. */
272 size_t len = 64 + strlen (filename) + strlen (iface);
273 if (format) len += strlen (format);
276 snprintf (buf, len, "file=%s%s%s%s%s,if=%s",
278 readonly ? ",snapshot=on" : "",
279 use_cache_off ? ",cache=off" : "",
280 format ? ",format=" : "",
281 format ? format : "",
284 return guestfs__config (g, "-drive", buf);
288 guestfs__add_drive (guestfs_h *g, const char *filename)
290 struct guestfs_add_drive_opts_argv optargs = {
294 return guestfs__add_drive_opts (g, filename, &optargs);
298 guestfs__add_drive_ro (guestfs_h *g, const char *filename)
300 struct guestfs_add_drive_opts_argv optargs = {
301 .bitmask = GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK,
305 return guestfs__add_drive_opts (g, filename, &optargs);
309 guestfs__add_drive_with_if (guestfs_h *g, const char *filename,
312 struct guestfs_add_drive_opts_argv optargs = {
313 .bitmask = GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK,
317 return guestfs__add_drive_opts (g, filename, &optargs);
321 guestfs__add_drive_ro_with_if (guestfs_h *g, const char *filename,
324 struct guestfs_add_drive_opts_argv optargs = {
325 .bitmask = GUESTFS_ADD_DRIVE_OPTS_IFACE_BITMASK
326 | GUESTFS_ADD_DRIVE_OPTS_READONLY_BITMASK,
331 return guestfs__add_drive_opts (g, filename, &optargs);
335 guestfs__add_cdrom (guestfs_h *g, const char *filename)
337 if (strchr (filename, ',') != NULL) {
338 error (g, _("filename cannot contain ',' (comma) character"));
342 if (access (filename, F_OK) == -1) {
343 perrorf (g, "%s", filename);
347 return guestfs__config (g, "-cdrom", filename);
350 static int is_openable (guestfs_h *g, const char *path, int flags);
353 guestfs__launch (guestfs_h *g)
356 if (g->state != CONFIG) {
357 error (g, _("the libguestfs handle has already been launched"));
361 /* Make the temporary directory. */
363 TMP_TEMPLATE_ON_STACK (dir_template);
364 g->tmpdir = safe_strdup (g, dir_template);
365 if (mkdtemp (g->tmpdir) == NULL) {
366 perrorf (g, _("%s: cannot create temporary directory"), dir_template);
371 /* Allow anyone to read the temporary directory. The socket in this
372 * directory won't be readable but anyone can see it exists if they
373 * want. (RHBZ#610880).
375 if (chmod (g->tmpdir, 0755) == -1)
376 fprintf (stderr, "chmod: %s: %m (ignored)\n", g->tmpdir);
378 return launch_appliance (g);
382 launch_appliance (guestfs_h *g)
386 char guestfsd_sock[256];
387 struct sockaddr_un addr;
389 /* At present you must add drives before starting the appliance. In
390 * future when we enable hotplugging you won't need to do this.
393 error (g, _("you must call guestfs_add_drive before guestfs_launch"));
397 /* Start the clock ... */
398 gettimeofday (&g->launch_t, NULL);
400 /* Locate and/or build the appliance. */
401 char *kernel = NULL, *initrd = NULL, *appliance = NULL;
402 if (guestfs___build_appliance (g, &kernel, &initrd, &appliance) == -1)
406 guestfs___print_timestamped_message (g, "begin testing qemu features");
408 /* Get qemu help text and version. */
409 if (qemu_supports (g, NULL) == -1)
412 /* Using virtio-serial, we need to create a local Unix domain socket
413 * for qemu to connect to.
415 snprintf (guestfsd_sock, sizeof guestfsd_sock, "%s/guestfsd.sock", g->tmpdir);
416 unlink (guestfsd_sock);
418 g->sock = socket (AF_UNIX, SOCK_STREAM, 0);
420 perrorf (g, "socket");
424 if (fcntl (g->sock, F_SETFL, O_NONBLOCK) == -1) {
425 perrorf (g, "fcntl");
429 addr.sun_family = AF_UNIX;
430 strncpy (addr.sun_path, guestfsd_sock, UNIX_PATH_MAX);
431 addr.sun_path[UNIX_PATH_MAX-1] = '\0';
433 if (bind (g->sock, &addr, sizeof addr) == -1) {
438 if (listen (g->sock, 1) == -1) {
439 perrorf (g, "listen");
444 if (pipe (wfd) == -1 || pipe (rfd) == -1) {
451 guestfs___print_timestamped_message (g, "finished testing qemu features");
465 if (r == 0) { /* Child (qemu). */
468 /* Set up the full command line. Do this in the subprocess so we
469 * don't need to worry about cleaning up.
471 g->cmdline[0] = g->qemu;
473 if (qemu_supports (g, "-nodefconfig"))
474 add_cmdline (g, "-nodefconfig");
476 /* qemu sometimes needs this option to enable hardware
477 * virtualization, but some versions of 'qemu-kvm' will use KVM
478 * regardless (even where this option appears in the help text).
479 * It is rumoured that there are versions of qemu where supplying
480 * this option when hardware virtualization is not available will
481 * cause qemu to fail, so we we have to check at least that
482 * /dev/kvm is openable. That's not reliable, since /dev/kvm
483 * might be openable by qemu but not by us (think: SELinux) in
484 * which case the user would not get hardware virtualization,
485 * although at least shouldn't fail. A giant clusterfuck with the
486 * qemu command line, again.
488 if (qemu_supports (g, "-enable-kvm") &&
489 is_openable (g, "/dev/kvm", O_RDWR))
490 add_cmdline (g, "-enable-kvm");
492 /* Newer versions of qemu (from around 2009/12) changed the
493 * behaviour of monitors so that an implicit '-monitor stdio' is
494 * assumed if we are in -nographic mode and there is no other
495 * -monitor option. Only a single stdio device is allowed, so
496 * this broke the '-serial stdio' option. There is a new flag
497 * called -nodefaults which gets rid of all this default crud, so
498 * let's use that to avoid this and any future surprises.
500 if (qemu_supports (g, "-nodefaults"))
501 add_cmdline (g, "-nodefaults");
503 add_cmdline (g, "-nographic");
505 snprintf (buf, sizeof buf, "%d", g->memsize);
506 add_cmdline (g, "-m");
507 add_cmdline (g, buf);
509 /* Force exit instead of reboot on panic */
510 add_cmdline (g, "-no-reboot");
512 /* These options recommended by KVM developers to improve reliability. */
513 if (qemu_supports (g, "-no-hpet"))
514 add_cmdline (g, "-no-hpet");
516 if (qemu_supports (g, "-rtc-td-hack"))
517 add_cmdline (g, "-rtc-td-hack");
519 /* Create the virtio serial bus. */
520 add_cmdline (g, "-device");
521 add_cmdline (g, "virtio-serial");
524 /* Use virtio-console (a variant form of virtio-serial) for the
525 * guest's serial console.
527 add_cmdline (g, "-chardev");
528 add_cmdline (g, "stdio,id=console");
529 add_cmdline (g, "-device");
530 add_cmdline (g, "virtconsole,chardev=console,name=org.libguestfs.console.0");
532 /* When the above works ... until then: */
533 add_cmdline (g, "-serial");
534 add_cmdline (g, "stdio");
537 /* Set up virtio-serial for the communications channel. */
538 add_cmdline (g, "-chardev");
539 snprintf (buf, sizeof buf, "socket,path=%s,id=channel0", guestfsd_sock);
540 add_cmdline (g, buf);
541 add_cmdline (g, "-device");
542 add_cmdline (g, "virtserialport,chardev=channel0,name=org.libguestfs.channel.0");
544 /* Enable user networking. */
545 if (g->enable_network) {
546 add_cmdline (g, "-netdev");
547 add_cmdline (g, "user,id=usernet,net=169.254.0.0/16");
548 add_cmdline (g, "-device");
549 add_cmdline (g, NET_IF ",netdev=usernet");
552 #define LINUX_CMDLINE \
553 "panic=1 " /* force kernel to panic if daemon exits */ \
554 "console=ttyS0 " /* serial console */ \
555 "udevtimeout=300 " /* good for very slow systems (RHBZ#480319) */ \
556 "noapic " /* workaround for RHBZ#502058 - ok if not SMP */ \
557 "acpi=off " /* we don't need ACPI, turn it off */ \
558 "printk.time=1 " /* display timestamp before kernel messages */ \
559 "cgroup_disable=memory " /* saves us about 5 MB of RAM */
561 /* Linux kernel command line. */
562 snprintf (buf, sizeof buf,
564 "%s " /* (selinux) */
565 "%s " /* (verbose) */
566 "TERM=%s " /* (TERM environment variable) */
568 g->selinux ? "selinux=1 enforcing=0" : "selinux=0",
569 g->verbose ? "guestfs_verbose=1" : "",
570 getenv ("TERM") ? : "linux",
571 g->append ? g->append : "");
573 add_cmdline (g, "-kernel");
574 add_cmdline (g, kernel);
575 add_cmdline (g, "-initrd");
576 add_cmdline (g, initrd);
577 add_cmdline (g, "-append");
578 add_cmdline (g, buf);
580 /* Add the ext2 appliance drive (last of all). */
582 const char *cachemode = "";
583 if (qemu_supports (g, "cache=")) {
584 if (qemu_supports (g, "unsafe"))
585 cachemode = ",cache=unsafe";
586 else if (qemu_supports (g, "writeback"))
587 cachemode = ",cache=writeback";
590 char buf2[PATH_MAX + 64];
591 add_cmdline (g, "-drive");
592 snprintf (buf2, sizeof buf2, "file=%s,snapshot=on,if=" DRIVE_IF "%s",
593 appliance, cachemode);
594 add_cmdline (g, buf2);
597 /* Finish off the command line. */
598 incr_cmdline_size (g);
599 g->cmdline[g->cmdline_size-1] = NULL;
602 guestfs___print_timestamped_argv (g, (const char **)g->cmdline);
605 /* Set up stdin, stdout. */
611 if (dup (wfd[0]) == -1) {
613 perror ("dup failed");
614 _exit (EXIT_FAILURE);
616 if (dup (rfd[1]) == -1)
624 /* Set up a new process group, so we can signal this process
625 * and all subprocesses (eg. if qemu is really a shell script).
630 setenv ("LC_ALL", "C", 1);
632 execv (g->qemu, g->cmdline); /* Run qemu. */
634 _exit (EXIT_FAILURE);
637 /* Parent (library). */
647 /* Fork the recovery process off which will kill qemu if the parent
648 * process fails to do so (eg. if the parent segfaults).
651 if (g->recovery_proc) {
654 pid_t qemu_pid = g->pid;
655 pid_t parent_pid = getppid ();
657 /* Writing to argv is hideously complicated and error prone. See:
658 * http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/utils/misc/ps_status.c?rev=1.33.2.1;content-type=text%2Fplain
661 /* Loop around waiting for one or both of the other processes to
662 * disappear. It's fair to say this is very hairy. The PIDs that
663 * we are looking at might be reused by another process. We are
664 * effectively polling. Is the cure worse than the disease?
667 if (kill (qemu_pid, 0) == -1) /* qemu's gone away, we aren't needed */
668 _exit (EXIT_SUCCESS);
669 if (kill (parent_pid, 0) == -1) {
670 /* Parent's gone away, qemu still around, so kill qemu. */
672 _exit (EXIT_SUCCESS);
678 /* Don't worry, if the fork failed, this will be -1. The recovery
679 * process isn't essential.
685 /* Close the other ends of the pipe. */
689 if (fcntl (wfd[1], F_SETFL, O_NONBLOCK) == -1 ||
690 fcntl (rfd[0], F_SETFL, O_NONBLOCK) == -1) {
691 perrorf (g, "fcntl");
695 g->fd[0] = wfd[1]; /* stdin of child */
696 g->fd[1] = rfd[0]; /* stdout of child */
698 g->fd[0] = open ("/dev/null", O_RDWR);
699 if (g->fd[0] == -1) {
700 perrorf (g, "open /dev/null");
703 g->fd[1] = dup (g->fd[0]);
704 if (g->fd[1] == -1) {
711 g->state = LAUNCHING;
713 /* Wait for qemu to start and to connect back to us via
714 * virtio-serial and send the GUESTFS_LAUNCH_FLAG message.
716 r = guestfs___accept_from_daemon (g);
720 close (g->sock); /* Close the listening socket. */
721 g->sock = r; /* This is the accepted data socket. */
723 if (fcntl (g->sock, F_SETFL, O_NONBLOCK) == -1) {
724 perrorf (g, "fcntl");
730 r = guestfs___recv_from_daemon (g, &size, &buf);
733 if (r == -1) return -1;
735 if (size != GUESTFS_LAUNCH_FLAG) {
736 error (g, _("guestfs_launch failed, see earlier error messages"));
741 guestfs___print_timestamped_message (g, "appliance is up");
743 /* This is possible in some really strange situations, such as
744 * guestfsd starts up OK but then qemu immediately exits. Check for
745 * it because the caller is probably expecting to be able to send
746 * commands after this function returns.
748 if (g->state != READY) {
749 error (g, _("qemu launched and contacted daemon, but state != READY"));
760 if (g->pid > 0) kill (g->pid, 9);
761 if (g->recoverypid > 0) kill (g->recoverypid, 9);
762 if (g->pid > 0) waitpid (g->pid, NULL, 0);
763 if (g->recoverypid > 0) waitpid (g->recoverypid, NULL, 0);
768 memset (&g->launch_t, 0, sizeof g->launch_t);
/* Return the location of the tmpdir (eg. "/tmp") and allow users
 * to override it at runtime using $TMPDIR.
 *
 * A TMPDIR that is unset or set to the empty string is ignored, in
 * which case the compile-time default is returned: P_tmpdir if the
 * platform defines it, otherwise "/tmp".  The returned string must
 * not be freed by the caller.
 */
const char *
guestfs_tmpdir (void)
{
  const char *env = getenv ("TMPDIR");

  /* An empty TMPDIR would make callers build paths rooted at "/". */
  if (env != NULL && env[0] != '\0')
    return env;

#ifdef P_tmpdir
  return P_tmpdir;
#else
  return "/tmp";
#endif
}
/* Compute Y - X and return the result in milliseconds.
 *
 * The seconds and microseconds differences are converted to
 * milliseconds separately, so the microsecond part is truncated
 * towards zero before being added.  Both differences are widened to
 * int64_t before any arithmetic so that the multiplication cannot
 * overflow on platforms where time_t is only 32 bits wide.
 */
static int64_t
timeval_diff (const struct timeval *x, const struct timeval *y)
{
  int64_t msec;

  msec = ((int64_t) y->tv_sec - (int64_t) x->tv_sec) * 1000;
  msec += ((int64_t) y->tv_usec - (int64_t) x->tv_usec) / 1000;
  return msec;
}
817 guestfs___print_timestamped_argv (guestfs_h *g, const char * argv[])
823 gettimeofday (&tv, NULL);
824 fprintf (stderr, "[%05" PRIi64 "ms] ", timeval_diff (&g->launch_t, &tv));
827 if (argv[i][0] == '-') /* -option starts a new line */
828 fprintf (stderr, " \\\n ");
830 if (i > 0) fputc (' ', stderr);
832 /* Does it need shell quoting? This only deals with simple cases. */
833 needs_quote = strcspn (argv[i], " ") != strlen (argv[i]);
835 if (needs_quote) fputc ('\'', stderr);
836 fprintf (stderr, "%s", argv[i]);
837 if (needs_quote) fputc ('\'', stderr);
841 fputc ('\n', stderr);
845 guestfs___print_timestamped_message (guestfs_h *g, const char *fs, ...)
853 err = vasprintf (&msg, fs, args);
858 gettimeofday (&tv, NULL);
860 fprintf (stderr, "[%05" PRIi64 "ms] %s\n",
861 timeval_diff (&g->launch_t, &tv), msg);
866 static int read_all (guestfs_h *g, FILE *fp, char **ret);
868 /* Test qemu binary (or wrapper) runs, and do 'qemu -help' and
869 * 'qemu -version' so we know what options this qemu supports and
873 test_qemu (guestfs_h *g)
878 snprintf (cmd, sizeof cmd, "LC_ALL=C '%s' -nographic -help", g->qemu);
880 fp = popen (cmd, "r");
881 /* qemu -help should always work (qemu -version OTOH wasn't
882 * supported by qemu 0.9). If this command doesn't work then it
883 * probably indicates that the qemu binary is missing.
886 /* XXX This error is never printed, even if the qemu binary
887 * doesn't exist. Why?
890 perrorf (g, _("%s: command failed: If qemu is located on a non-standard path, try setting the LIBGUESTFS_QEMU environment variable."), cmd);
894 if (read_all (g, fp, &g->qemu_help) == -1)
897 if (pclose (fp) == -1)
900 snprintf (cmd, sizeof cmd, "LC_ALL=C '%s' -nographic -version 2>/dev/null",
903 fp = popen (cmd, "r");
905 /* Intentionally ignore errors. */
906 read_all (g, fp, &g->qemu_version);
914 read_all (guestfs_h *g, FILE *fp, char **ret)
921 *ret = safe_realloc (g, *ret, n + 1);
926 *ret = safe_realloc (g, *ret, n + BUFSIZ);
928 r = fread (p, 1, BUFSIZ, fp);
937 /* Test if option is supported by qemu command line (just by grepping
940 * The first time this is used, it has to run the external qemu
941 * binary. If that fails, it returns -1.
943 * To just do the first-time run of the qemu binary, call this with
944 * option == NULL, in which case it will return -1 if there was an
948 qemu_supports (guestfs_h *g, const char *option)
951 if (test_qemu (g) == -1)
958 return strstr (g->qemu_help, option) != NULL;
961 /* Check if a file can be opened. */
963 is_openable (guestfs_h *g, const char *path, int flags)
965 int fd = open (path, flags);
975 /* You had to call this function after launch in versions <= 1.0.70,
976 * but it is now a no-op.
979 guestfs__wait_ready (guestfs_h *g)
981 if (g->state != READY) {
982 error (g, _("qemu has not been launched yet"));
990 guestfs__kill_subprocess (guestfs_h *g)
992 if (g->state == CONFIG) {
993 error (g, _("no subprocess to kill"));
998 fprintf (stderr, "sending SIGTERM to process %d\n", g->pid);
1000 if (g->pid > 0) kill (g->pid, SIGTERM);
1001 if (g->recoverypid > 0) kill (g->recoverypid, 9);
1006 /* Access current state. */
1008 guestfs__is_config (guestfs_h *g)
1010 return g->state == CONFIG;
1014 guestfs__is_launching (guestfs_h *g)
1016 return g->state == LAUNCHING;
1020 guestfs__is_ready (guestfs_h *g)
1022 return g->state == READY;
1026 guestfs__is_busy (guestfs_h *g)
1028 return g->state == BUSY;
1032 guestfs__get_state (guestfs_h *g)