X-Git-Url: http://git.annexia.org/?p=libguestfs.git;a=blobdiff_plain;f=src%2Fguestfs.c;h=05c02f3f42ce71777c2e3d50a55c350ee5cb2d31;hp=4dc8dec7c98c46f7eb338b270aa1fd18c400b54d;hb=41b959fd9b057354f642d84398b875d02b88b864;hpb=5f8c46db05966a6e31b696d629fd3f74e4fb6c71 diff --git a/src/guestfs.c b/src/guestfs.c index 4dc8dec..05c02f3 100644 --- a/src/guestfs.c +++ b/src/guestfs.c @@ -143,7 +143,8 @@ struct guestfs_h int fd[2]; /* Stdin/stdout of qemu. */ int sock; /* Daemon communications socket. */ - int pid; /* Qemu PID. */ + pid_t pid; /* Qemu PID. */ + pid_t recoverypid; /* Recovery process PID. */ time_t start_t; /* The time when we started qemu. */ int stdout_watch; /* Watches qemu stdout for log messages. */ @@ -729,9 +730,9 @@ guestfs_launch (guestfs_h *g) /* Want to give userspace some room, so: */ memsize += 128; -#if AC_SIZEOF_LONG == 8 +#if SIZEOF_LONG == 8 /* On 64 bit, assume some overhead. */ - memsize += 32; + memsize += 64; #endif } else memsize = 512; @@ -842,6 +843,40 @@ guestfs_launch (guestfs_h *g) /* Parent (library). */ g->pid = r; + /* Fork the recovery process off which will kill qemu if the parent + * process fails to do so (eg. if the parent segfaults). + */ + r = fork (); + if (r == 0) { + pid_t qemu_pid = g->pid; + pid_t parent_pid = getppid (); + + /* Writing to argv is hideously complicated and error prone. See: + * http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/utils/misc/ps_status.c?rev=1.33.2.1;content-type=text%2Fplain + */ + + /* Loop around waiting for one or both of the other processes to + * disappear. It's fair to say this is very hairy. The PIDs that + * we are looking at might be reused by another process. We are + * effectively polling. Is the cure worse than the disease? + */ + for (;;) { + if (kill (qemu_pid, 0) == -1) /* qemu's gone away, we aren't needed */ + _exit (0); + if (kill (parent_pid, 0) == -1) { + /* Parent's gone away, qemu still around, so kill qemu. */ + kill (qemu_pid, 9); + _exit (0); + } + sleep (2); + } + } + + /* Don't worry, if the fork failed, this will be -1. The recovery + * process isn't essential. + */ + g->recoverypid = r; + /* Start the clock ... */ time (&g->start_t); @@ -938,11 +973,14 @@ guestfs_launch (guestfs_h *g) close (wfd[1]); close (rfd[0]); kill (g->pid, 9); + if (g->recoverypid > 0) kill (g->recoverypid, 9); waitpid (g->pid, NULL, 0); + if (g->recoverypid > 0) waitpid (g->recoverypid, NULL, 0); g->fd[0] = -1; g->fd[1] = -1; g->sock = -1; g->pid = 0; + g->recoverypid = 0; g->start_t = 0; g->stdout_watch = -1; g->sock_watch = -1; @@ -1018,6 +1056,7 @@ guestfs_kill_subprocess (guestfs_h *g) fprintf (stderr, "sending SIGTERM to process %d\n", g->pid); kill (g->pid, SIGTERM); + if (g->recoverypid > 0) kill (g->recoverypid, 9); return 0; } @@ -1135,7 +1174,9 @@ stdout_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, if (g->verbose) fprintf (stderr, "stdout_event: %p: child process died\n", g); /*kill (g->pid, SIGTERM);*/ + if (g->recoverypid > 0) kill (g->recoverypid, 9); waitpid (g->pid, NULL, 0); + if (g->recoverypid > 0) waitpid (g->recoverypid, NULL, 0); if (g->stdout_watch >= 0) g->main_loop->remove_handle (g->main_loop, g, g->stdout_watch); if (g->sock_watch >= 0) @@ -1147,6 +1188,7 @@ stdout_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, g->fd[1] = -1; g->sock = -1; g->pid = 0; + g->recoverypid = 0; g->start_t = 0; g->stdout_watch = -1; g->sock_watch = -1; @@ -1264,6 +1306,7 @@ sock_read_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, if (g->msg_in_size-4 < len) return; /* Need more of this message. */ /* Got the full message, begin processing it. */ +#if 0 if (g->verbose) { int i, j; @@ -1284,6 +1327,7 @@ sock_read_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, printf ("|\n"); } } +#endif /* Not in the expected state. */ if (g->state != BUSY) @@ -1292,6 +1336,9 @@ sock_read_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, /* Push the message up to the higher layer. */ if (g->reply_cb) g->reply_cb (g, g->reply_cb_data, &xdr); + else + /* This message (probably) should never be printed. */ + fprintf (stderr, "libguesfs: sock_read_event: !!! dropped message !!!\n"); g->msg_in_size -= len + 4; memmove (g->msg_in, g->msg_in+len+4, g->msg_in_size); @@ -1492,7 +1539,6 @@ int guestfs__send_sync (guestfs_h *g, int proc_nr, xdrproc_t xdrp, char *args) { - char buffer[GUESTFS_MESSAGE_MAX]; struct guestfs_message_header hdr; XDR xdr; unsigned len; @@ -1505,6 +1551,23 @@ guestfs__send_sync (guestfs_h *g, int proc_nr, return -1; } + /* This is probably an internal error. Or perhaps we should just + * free the buffer anyway? + */ + if (g->msg_out != NULL) { + error (g, "guestfs__send_sync: msg_out should be NULL"); + return -1; + } + + /* We have to allocate this message buffer on the heap because + * it is quite large (although will be mostly unused). We + * can't allocate it on the stack because in some environments + * we have quite limited stack space available, notably when + * running in the JVM. + */ + g->msg_out = safe_malloc (g, GUESTFS_MESSAGE_MAX + 4); + xdrmem_create (&xdr, g->msg_out + 4, GUESTFS_MESSAGE_MAX, XDR_ENCODE); + /* Serialize the header. */ hdr.prog = GUESTFS_PROGRAM; hdr.vers = GUESTFS_PROTOCOL_VERSION; @@ -1513,10 +1576,9 @@ guestfs__send_sync (guestfs_h *g, int proc_nr, hdr.serial = serial; hdr.status = GUESTFS_STATUS_OK; - xdrmem_create (&xdr, buffer, sizeof buffer, XDR_ENCODE); if (!xdr_guestfs_message_header (&xdr, &hdr)) { error (g, "xdr_guestfs_message_header failed"); - return -1; + goto cleanup1; } /* Serialize the args. If any, because some message types @@ -1525,24 +1587,23 @@ guestfs__send_sync (guestfs_h *g, int proc_nr, if (xdrp) { if (!(*xdrp) (&xdr, args)) { error (g, "dispatch failed to marshal args"); - return -1; + goto cleanup1; } } + /* Get the actual length of the message, resize the buffer to match + * the actual length, and write the length word at the beginning. + */ len = xdr_getpos (&xdr); xdr_destroy (&xdr); - /* Allocate the outgoing message buffer. */ - g->msg_out = safe_malloc (g, len + 4); - + g->msg_out = safe_realloc (g, g->msg_out, len + 4); g->msg_out_size = len + 4; g->msg_out_pos = 0; xdrmem_create (&xdr, g->msg_out, 4, XDR_ENCODE); xdr_uint32_t (&xdr, &len); - memcpy (g->msg_out + 4, buffer, len); - if (guestfs__switch_to_sending (g) == -1) goto cleanup1; @@ -1653,10 +1714,9 @@ send_file_complete_sync (guestfs_h *g) static int check_for_daemon_cancellation (guestfs_h *g); static int -send_file_chunk_sync (guestfs_h *g, int cancel, const char *buf, size_t len) +send_file_chunk_sync (guestfs_h *g, int cancel, const char *buf, size_t buflen) { - char data[GUESTFS_MAX_CHUNK_SIZE + 48]; - unsigned datalen; + unsigned len; int sent; guestfs_chunk chunk; XDR xdr; @@ -1667,6 +1727,14 @@ send_file_chunk_sync (guestfs_h *g, int cancel, const char *buf, size_t len) return -1; } + /* This is probably an internal error. Or perhaps we should just + * free the buffer anyway? + */ + if (g->msg_out != NULL) { + error (g, "guestfs__send_sync: msg_out should be NULL"); + return -1; + } + /* Did the daemon send a cancellation message? */ if (check_for_daemon_cancellation (g)) { if (g->verbose) @@ -1674,35 +1742,34 @@ send_file_chunk_sync (guestfs_h *g, int cancel, const char *buf, size_t len) return -2; } + /* Allocate the chunk buffer. Don't use the stack to avoid + * excessive stack usage and unnecessary copies. + */ + g->msg_out = safe_malloc (g, GUESTFS_MAX_CHUNK_SIZE + 4 + 48); + xdrmem_create (&xdr, g->msg_out + 4, GUESTFS_MAX_CHUNK_SIZE + 48, XDR_ENCODE); + /* Serialize the chunk. */ chunk.cancel = cancel; - chunk.data.data_len = len; + chunk.data.data_len = buflen; chunk.data.data_val = (char *) buf; - if (g->verbose) - fprintf (stderr, - "library sending chunk cancel = %d, len = %zu, buf = %p\n", - cancel, len, buf); - - xdrmem_create (&xdr, data, sizeof data, XDR_ENCODE); if (!xdr_guestfs_chunk (&xdr, &chunk)) { - error (g, "xdr_guestfs_chunk failed (buf = %p, len = %zu)", buf, len); + error (g, "xdr_guestfs_chunk failed (buf = %p, buflen = %zu)", + buf, buflen); xdr_destroy (&xdr); - return -1; + goto cleanup1; } - datalen = xdr_getpos (&xdr); + len = xdr_getpos (&xdr); xdr_destroy (&xdr); - /* Allocate outgoing message buffer. */ - g->msg_out = safe_malloc (g, datalen + 4); - g->msg_out_size = datalen + 4; + /* Reduce the size of the outgoing message buffer to the real length. */ + g->msg_out = safe_realloc (g, g->msg_out, len + 4); + g->msg_out_size = len + 4; g->msg_out_pos = 0; xdrmem_create (&xdr, g->msg_out, 4, XDR_ENCODE); - xdr_uint32_t (&xdr, &datalen); - - memcpy (g->msg_out + 4, data, datalen); + xdr_uint32_t (&xdr, &len); if (guestfs__switch_to_sending (g) == -1) goto cleanup1; @@ -1773,13 +1840,14 @@ check_for_daemon_cancellation (guestfs_h *g) /* Synchronously receive a file. */ -static int receive_file_data_sync (guestfs_h *g, void **buf); +/* Returns -1 = error, 0 = EOF, 1 = more data */ +static int receive_file_data_sync (guestfs_h *g, void **buf, int *len); int guestfs__receive_file_sync (guestfs_h *g, const char *filename) { void *buf; - int fd, r; + int fd, r, len; fd = open (filename, O_WRONLY|O_CREAT|O_TRUNC|O_NOCTTY, 0666); if (fd == -1) { @@ -1788,13 +1856,14 @@ guestfs__receive_file_sync (guestfs_h *g, const char *filename) } /* Receive the file in chunked encoding. */ - while ((r = receive_file_data_sync (g, &buf)) > 0) { - if (xwrite (fd, buf, r) == -1) { + while ((r = receive_file_data_sync (g, &buf, &len)) >= 0) { + if (xwrite (fd, buf, len) == -1) { perrorf (g, "%s: write", filename); free (buf); goto cancel; } free (buf); + if (r == 0) break; /* End of file. */ } if (r == -1) { @@ -1826,75 +1895,123 @@ guestfs__receive_file_sync (guestfs_h *g, const char *filename) return -1; } - while ((r = receive_file_data_sync (g, &buf)) > 0) - free (buf); /* just discard it */ + while ((r = receive_file_data_sync (g, NULL, NULL)) > 0) + ; /* just discard it */ return -1; } +/* Note that the reply callback can be called multiple times before + * the main loop quits and we get back to the synchronous code. So + * we have to be prepared to save multiple chunks on a list here. + */ struct receive_file_ctx { - int code; - void **buf; + int count; /* 0 if receive_file_cb not called, or + * else count number of chunks. + */ + guestfs_chunk *chunks; /* Array of chunks. */ }; static void +free_chunks (struct receive_file_ctx *ctx) +{ + int i; + + for (i = 0; i < ctx->count; ++i) + free (ctx->chunks[i].data.data_val); + + free (ctx->chunks); +} + +static void receive_file_cb (guestfs_h *g, void *data, XDR *xdr) { guestfs_main_loop *ml = guestfs_get_main_loop (g); struct receive_file_ctx *ctx = (struct receive_file_ctx *) data; guestfs_chunk chunk; + if (ctx->count == -1) /* Parse error occurred previously. */ + return; + ml->main_loop_quit (ml, g); memset (&chunk, 0, sizeof chunk); if (!xdr_guestfs_chunk (xdr, &chunk)) { error (g, "failed to parse file chunk"); - ctx->code = -1; - return; - } - if (chunk.cancel) { - error (g, "file receive cancelled by daemon"); - ctx->code = -2; - return; - } - if (chunk.data.data_len == 0) { /* end of transfer */ - ctx->code = 0; + free_chunks (ctx); + ctx->chunks = NULL; + ctx->count = -1; return; } - ctx->code = chunk.data.data_len; - *ctx->buf = chunk.data.data_val; /* caller frees */ + /* Copy the chunk to the list. */ + ctx->chunks = safe_realloc (g, ctx->chunks, + sizeof (guestfs_chunk) * (ctx->count+1)); + ctx->chunks[ctx->count] = chunk; + ctx->count++; } /* Receive a chunk of file data. */ +/* Returns -1 = error, 0 = EOF, 1 = more data */ static int -receive_file_data_sync (guestfs_h *g, void **buf) +receive_file_data_sync (guestfs_h *g, void **buf, int *len_r) { struct receive_file_ctx ctx; guestfs_main_loop *ml = guestfs_get_main_loop (g); + int i, len; - ctx.code = -3; - ctx.buf = buf; + ctx.count = 0; + ctx.chunks = NULL; guestfs_set_reply_callback (g, receive_file_cb, &ctx); (void) ml->main_loop_run (ml, g); guestfs_set_reply_callback (g, NULL, NULL); - if (g->verbose) - fprintf (stderr, "receive_file_data_sync: code %d\n", ctx.code); - - switch (ctx.code) { - case 0: /* end of file */ - return 0; - case -1: case -2: + if (ctx.count == 0) { + error (g, "receive_file_data_sync: reply callback not called\n"); return -1; - case -3: - error (g, "failed to call receive_file_cb"); + } + + if (ctx.count == -1) { + error (g, "receive_file_data_sync: parse error in reply callback\n"); + /* callback already freed the chunks */ return -1; - default: /* received n bytes of data */ - return ctx.code; } + + if (g->verbose) + fprintf (stderr, "receive_file_data_sync: got %d chunks\n", ctx.count); + + /* Process each chunk in the list. */ + if (buf) *buf = NULL; /* Accumulate data in this buffer. */ + len = 0; + + for (i = 0; i < ctx.count; ++i) { + if (ctx.chunks[i].cancel) { + error (g, "file receive cancelled by daemon"); + free_chunks (&ctx); + if (buf) free (*buf); + if (len_r) *len_r = 0; + return -1; + } + + if (ctx.chunks[i].data.data_len == 0) { /* end of transfer */ + free_chunks (&ctx); + if (len_r) *len_r = len; + return 0; + } + + if (buf) { + *buf = safe_realloc (g, *buf, len + ctx.chunks[i].data.data_len); + memcpy (*buf+len, ctx.chunks[i].data.data_val, + ctx.chunks[i].data.data_len); + } + len += ctx.chunks[i].data.data_len; + } + + if (len_r) *len_r = len; + free_chunks (&ctx); + return 1; } /* This is the default main loop implementation, using select(2). */