From 33377fc61d4356a0e83cf7ef39d545842647481c Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Fri, 8 May 2009 15:19:03 +0100 Subject: [PATCH] Allow recovery from guest failure. --- src/generator.ml | 24 +++++++++++----- src/guestfs.c | 85 +++++++++++++++++++++++++++++++++++++------------------- 2 files changed, 74 insertions(+), 35 deletions(-) diff --git a/src/generator.ml b/src/generator.ml index b7b1907..1e6c0bc 100755 --- a/src/generator.ml +++ b/src/generator.ml @@ -438,6 +438,16 @@ actions using the low-level API. For more information on states, see L<guestfs(3)>."); + ("end_busy", (RErr, []), -1, [NotInFish], + [], + "leave the busy state", + "\ +This sets the state to C<READY>, or if in C<CONFIG> then it leaves the +state as is. This is only used when implementing +actions using the low-level API. + +For more information on states, see L<guestfs(3)>."); + ] let daemon_functions = [ @@ -2824,7 +2834,7 @@ check_state (guestfs_h *g, const char *caller) name; ); pr " if (serial == -1) {\n"; - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; pr " return %s;\n" error_code; pr " }\n"; pr "\n"; @@ -2839,7 +2849,7 @@ check_state (guestfs_h *g, const char *caller) pr "\n"; pr " r = guestfs__send_file_sync (g, %s);\n" n; pr " if (r == -1) {\n"; - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; pr " return %s;\n" error_code; pr " }\n"; pr " if (r == -2) /* daemon cancelled */\n"; @@ -2859,21 +2869,21 @@ check_state (guestfs_h *g, const char *caller) pr " guestfs_set_reply_callback (g, NULL, NULL);\n"; pr " if (ctx.cb_sequence != 1) {\n"; pr " error (g, \"%%s reply failed, see earlier error messages\", \"%s\");\n" name; - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; pr " return %s;\n" error_code; pr " }\n"; pr "\n"; pr " if (check_reply_header (g, &ctx.hdr, GUESTFS_PROC_%s, serial) == -1) {\n" (String.uppercase shortname); - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; pr " return %s;\n" error_code; pr " }\n"; pr "\n"; pr " if 
(ctx.hdr.status == GUESTFS_STATUS_ERROR) {\n"; pr " error (g, \"%%s\", ctx.err.error_message);\n"; - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; pr " return %s;\n" error_code; pr " }\n"; pr "\n"; @@ -2883,14 +2893,14 @@ check_state (guestfs_h *g, const char *caller) function | FileOut n -> pr " if (guestfs__receive_file_sync (g, %s) == -1) {\n" n; - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; pr " return %s;\n" error_code; pr " }\n"; pr "\n"; | _ -> () ) (snd style); - pr " guestfs_set_ready (g);\n"; + pr " guestfs_end_busy (g);\n"; (match fst style with | RErr -> pr " return 0;\n" diff --git a/src/guestfs.c b/src/guestfs.c index 15111db..be7b0c7 100644 --- a/src/guestfs.c +++ b/src/guestfs.c @@ -1095,6 +1095,25 @@ guestfs_set_busy (guestfs_h *g) return 0; } +int +guestfs_end_busy (guestfs_h *g) /* Leaves the BUSY state; returns 0 on success, or -1 (after calling error) when the handle is in a state this call does not accept. */ +{ + switch (g->state) + { + case BUSY: /* the only state that changes: BUSY becomes READY */ + g->state = READY; + break; + case CONFIG: /* CONFIG and READY are left as is (child_cleanup puts the handle in CONFIG) */ + case READY: + break; + case LAUNCHING: /* LAUNCHING and NO_HANDLE are rejected: report the state and fail */ + case NO_HANDLE: + error (g, "guestfs_end_busy: called when in state %d", g->state); + return -1; + } + return 0; +} + /* Structure-freeing functions. These rely on the fact that the * structure format is identical to the XDR format. See note in * generator.ml. @@ -1126,6 +1145,39 @@ guestfs_free_lvm_lv_list (struct guestfs_lvm_lv_list *x) free (x); } +/* We don't know if stdout_event or sock_read_event will be the + * first to receive EOF if the qemu process dies. This function + * has the common cleanup code for both. 
+ * It waits for the child (g->pid) and the recovery process, removes both main-loop watches, closes the descriptors, puts the handle back into CONFIG and finally calls subprocess_quit_cb if one is set. */ +static void +child_cleanup (guestfs_h *g) +{ + if (g->verbose) + fprintf (stderr, "child_cleanup: %p: child process died\n", g); + /* NOTE(review): g->pid itself is not signalled here; EOF is taken to mean the child is already exiting - confirm. */ + if (g->recoverypid > 0) kill (g->recoverypid, SIGKILL); + if (g->pid > 0) waitpid (g->pid, NULL, 0); /* guarded: waitpid with pid 0 would wait for any child in our process group */ + if (g->recoverypid > 0) waitpid (g->recoverypid, NULL, 0); + if (g->stdout_watch >= 0) + g->main_loop->remove_handle (g->main_loop, g, g->stdout_watch); + if (g->sock_watch >= 0) + g->main_loop->remove_handle (g->main_loop, g, g->sock_watch); + close (g->fd[0]); + close (g->fd[1]); + close (g->sock); + g->fd[0] = -1; /* from here on: reset descriptors, pids, start time and watches */ + g->fd[1] = -1; + g->sock = -1; + g->pid = 0; + g->recoverypid = 0; + g->start_t = 0; + g->stdout_watch = -1; + g->sock_watch = -1; + g->state = CONFIG; + if (g->subprocess_quit_cb) /* the callback runs last, after the handle has been reset */ + g->subprocess_quit_cb (g, g->subprocess_quit_cb_data); +} + /* This function is called whenever qemu prints something on stdout. * Qemu's stdout is also connected to the guest's serial console, so * we see kernel messages here too. @@ -1152,30 +1204,7 @@ stdout_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, n = read (fd, buf, sizeof buf); if (n == 0) { /* Hopefully this indicates the qemu child process has died. 
*/ - if (g->verbose) - fprintf (stderr, "stdout_event: %p: child process died\n", g); - /*kill (g->pid, SIGTERM);*/ - if (g->recoverypid > 0) kill (g->recoverypid, 9); - waitpid (g->pid, NULL, 0); - if (g->recoverypid > 0) waitpid (g->recoverypid, NULL, 0); - if (g->stdout_watch >= 0) - g->main_loop->remove_handle (g->main_loop, g, g->stdout_watch); - if (g->sock_watch >= 0) - g->main_loop->remove_handle (g->main_loop, g, g->sock_watch); - close (g->fd[0]); - close (g->fd[1]); - close (g->sock); - g->fd[0] = -1; - g->fd[1] = -1; - g->sock = -1; - g->pid = 0; - g->recoverypid = 0; - g->start_t = 0; - g->stdout_watch = -1; - g->sock_watch = -1; - g->state = CONFIG; - if (g->subprocess_quit_cb) - g->subprocess_quit_cb (g, g->subprocess_quit_cb_data); + child_cleanup (g); return; } @@ -1221,11 +1250,11 @@ sock_read_event (struct guestfs_main_loop *ml, guestfs_h *g, void *data, } n = read (g->sock, g->msg_in + g->msg_in_size, g->msg_in_allocated - g->msg_in_size); - if (n == 0) - /* Disconnected? Ignore it because stdout_watch will get called - * and will do the cleanup. - */ + if (n == 0) { + /* Disconnected. */ + child_cleanup (g); return; + } if (n == -1) { if (errno != EINTR && errno != EAGAIN) -- 1.8.3.1