X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=hostinfod%2Fmain.c;h=fa835bb363f58ef4749e35f7c720a2728f08e2e9;hb=19feefcaa1b219675e09c7d52169f7b0815aa123;hp=e272210d81c9147d56ab2224524b4ae942a9df48;hpb=11bf652b21fb6e20dc072c7e250aeb26821b818e;p=virt-hostinfo.git diff --git a/hostinfod/main.c b/hostinfod/main.c index e272210..fa835bb 100644 --- a/hostinfod/main.c +++ b/hostinfod/main.c @@ -22,7 +22,17 @@ #include #include +#include #include +#include +#include +#include +#include +#include +#include +#include + +#define UNIX_PATH_MAX 108 #include #include @@ -30,15 +40,17 @@ #include #include #include +#include #include #include "hostinfod.h" static void main_loop (void); -static void set_reread_socket_dir (const apr_pollfd_t *); +static void set_reread_socket_dir (const apr_pollfd_t *, void *); static void do_reread_socket_dir (void); - -typedef void (*poll_callback) (const apr_pollfd_t *); +static struct guest_description *guest_added (const char *sock_path, const char *name); +static void guest_removed (struct guest_description *); +static void guest_event (const apr_pollfd_t *, void *); const char *conf_file = DEFAULT_CONF_FILE; char *socket_dir = NULL; @@ -55,7 +67,15 @@ static int reread_socket_dir = 1; static int quit = 0; apr_pool_t *pool = NULL; -static apr_pollset_t *set; +static apr_pollset_t *set = NULL; + +static apr_hash_t *guests = NULL; /* Hash "driver-name" -> guest_description */ + +typedef void (*poll_callback) (const apr_pollfd_t *, void *data); +struct callback_data { + poll_callback callback; + void *data; +}; static void usage (void) @@ -72,8 +92,7 @@ usage (void) " Configuration file (default: %s)\n" " -f | --foreground\n" " Run in the foreground (don't fork)\n" - " -v Enable verbose messages (sent to syslog, and to\n" - " stderr if -d option is given)\n", + " -v Enable verbose messages (sent to syslog)\n", DEFAULT_CONF_FILE); } @@ -141,6 +160,9 @@ main (int argc, char *argv[]) /* Monitor the socket directory. */ monitor_socket_dir (); + /* Create the guests hash. */ + guests = apr_hash_make (pool); + /* Create the initial pollset, just containing inotify socket. */ r = apr_pollset_create (&set, 1024 /* ? */, pool, 0); if (r != APR_SUCCESS) { @@ -159,7 +181,13 @@ main (int argc, char *argv[]) tpollfd->reqevents = APR_POLLIN; tpollfd->rtnevents = 0; tpollfd->desc.s = tsock; - tpollfd->client_data = set_reread_socket_dir; + + struct callback_data *callback_data = + apr_palloc (pool, sizeof *callback_data); + callback_data->callback = set_reread_socket_dir; + callback_data->data = NULL; + tpollfd->client_data = callback_data; + r = apr_pollset_add (set, tpollfd); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_add"); @@ -177,8 +205,9 @@ main (int argc, char *argv[]) messages_to_stderr = 0; } - message (PACKAGE_STRING); + message ("%s started", PACKAGE_STRING); main_loop (); + message ("%s exiting", PACKAGE_STRING); apr_terminate (); return 0; @@ -200,7 +229,7 @@ main_loop (void) } /* Poll. */ - r = apr_pollset_poll (set, 0, &numdescs, &descs); + r = apr_pollset_poll (set, -1, &numdescs, &descs); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_poll"); exit (1); @@ -208,16 +237,16 @@ main_loop (void) /* Perform the callbacks. */ for (i = 0; i < numdescs; ++i) { - poll_callback callback; + struct callback_data *callback_data; - callback = descs[i].client_data; - callback (&descs[i]); + callback_data = descs[i].client_data; + callback_data->callback (&descs[i], callback_data->data); } } } static void -set_reread_socket_dir (const apr_pollfd_t *_) +set_reread_socket_dir (const apr_pollfd_t *ignored1, void *ignored2) { reread_socket_dir = 1; } @@ -225,24 +254,454 @@ set_reread_socket_dir (const apr_pollfd_t *_) static void do_reread_socket_dir (void) { - char buf[256]; - - debug ("reading socket directory"); + static int count = 0; + int added = 0, removed = 0; + char buf[PATH_MAX]; + int r; + DIR *dir; + struct dirent *d; + struct stat statbuf; + struct guest_description *hval; + apr_hash_index_t *hi; + + count++; + debug ("reading socket directory (counter = %d)", count); /* Discard anything which appears on the inotify socket. We will * reread the whole directory each time. */ - while (read (sockets_inotify_fd, buf, sizeof buf) > 0) - ; + do { + r = read (sockets_inotify_fd, buf, sizeof buf); + if (r == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + perrorf ("inotify socket: read"); + exit (1); + } + } + } while (r > 0); + + dir = opendir (socket_dir); + if (dir == NULL) { + perrorf ("%s: failed to open socket directory", socket_dir); + exit (1); + } + + while (errno = 0, (d = readdir (dir)) != NULL) { + /* We expect the name to be "-" (where + * is the libvirt driver name, and is the name of the + * domain). Skip any dot-entries and anything that doesn't have + * this form. + */ + if (d->d_name[0] == '.') + continue; + if (strlen (d->d_name) < 3 || strchr (&d->d_name[1], '-') == NULL) + continue; + + /* It must be a Unix domain socket - skip anything else. */ + snprintf (buf, sizeof buf, "%s/%s", socket_dir, d->d_name); + if (stat (buf, &statbuf) == -1) { + perrorf ("stat: %s", buf); + continue; + } + if (!S_ISSOCK (statbuf.st_mode)) + continue; + + /* See if we have an entry matching this already. */ + hval = (struct guest_description *) + apr_hash_get (guests, d->d_name, APR_HASH_KEY_STRING); + if (!hval) { + hval = guest_added (buf, d->d_name); + if (!hval) + continue; + + /* NB. It's not well documented, but the hash table + * implementation DOES NOT copy the key internally. Therefore + * we have to use hval->name (ie. our copy) as the key, NOT + * d->d_name, even though they are the same string. + */ + apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, hval); + added++; + } + + hval->counter = count; + } + if (errno != 0) { + perrorf ("%s: error reading socket directory", socket_dir); + exit (1); + } + + if (closedir (dir) == -1) { + perrorf ("%s: error closing socket directory", socket_dir); + exit (1); + } + + /* Iterate over the hash and look for any guests which have + * gone away. The guest_description.counter field won't have + * been updated. + */ + for (hi = apr_hash_first (pool, guests); hi; hi = apr_hash_next (hi)) { + apr_hash_this(hi, NULL, NULL, (void **) &hval); + if (hval->counter != count) { + /* This hash table implementation allows you to delete the + * current entry safely. + */ + apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, NULL); + + /* guest_removed frees hval but does not unregister it from the + * hash. + */ + guest_removed (hval); + removed++; + } + } + + debug ("finished reading socket directory, added %d, removed %d, guests %d", + added, removed, apr_hash_count (guests)); +} + +/* This is called whenever we detect that a guest socket has been + * created in the socket directory. + */ +static struct guest_description * +guest_added (const char *sock_path, const char *name) +{ + struct guest_description *hval = NULL; + int sock; + int r; + unsigned retries = 0, tns; + enum guest_state state; + apr_pool_t *guest_pool; + struct sockaddr_un addr; + struct timespec ts; + + sock = socket (AF_UNIX, SOCK_STREAM, 0); + if (sock == -1) { + perrorf ("socket"); + return NULL; + } + + if (fcntl (sock, F_SETFL, O_NONBLOCK) == -1) { + perrorf ("fcntl: O_NONBLOCK"); + close (sock); + return NULL; + } + if (fcntl (sock, F_SETFD, FD_CLOEXEC) == -1) { + perrorf ("fcntl: FD_CLOEXEC"); + close (sock); + return NULL; + } + + addr.sun_family = AF_UNIX; + strncpy (addr.sun_path, sock_path, UNIX_PATH_MAX); + addr.sun_path[UNIX_PATH_MAX-1] = '\0'; + + again: + r = connect (sock, (struct sockaddr *) &addr, sizeof addr); + if (r == -1) { + /* Nasty race condition: The moment the listener binds the socket, + * we see it in the directory and can try to connect to it. + * However the listener might not have called listen(2) yet, which + * means if we are faster than the other end, we will get + * ECONNREFUSED. If this happens, sleep a bit and try again a few + * times. + */ + if (errno == ECONNREFUSED) { + if (retries <= 10) { + tns = 1 << retries; + ts.tv_sec = tns / 1000000000; + ts.tv_nsec = tns % 1000000000; + nanosleep (&ts, NULL); + retries++; + goto again; + } + } + + if (errno != EINPROGRESS) { + /* Dead socket - cull these dead sockets from the directory. */ + perrorf ("connect: %s", sock_path); + close (sock); + unlink (sock_path); + return NULL; + } + state = guest_state_connecting; + } + else + state = guest_state_request; + + /* Create a pool which can be used for allocations + * during the lifetime of this guest connection. + */ + apr_pool_create (&guest_pool, pool); + + hval = apr_pcalloc (guest_pool, sizeof *hval); + hval->pool = guest_pool; + + /* Create the remaining hash fields. */ + hval->state = state; + hval->name = apr_pstrdup (hval->pool, name); + hval->sock_path = apr_pstrdup (hval->pool, sock_path); + hval->sock = sock; + hval->request_max = 4096; + hval->request = apr_palloc (hval->pool, hval->request_max); + + /* Convert Unix fd into APR socket type. */ + r = apr_os_sock_put (&hval->aprsock, &sock, hval->pool); + if (r != APR_SUCCESS) { + paprerror (r, "apr_os_sock_put: %s", sock_path); + exit (1); + } + + /* Register the socket in the pollset. */ + hval->pollfd.p = hval->pool; + hval->pollfd.desc_type = APR_POLL_SOCKET; + if (hval->state == guest_state_connecting) + hval->pollfd.reqevents = APR_POLLOUT; + else + hval->pollfd.reqevents = APR_POLLIN; + hval->pollfd.rtnevents = 0; + hval->pollfd.desc.s = hval->aprsock; + + struct callback_data *callback_data = + apr_palloc (hval->pool, sizeof *callback_data); + callback_data->callback = guest_event; + callback_data->data = hval; + hval->pollfd.client_data = callback_data; + + r = apr_pollset_add (set, &hval->pollfd); + if (r != APR_SUCCESS) { + paprerror (r, "apr_pollset_add: %s", sock_path); + exit (1); + } + message ("new guest added: %s", hval->name); + return hval; +} +/* This is called whenever we detect that a guest socket has been + * removed from the socket directory. The guest_description parameter + * is freed after this call and must not be used again. + */ +static void +guest_removed (struct guest_description *hval) +{ + apr_status_t r; + message ("guest removed: %s", hval->name); + /* Unregister the socket from the pollset. */ + r = apr_pollset_remove (set, &hval->pollfd); + if (r != APR_SUCCESS) + paprerror (r, "apr_pollset_remove for %s", hval->name); + if (close (hval->sock) == -1) + pwarningf ("close: %s", hval->sock_path); + /* This also frees hval and all related data. */ + apr_pool_destroy (hval->pool); +} +/* Forcibly remove a guest, removing the socket from the + * socket directory and cleaning up any resources used in + * the daemon. The guest_description parameter is freed + * after this call and must not be used again. + */ +static void +guest_force_close (struct guest_description *hval) +{ + debug ("forcibly closing guest: %s", hval->name); + apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, NULL); + unlink (hval->sock_path); + guest_removed (hval); +} +/* Difference between two timespec structures (r = a - b) */ +static struct timespec * +diff_timespec (struct timespec *r, + const struct timespec *a, const struct timespec *b) +{ + if (a->tv_nsec - b->tv_nsec < 0) { + r->tv_sec = a->tv_sec - b->tv_sec - 1; + r->tv_nsec = 1000000000 + a->tv_nsec - b->tv_nsec; + } else { + r->tv_sec = a->tv_sec - b->tv_sec; + r->tv_nsec = a->tv_nsec - b->tv_nsec; + } + return r; +} +/* This is called when there is some event from the guest, eg. + * connection finished, read, write or closed. + */ +static void +guest_event (const apr_pollfd_t *pollfd, void *hvalv) +{ + struct guest_description *hval = hvalv; + int err, max, r, extra; + socklen_t len; + char *p; + struct timespec now; + +#ifdef HAVE_CLOCK_GETTIME + clock_gettime (CLOCK_MONOTONIC, &now); +#else + struct timeval tv; + gettimeofday (&tv, NULL); + now.tv_sec = tv.tv_sec; + now.tv_nsec = tv.tv_usec * 1000; +#endif + + /* If the guest keeps doing bad stuff, eventually lose patience with it. */ + if (hval->penalty >= 100) { + error ("%s: guest did too much bad stuff, so we stopped talking to it", + hval->name); + guest_force_close (hval); + return; + } + + /* Decrement the penalty once a minute, so the guest can recover. */ + if (hval->penalty > 0) { + struct timespec diff; + + diff_timespec (&diff, &now, &hval->last_penalty_decr); + + if (diff.tv_sec >= 60) { + hval->penalty--; + hval->last_penalty_decr = now; + } + } + + switch (hval->state) { + case guest_state_connecting: + /* Once we get a write event, we know the socket has + * connected, or there is an error. + */ + err = 0; + len = sizeof err; + getsockopt (hval->sock, SOL_SOCKET, SO_ERROR, &err, &len); + if (err == 0) + hval->state = guest_state_request; + else { + errno = err; + perrorf ("connect: %s", hval->sock_path); + guest_force_close (hval); + return; + } + break; + + case guest_state_request: + /* Reading the guest's request, a single line terminated by \r?\n */ + max = hval->request_max - hval->request_posn; + if (max <= 0) { /* Request too long w/o termination. */ + hval->penalty++; + hval->request_posn = 0; + break; + } + r = read (hval->sock, &hval->request[hval->request_posn], max); + if (r == 0) { /* Socket closed. */ + guest_force_close (hval); + return; + } + if (r == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + perrorf ("read: %s", hval->sock_path); + guest_force_close (hval); + return; + } + break; + } + + hval->request_posn += r; + + /* Have we got a terminating \n character in the buffer yet? Note + * the buffer is not NUL-terminated which is why we use memchr. + */ + again: + p = memchr (hval->request, '\n', hval->request_posn); + if (p == NULL) + break; + + /* Is there more after the \n char? Normal guests shouldn't do + * this, but it can be an attempt to reestablish synchronization. + * It's documented that we throw away all but the last command sent, + * so let's do that. + */ + extra = &hval->request[hval->request_posn]-(p+1); + if (extra > 0) { + hval->penalty++; + memmove (hval->request, p+1, extra); + hval->request_posn = extra; + goto again; + } + + /* Looks like we've got ourselves a command. Remove trailing + * \r?\n char(s) and NUL-terminate the command string. + */ + assert (*p == '\n'); + assert (hval->request_posn >= 1); + assert (p == &hval->request[hval->request_posn-1]); + hval->request_posn--; + p--; + + if (hval->request_posn > 0 && *p == '\r') { + hval->request_posn--; + p--; + } + + *(p+1) = '\0'; + + execute_command (&now, hval, hval->request); + + hval->request_posn = 0; + break; + + case guest_state_reply: + /* Keep writing out the reply buffer until we've sent + * the whole thing. + */ + max = hval->reply_size - hval->reply_posn; + if (max <= 0) { + hval->state = guest_state_request; + break; + } + + r = write (hval->sock, &hval->reply[hval->reply_posn], max); + if (r == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + perrorf ("write: %s", hval->sock_path); + guest_force_close (hval); + return; + } + break; + } + + hval->reply_posn += r; + if (hval->reply_posn >= hval->reply_size) + hval->state = guest_state_request; + + break; + + case guest_state_dead: + /* We shouldn't get an event here. */ + hval->penalty++; + } + + /* Depending on the (new) state we want to set the + * events that we would like poll to give us next time. + */ + switch (hval->state) { + case guest_state_connecting: + hval->pollfd.reqevents = APR_POLLOUT; + break; + case guest_state_request: + hval->pollfd.reqevents = APR_POLLIN; + break; + case guest_state_reply: + hval->pollfd.reqevents = APR_POLLOUT; + break; + case guest_state_dead: + hval->pollfd.reqevents = 0; + break; + } }