/* virt-hostinfo * Copyright (C) 2009 Red Hat Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #define UNIX_PATH_MAX 108 #include #include #include #include #include #include #include #include #include "hostinfod.h" static void main_loop (void); static void set_reread_socket_dir (const apr_pollfd_t *, void *); static void do_reread_socket_dir (void); static struct guest_description *guest_added (const char *sock_path, const char *name); static void guest_removed (struct guest_description *); static void guest_event (const apr_pollfd_t *, void *); static void modify_pollfd_reqevents (struct guest_description *, apr_int16_t); const char *conf_file = DEFAULT_CONF_FILE; char *socket_dir = NULL; char *guests_file = NULL; char *libvirt_uri = NULL; int libvirt_uri_set_on_cmdline = 0; int verbose = 0; int verbose_set_on_cmdline = 0; int foreground = 0; int foreground_set_on_cmdline = 0; int messages_to_stderr = 1; static int reread_socket_dir = 1; static int quit = 0; apr_pool_t *pool = NULL; static apr_pollset_t *set = NULL; static apr_hash_t *guests = NULL; /* Hash "driver-name" -> guest_description */ typedef void (*poll_callback) (const apr_pollfd_t *, void *data); struct callback_data { poll_callback callback; void *data; }; static void usage (void) { printf ("hostinfod (virt-hostinfo daemon)\n" "Copyright (C) 2009 Red Hat Inc.\n" "\n" "Usage:\n" " hostinfod [--options]\n" "\n" "Options:\n" " --help Display full usage\n" " -c file | --config file\n" " Configuration file (default: %s)\n" " -C uri | --connect uri\n" " Set libvirt connection URI (default: NULL)\n" " -f | --foreground\n" " Run in the foreground (don't fork)\n" " -v Enable verbose messages (sent to syslog)\n", DEFAULT_CONF_FILE); } void initialize (void) { apr_initialize (); apr_pool_create (&pool, NULL); init_syslog (); } int main (int argc, char *argv[]) { static const apr_getopt_option_t options[] = { { "config", 'c', TRUE, "configuration file" }, { "connect", 'C', TRUE, "libvirt connection URI" }, { "foreground", 'f', FALSE, "run in foreground (don't fork)" }, { "verbose", 'v', FALSE, "enable verbose messages" }, { "help", '?', FALSE, "display help" }, { NULL, 0, 0, NULL }, }; apr_status_t r; apr_getopt_t *opt; int c; const char *optarg; /* REGISTER_COMMAND macro should have caused this to be * initialized. If it's not, then something is badly wrong ... */ if (!pool) { error ("internal error: daemon not initialized - no commands registered"); exit (1); } apr_getopt_init (&opt, pool, argc, argv); socket_dir = apr_pstrdup (pool, DEFAULT_SOCKET_DIR); guests_file = apr_pstrdup (pool, DEFAULT_GUESTS_FILE); /* Command line. */ while ((r = apr_getopt_long (opt, options, &c, &optarg)) == APR_SUCCESS) { switch (c) { case 'c': conf_file = optarg; /* If the user is specifying this on the command line, then * it should exist. They may have typo'd the name. */ if (access (conf_file, R_OK) == -1) { perrorf ("%s", conf_file); exit (1); } break; case 'C': libvirt_uri = optarg; libvirt_uri_set_on_cmdline = 1; break; case 'f': foreground = 1; foreground_set_on_cmdline = 1; break; case 'v': verbose = 1; verbose_set_on_cmdline = 1; break; case '?': usage (); exit (0); default: abort (); } } if (r != APR_EOF) { fprintf (stderr, "%s: unknown command line option\n", argv[0]); exit (1); } /* Read the config file. */ read_main_conf_file (); /* Connect to libvirt. */ init_libvirt (); /* Monitor the socket directory. */ monitor_socket_dir (); /* Create the guests hash. */ guests = apr_hash_make (pool); /* Create the initial pollset, just containing inotify socket. */ r = apr_pollset_create (&set, 1024 /* ? */, pool, 0); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_create"); exit (1); } apr_socket_t *tsock = NULL; r = apr_os_sock_put (&tsock, &sockets_inotify_fd, pool); if (r != APR_SUCCESS) { paprerror (r, "apr_os_sock_put"); exit (1); } apr_pollfd_t *tpollfd = apr_palloc (pool, sizeof *tpollfd); tpollfd->p = pool; tpollfd->desc_type = APR_POLL_SOCKET; tpollfd->reqevents = APR_POLLIN; tpollfd->rtnevents = 0; tpollfd->desc.s = tsock; struct callback_data *callback_data = apr_palloc (pool, sizeof *callback_data); callback_data->callback = set_reread_socket_dir; callback_data->data = NULL; tpollfd->client_data = callback_data; r = apr_pollset_add (set, tpollfd); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_add"); exit (1); } /* Daemonize. */ chdir ("/"); if (!foreground) { apr_proc_detach (1); /* After we detach from the terminal, all further messages * should just go to syslog. */ messages_to_stderr = 0; } message ("%s started", PACKAGE_STRING); main_loop (); message ("%s exiting", PACKAGE_STRING); apr_terminate (); return 0; } static void main_loop (void) { apr_status_t r; apr_int32_t numdescs; const apr_pollfd_t *descs; int i; while (!quit) { /* A socket has appeared or disappeared from the socket directory. */ if (reread_socket_dir) { do_reread_socket_dir (); reread_socket_dir = 0; } /* Poll. */ numdescs = 0; descs = NULL; r = apr_pollset_poll (set, -1, &numdescs, &descs); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_poll"); exit (1); } /* Perform the callbacks. */ for (i = 0; i < numdescs; ++i) { struct callback_data *callback_data; callback_data = descs[i].client_data; callback_data->callback (&descs[i], callback_data->data); } } } static void set_reread_socket_dir (const apr_pollfd_t *ignored1, void *ignored2) { reread_socket_dir = 1; } static void do_reread_socket_dir (void) { static int count = 0; int added = 0, removed = 0; char buf[PATH_MAX]; int r; DIR *dir; struct dirent *d; struct stat statbuf; struct guest_description *hval; apr_hash_index_t *hi; count++; debug ("reading socket directory (counter = %d)", count); /* Discard anything which appears on the inotify socket. We will * reread the whole directory each time. */ do { r = read (sockets_inotify_fd, buf, sizeof buf); if (r == -1) { if (errno != EAGAIN && errno != EWOULDBLOCK) { perrorf ("inotify socket: read"); exit (1); } } } while (r > 0); dir = opendir (socket_dir); if (dir == NULL) { perrorf ("%s: failed to open socket directory", socket_dir); exit (1); } while (errno = 0, (d = readdir (dir)) != NULL) { /* We expect the name to be "-" (where * is the libvirt driver name, and is the name of the * domain). Skip any dot-entries and anything that doesn't have * this form. */ if (d->d_name[0] == '.') continue; if (strlen (d->d_name) < 3 || strchr (&d->d_name[1], '-') == NULL) continue; /* It must be a Unix domain socket - skip anything else. */ snprintf (buf, sizeof buf, "%s/%s", socket_dir, d->d_name); if (stat (buf, &statbuf) == -1) { perrorf ("stat: %s", buf); continue; } if (!S_ISSOCK (statbuf.st_mode)) continue; /* See if we have an entry matching this already. */ hval = (struct guest_description *) apr_hash_get (guests, d->d_name, APR_HASH_KEY_STRING); if (!hval) { hval = guest_added (buf, d->d_name); if (!hval) continue; /* NB. It's not well documented, but the hash table * implementation DOES NOT copy the key internally. Therefore * we have to use hval->name (ie. our copy) as the key, NOT * d->d_name, even though they are the same string. */ apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, hval); added++; } hval->counter = count; } if (errno != 0) { perrorf ("%s: error reading socket directory", socket_dir); exit (1); } if (closedir (dir) == -1) { perrorf ("%s: error closing socket directory", socket_dir); exit (1); } /* Iterate over the hash and look for any guests which have * gone away. The guest_description.counter field won't have * been updated. */ for (hi = apr_hash_first (pool, guests); hi; hi = apr_hash_next (hi)) { /* On RHEL 5 this gives: * dereferencing type-punned pointer will break strict-aliasing rules * XXX */ apr_hash_this (hi, NULL, NULL, (void **) &hval); if (hval->counter != count) { /* This hash table implementation allows you to delete the * current entry safely. */ apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, NULL); /* guest_removed frees hval but does not unregister it from the * hash. */ guest_removed (hval); removed++; } } debug ("finished reading socket directory, added %d, removed %d, guests %d", added, removed, apr_hash_count (guests)); } /* This is called whenever we detect that a guest socket has been * created in the socket directory. */ static struct guest_description * guest_added (const char *sock_path, const char *name) { struct guest_description *hval = NULL; int sock; int r; unsigned retries = 0, tns; enum guest_state state; apr_pool_t *guest_pool = NULL; struct sockaddr_un addr; struct timespec ts; sock = socket (AF_UNIX, SOCK_STREAM, 0); if (sock == -1) { perrorf ("socket"); return NULL; } if (fcntl (sock, F_SETFL, O_NONBLOCK) == -1) { perrorf ("fcntl: O_NONBLOCK"); close (sock); return NULL; } if (fcntl (sock, F_SETFD, FD_CLOEXEC) == -1) { perrorf ("fcntl: FD_CLOEXEC"); close (sock); return NULL; } addr.sun_family = AF_UNIX; strncpy (addr.sun_path, sock_path, UNIX_PATH_MAX); addr.sun_path[UNIX_PATH_MAX-1] = '\0'; again: r = connect (sock, (struct sockaddr *) &addr, sizeof addr); if (r == -1) { /* Nasty race condition: The moment the listener binds the socket, * we see it in the directory and can try to connect to it. * However the listener might not have called listen(2) yet, which * means if we are faster than the other end, we will get * ECONNREFUSED. If this happens, sleep a bit and try again a few * times. */ if (errno == ECONNREFUSED) { if (retries <= 10) { tns = 1 << retries; ts.tv_sec = tns / 1000000000; ts.tv_nsec = tns % 1000000000; nanosleep (&ts, NULL); retries++; goto again; } } if (errno != EINPROGRESS) { /* Dead socket - cull these dead sockets from the directory. */ perrorf ("connect: %s", sock_path); close (sock); unlink (sock_path); return NULL; } state = guest_state_connecting; } else state = guest_state_request; /* Create a pool which can be used for allocations * during the lifetime of this guest connection. */ apr_pool_create (&guest_pool, pool); hval = apr_pcalloc (guest_pool, sizeof *hval); hval->pool = guest_pool; /* Create the remaining hash fields. */ hval->state = state; hval->name = apr_pstrdup (hval->pool, name); hval->sock_path = apr_pstrdup (hval->pool, sock_path); hval->sock = sock; hval->request_max = 4096; hval->request = apr_palloc (hval->pool, hval->request_max); hval->lasttime = apr_hash_make (hval->pool); /* Convert Unix fd into APR socket type. */ r = apr_os_sock_put (&hval->aprsock, &sock, hval->pool); if (r != APR_SUCCESS) { paprerror (r, "apr_os_sock_put: %s", sock_path); exit (1); } /* Register the socket in the pollset. */ hval->pollfd.p = hval->pool; hval->pollfd.desc_type = APR_POLL_SOCKET; if (hval->state == guest_state_connecting) hval->pollfd.reqevents = APR_POLLOUT; else hval->pollfd.reqevents = APR_POLLIN; hval->pollfd.rtnevents = 0; hval->pollfd.desc.s = hval->aprsock; struct callback_data *callback_data = apr_palloc (hval->pool, sizeof *callback_data); callback_data->callback = guest_event; callback_data->data = hval; hval->pollfd.client_data = callback_data; r = apr_pollset_add (set, &hval->pollfd); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_add: %s", sock_path); exit (1); } message ("new guest added: %s", hval->name); return hval; } /* This is called whenever we detect that a guest socket has been * removed from the socket directory. The guest_description parameter * is freed after this call and must not be used again. */ static void guest_removed (struct guest_description *hval) { apr_status_t r; message ("guest removed: %s", hval->name); /* Unregister the socket from the pollset. */ r = apr_pollset_remove (set, &hval->pollfd); if (r != APR_SUCCESS) paprerror (r, "%s: apr_pollset_remove", hval->name); if (close (hval->sock) == -1) pwarningf ("close: %s", hval->sock_path); /* This also frees hval and all related data. */ apr_pool_destroy (hval->pool); } /* Forcibly remove a guest, removing the socket from the * socket directory and cleaning up any resources used in * the daemon. The guest_description parameter is freed * after this call and must not be used again. */ static void guest_force_close (struct guest_description *hval) { debug ("forcibly closing guest: %s", hval->name); apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, NULL); unlink (hval->sock_path); guest_removed (hval); } /* Difference between two timespec structures (r = a - b) */ struct timespec * diff_timespec (struct timespec *r, const struct timespec *a, const struct timespec *b) { if (a->tv_nsec - b->tv_nsec < 0) { r->tv_sec = a->tv_sec - b->tv_sec - 1; r->tv_nsec = 1000000000 + a->tv_nsec - b->tv_nsec; } else { r->tv_sec = a->tv_sec - b->tv_sec; r->tv_nsec = a->tv_nsec - b->tv_nsec; } return r; } /* This is called when there is some event from the guest, eg. * connection finished, read, write or closed. */ static void guest_event (const apr_pollfd_t *pollfd, void *hvalv) { struct guest_description *hval = hvalv; int err, max, r, extra; socklen_t len; char *p; struct timespec now; #ifdef HAVE_CLOCK_GETTIME clock_gettime (CLOCK_MONOTONIC, &now); #else struct timeval tv; gettimeofday (&tv, NULL); now.tv_sec = tv.tv_sec; now.tv_nsec = tv.tv_usec * 1000; #endif /* If the guest keeps doing bad stuff, eventually lose patience with it. */ if (hval->penalty >= 100) { error ("%s: guest did too much bad stuff, so we stopped talking to it", hval->name); guest_force_close (hval); return; } /* Decrement the penalty once a minute, so the guest can recover. */ if (hval->penalty > 0) { struct timespec diff; diff_timespec (&diff, &now, &hval->last_penalty_decr); if (diff.tv_sec >= 60) { hval->penalty--; hval->last_penalty_decr = now; } } switch (hval->state) { case guest_state_connecting: /* Once we get a write event, we know the socket has * connected, or there is an error. */ err = 0; len = sizeof err; getsockopt (hval->sock, SOL_SOCKET, SO_ERROR, &err, &len); if (err == 0) hval->state = guest_state_request; else { errno = err; perrorf ("connect: %s", hval->sock_path); guest_force_close (hval); return; } break; case guest_state_request: /* Reading the guest's request, a single line terminated by \r?\n */ max = hval->request_max - hval->request_posn; if (max <= 0) { /* Request too long w/o termination. */ hval->penalty++; hval->request_posn = 0; break; } r = read (hval->sock, &hval->request[hval->request_posn], max); if (r == 0) { /* Socket closed. */ guest_force_close (hval); return; } if (r == -1) { if (errno != EAGAIN && errno != EWOULDBLOCK) { perrorf ("read: %s", hval->sock_path); guest_force_close (hval); return; } break; } hval->request_posn += r; /* Have we got a terminating \n character in the buffer yet? Note * the buffer is not NUL-terminated which is why we use memchr. */ again: p = memchr (hval->request, '\n', hval->request_posn); if (p == NULL) break; /* Is there more after the \n char? Normal guests shouldn't do * this, but it can be an attempt to reestablish synchronization. * It's documented that we throw away all but the last command sent, * so let's do that. */ extra = &hval->request[hval->request_posn]-(p+1); if (extra > 0) { hval->penalty++; memmove (hval->request, p+1, extra); hval->request_posn = extra; goto again; } /* Looks like we've got ourselves a command. Remove trailing * \r?\n char(s) and NUL-terminate the command string. */ assert (*p == '\n'); assert (hval->request_posn >= 1); assert (p == &hval->request[hval->request_posn-1]); hval->request_posn--; p--; if (hval->request_posn > 0 && *p == '\r') { hval->request_posn--; p--; } *(p+1) = '\0'; execute_command (&now, hval, hval->request); hval->request_posn = 0; break; case guest_state_reply: /* Keep writing out the reply buffer until we've sent * the whole thing. */ max = hval->reply_size - hval->reply_posn; if (max <= 0) { hval->state = guest_state_request; break; } r = write (hval->sock, &hval->reply[hval->reply_posn], max); if (r == -1) { if (errno != EAGAIN && errno != EWOULDBLOCK) { perrorf ("write: %s", hval->sock_path); guest_force_close (hval); return; } break; } hval->reply_posn += r; if (hval->reply_posn >= hval->reply_size) hval->state = guest_state_request; break; case guest_state_dead: /* We shouldn't get an event here. */ hval->penalty++; } /* Depending on the (new) state we want to set the * events that we would like poll to give us next time. */ switch (hval->state) { case guest_state_connecting: modify_pollfd_reqevents (hval, APR_POLLOUT); break; case guest_state_request: modify_pollfd_reqevents (hval, APR_POLLIN); break; case guest_state_reply: modify_pollfd_reqevents (hval, APR_POLLOUT); break; case guest_state_dead: modify_pollfd_reqevents (hval, 0); break; } } /* It turns out you can't just update the pollfd->reqevents * field. Instead you have to remove the pollfd and reregister * it in the pollset. */ static void modify_pollfd_reqevents (struct guest_description *hval, apr_int16_t new_reqevents) { apr_status_t r; if (hval->pollfd.reqevents != new_reqevents) { r = apr_pollset_remove (set, &hval->pollfd); if (r != APR_SUCCESS) { paprerror (r, "%s: apr_pollset_remove", hval->name); return; } hval->pollfd.reqevents = new_reqevents; r = apr_pollset_add (set, &hval->pollfd); if (r != APR_SUCCESS) paprerror (r, "%s: apr_pollset_add", hval->name); } }