From dcbfcc566fa812fd3085c89a8cfed7fe34bb05e8 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Sat, 8 Aug 2009 11:15:35 +0100 Subject: [PATCH] Hostinfo day 3: Further work on the daemon. --- .gitignore | 4 +- hostinfo-test/Makefile.am | 6 +- hostinfo-test/hostinfo-test.pl | 1 + hostinfod/Makefile.am | 13 +- hostinfod/commands.c | 45 ++++ hostinfod/configuration.c | 12 +- hostinfod/hostinfo-protocol.pod | 403 ++++++++++++++++++++++++++++++++++ hostinfod/hostinfo.pod | 7 +- hostinfod/hostinfod.h | 39 ++++ hostinfod/main.c | 465 ++++++++++++++++++++++++++++++++++++++-- 10 files changed, 967 insertions(+), 28 deletions(-) create mode 100644 hostinfod/commands.c create mode 100644 hostinfod/hostinfo-protocol.pod diff --git a/.gitignore b/.gitignore index eb47ddd..779429b 100644 --- a/.gitignore +++ b/.gitignore @@ -15,16 +15,18 @@ conf/guests.conf conf/hostinfo.conf depcomp hostinfod/hostinfo.8 +hostinfod/hostinfo-protocol.5 hostinfod/hostinfod hostinfo-set/hostinfo-set.8 hostinfo-set/hostinfo-set hostinfo-status/hostinfo-status.8 hostinfo-status/hostinfo-status -hostinfo-test/hostinfo-test.8 +hostinfo-test/hostinfo-test.1 hostinfo-test/hostinfo-test localconfigure local.conf local.guests.conf +local.sockets install-sh missing stamp-h1 diff --git a/hostinfo-test/Makefile.am b/hostinfo-test/Makefile.am index fd206ad..5430676 100644 --- a/hostinfo-test/Makefile.am +++ b/hostinfo-test/Makefile.am @@ -19,11 +19,11 @@ EXTRA_DIST = hostinfo-test.pl bin_SCRIPTS = hostinfo-test -man_MANS = hostinfo-test.8 +man_MANS = hostinfo-test.1 -hostinfo-test.8: hostinfo-test +hostinfo-test.1: hostinfo-test $(POD2MAN) \ - --section 8 \ + --section 1 \ -c "Virtualization Support" \ --release "$(PACKAGE_NAME)-$(PACKAGE_VERSION)" \ $< > $@ diff --git a/hostinfo-test/hostinfo-test.pl b/hostinfo-test/hostinfo-test.pl index 8f9694e..d265f74 100755 --- a/hostinfo-test/hostinfo-test.pl +++ b/hostinfo-test/hostinfo-test.pl @@ -66,6 +66,7 @@ or there was an error. 
L, L, L, +L, L, L, L, diff --git a/hostinfod/Makefile.am b/hostinfod/Makefile.am index f512655..16272e0 100644 --- a/hostinfod/Makefile.am +++ b/hostinfod/Makefile.am @@ -15,11 +15,12 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -EXTRA_DIST = hostinfo.pod +EXTRA_DIST = hostinfo.pod hostinfo-protocol.pod sbin_PROGRAMS = hostinfod hostinfod_SOURCES = \ + commands.c \ configuration.c \ error.c \ hostinfod.h \ @@ -32,9 +33,10 @@ hostinfod_CFLAGS = \ -DDEFAULT_CONF_FILE=\"$(sysconfdir)/hostinfo/hostinfo.conf\" \ -DDEFAULT_GUESTS_FILE=\"$(sysconfdir)/hostinfo/guests.conf\" \ -DDEFAULT_SOCKET_DIR=\"$(localstatedir)/lib/hostinfo\" + hostinfod_LDADD = $(HOSTINFOD_LIBS) -man_MANS = hostinfo.8 hostinfod.8 +man_MANS = hostinfo.8 hostinfod.8 hostinfo-protocol.5 hostinfo.8: hostinfo.pod $(POD2MAN) \ @@ -42,3 +44,10 @@ hostinfo.8: hostinfo.pod -c "Virtualization Support" \ --release "$(PACKAGE_NAME)-$(PACKAGE_VERSION)" \ $< > $@ + +hostinfo-protocol.5: hostinfo-protocol.pod + $(POD2MAN) \ + --section 5 \ + -c "Virtualization Support" \ + --release "$(PACKAGE_NAME)-$(PACKAGE_VERSION)" \ + $< > $@ diff --git a/hostinfod/commands.c b/hostinfod/commands.c new file mode 100644 index 0000000..1bbddc0 --- /dev/null +++ b/hostinfod/commands.c @@ -0,0 +1,45 @@ +/* virt-hostinfo + * Copyright (C) 2009 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* This code parses and executes the commands sent by guests. It + * is therefore particularly security sensitive. The protocol is + * documented in hostinfo-protocol(5). At present execute_command is a stub: it only logs hval->name and the raw command string via debug(); the 'now' argument is unused and no reply is built. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include "hostinfod.h" + +void +execute_command (time_t now, + struct guest_description *hval, const char *command) +{ + debug ("%s: %s", hval->name, command); + + + + + +} diff --git a/hostinfod/configuration.c b/hostinfod/configuration.c index 0d71908..90200f8 100644 --- a/hostinfod/configuration.c +++ b/hostinfod/configuration.c @@ -120,8 +120,10 @@ process_conf_file (const char *path, int exit_if_not_exist, while ((r = getline (&line, &len, fp)) != -1) { lineno++; + /*debug ("%s:%d: '%s' (len = %d)", path, lineno, line, len);*/ + /* Remove trailing \n */ - real_len = len; + real_len = strlen (line); if (real_len > 0 && line[real_len-1] == '\n') line[--real_len] = '\0'; @@ -142,7 +144,13 @@ process_conf_file (const char *path, int exit_if_not_exist, debug ("configuration file: section [%s]", line); - if (process_section && process_section (path, lineno, line) == -1) + if (!process_section) { + error ("%s:%d: unexpected section header ([%s]) in file", + path, lineno, line); + exit (1); + } + + if (process_section (path, lineno, line) == -1) exit (1); } else { /* Key value */ diff --git a/hostinfod/hostinfo-protocol.pod b/hostinfod/hostinfo-protocol.pod new file mode 100644 index 0000000..df838c0 --- /dev/null +++ b/hostinfod/hostinfo-protocol.pod @@ -0,0 +1,403 @@ +=encoding utf8 + +=head1 NAME + +hostinfo-protocol - hostinfo client commands and protocol + +=head1 SYNOPSIS + + >>> PING "hello" + <<< 1.0 200 hello + +=head1 DESCRIPTION + +This manpage documents the hostinfo protocol. 
For other aspects of +the hostinfo system, please see the associated manpages listed in the +I<SEE ALSO> section below. + +Hostinfo is a protocol that virtual machines (guests) can use to +access limited information about the physical host that they are +running on. For example, the virtual machine sees only virtual CPUs, +but using the hostinfo protocol you can query the number of physical +CPUs on the real machine. + +Accessing hostinfo does not require any special libraries or software. +The hostinfo service is made available on a (virtual) serial port +attached to the guest. Programs send text commands to this serial +port and read the replies. The format of these commands and replies +is what this manpage documents. + +=head2 ENABLING HOSTINFO FOR A GUEST + +Before hostinfo can be used from a guest, it must be enabled by the +host's system administrator. This is outside the scope of this +manpage - see L<hostinfo(8)>. + +=head1 PROTOCOL + +=head2 SERIAL PORT + +The specifics of how you access serial ports under your operating +system are not covered in this manpage, but on Linux you would open a +special device like C<< /dev/ttyS1 >> and on DOS/Windows it would be +something like C<COM2>. + +Hostinfo is I<usually> exported to the guest through the second serial +port (C<< /dev/ttyS1 >> on Linux, C<COM2> on DOS/Windows). However the +system administrator can change this, and might do so particularly if +the serial ports are used for something else. Contact the host system +administrator or run the L<hostinfo-status(8)> command on the host. + +Software written to use the hostinfo protocol should be configurable +to use any serial port, I<or> it can try to determine the serial port +dynamically (although this may be risky/undesirable depending on what +the other serial ports are used for). + +=head2 REQUESTS AND REPLIES + +The basic protocol consists of sending a text-based command (the +request), and then reading the reply. 
+ +A typical request/reply cycle looks like: + + >>> PING "hello" + <<< 1.0 200 hello + +In this case the request was the literal string C<"PING \"hello\"\r\n"> +(note: followed by carriage return [CR] and line feed [LF]). + +The reply was C<"1.0 200 hello\r\n">. + +The E<gt>E<gt>E<gt> and E<lt>E<lt>E<lt> symbols are not part of +the protocol. They indicate messages sent to the host and +received from the host respectively. + +The request is a command followed by some number of arguments, +followed by CRLF. Commands available are described below. + +The reply consists of: + +=over 4 + +=item 1.0 + +The protocol version number, always C<1.x> in the current +iteration of the protocol. + +=item E<lt>spaceE<gt> 200 + +The 3 digit status code (compatible with HTTP +status codes, see RFC 2616). + +=item E<lt>spaceE<gt> hello + +A space followed by the (optional) short response, B<or>: + +=item multi-line response + +Some commands (but not PING) can return a multi-line response. + +=back + +A few commands return a multi-line response: + + >>> CAPABILITIES + <<< 1.0 200 + <<< Content-Type: text/xml + <<< Content-Length: 123 + <<< + <<< <capabilities> + <<< <host> + <<< <cpu> + <<< <arch>i686</arch> + (etc.) + +The multi-line response consists of headers, a blank line and a body, +and is a compatible subset of HTTP (RFC 2616). + +To tell the difference between a short, single-line response +and a multi-line response: + +For the short response, the 3 digit HTTP status code will be followed +by a space character (even if the short response itself is empty). +For example C<"1.0 200 hello\r\n"> or C<"1.0 200 \r\n">. + +For the multi-line response, the 3 digit HTTP status code will be +followed by the CR LF immediately. For example C<"1.0 200\r\n">. + +When a command returns an error, the request / response looks like +this: + + >>> NOSUCHCOMMAND + <<< 1.0 404 Command not found + +As in HTTP, C<4xx> and C<5xx> status codes indicate classes of +error. Following the error code is an explanatory string. + +Errors never have a multi-line response. 
+ +=head2 FREQUENCY OF REQUESTS + +The guest will usually be limited in the frequency of requests it is +permitted to make. This limit is set by the host system administrator +(see L<hostinfo(8)>). If the guest exceeds this frequency too often, +then the result will be that the host stops answering requests. See +I<LOSS OF SERVICE> below. + +=head1 COMMANDS + +Requests consist of a command followed by zero or more arguments. +Arguments are separated from the command and from each other by a +single space. After the command and arguments, send CRLF. + +Commands are written in this manpage all in uppercase. However they +are not case sensitive, and you can send them in lowercase or mixed +case. + +The request is always a single line, always consists only of 7 bit +printable ASCII (apart from the final CRLF), and must be less than or equal +to 4096 characters in length (that includes the final CRLF). + +Arguments that are strings I<must> be quoted (using double-quotes). +Special characters inside the strings are escaped using backslashes. +The rules are precisely the same as for C literal strings, so +for example C<"\t"> is a string containing a single tab character. + +Arguments that are integers appear as integer literals. + +Other argument types that are allowed are: booleans (I<true> or +I<false>). + +Note that integers and booleans must never be quoted. + +=head2 PING + + PING echodata + +=head3 Arguments + +echodata [string]: A string that is echoed back in the response. This +must be 1-16 characters in length, consisting I<only> of 7 bit ASCII +alpha-numeric characters ([0-9a-zA-Z]{1,16}). + +=head3 Returns + +Returns C<echodata> back to the caller. + +=head3 Description + +This command is used to test the hostinfo connection. + +The possible responses to this are: + +=over 4 + +=item * + +The command succeeds and echoes back the same C<echodata> string. +This indicates that everything is working. + +=item * + +The command succeeds but echoes back different C<echodata>. 
Indicates +a synchronization error or some corruption on the serial port +channel (see I<SYNCHRONIZATION> below). + +=item * + +The command returns an error. The error will indicate the problem. +Note that, as with all the other requests, you are limited in the rate you +can ping the host, by a setting that the host system administrator +controls. + +=item * + +The command returns nothing / hangs / returns a corrupted message. +See I<SYNCHRONIZATION>, I<LOSS OF SERVICE> below, and +I<TROUBLESHOOTING> in the L<hostinfo(8)> manual page. + +=back + + + + + + + + + + + +=head1 COMMON STATUS CODES + +=head2 2xx + +All 2xx codes indicate the command completed successfully. + +=over 4 + +=item 200 + +This is the usual status code that is returned to indicate +successful completion of the command. + +=back + +=head2 4xx + +All 4xx codes indicate a client error - malformed or unknown +command etc. + +=over 4 + +=item 400 Bad request + +This indicates a malformed request. Causes include: No command, +incorrect number or type of arguments, not having a single space +between the command and each argument, not correctly quoting strings, +invalid integers. + +=item 401 Command disabled + +The host system administrator has configured hostinfo to prevent this +guest from using this command or accessing the requested piece of +information. Contact the host system administrator and ask them to +adjust the configuration to allow this command, or see L<hostinfo(8)>. + +=item 404 Command not found + +No such command. New commands can be added in later revisions of this +protocol. If you try to use these commands with older hostinfo +services, you will receive this error. + +=item 406 Too frequent + +This indicates that the client is trying to access the requested +resource too often. The client should access the resource no more +frequently than is configured by the host system administrator. +(After too many of these errors, the hostinfo service will be +completely disabled: see I<LOSS OF SERVICE> below). + +=back + +=head2 5xx + +All 5xx codes indicate a server error. 
The command was well-formed +but the host was unable to fulfil this request. + +=over 4 + +=item 500 Internal server error + +This indicates a problem on the host side - for example, it might be +that the hostinfo daemon cannot contact libvirt. For security +reasons, the cause of these failures is never revealed to the guest. +However it is logged on the host side, so the host system +administrator can determine the precise cause of the error. (See also +I<TROUBLESHOOTING> in the L<hostinfo(8)> manpage). + +=back + +=head1 OTHER ISSUES + +=head2 TESTING + +Use L<hostinfo-test(1)> to test hostinfo from the guest. This script +should work on any guest that can run Perl. + +=head2 LOSS OF SERVICE + +The daemon on the host side that services hostinfo requests is written +defensively. In particular, it will refuse service (eventually just +ignoring the guest completely) if the guest behaves badly, which +includes: trying to flood the host with data, or sending requests more +frequently than the host system administrator has configured. + +In the case where the guest loses service (gets no response from +any commands), the only solution is to contact the host system +administrator. + +The host system administrator can restart the daemon and/or the guest, +which should restore service. The host system administrator can also +troubleshoot problems by following the I<TROUBLESHOOTING> section in +L<hostinfo(8)>. + +=head2 SYNCHRONIZATION + +Serial ports don't have any inherent way to synchronize the data +stream. + +If the client believes it has lost synchronization, it can +regain it through the following steps: + +=over 4 + +=item 1. + +Send CR LF twice. + +=item 2. + +Wait 5 seconds, discarding anything that is read on the +serial port during this time. + +=item 3. + +Send a PING command and check that the correct response is +received. + +=back + +=head2 MULTIPLE CLIENTS + +The serial port only supports reading a single command at a time. 
If +multiple clients try to connect to the serial port and send commands +at the same time, then the results will be unpredictable. + +If you need to have multiple clients accessing hostinfo inside a +guest, then you must run some sort of service or daemon inside the +guest which multiplexes these requests onto the single serial port. + +The protocol does not support "pipelining" requests (that is, issuing +more than one request at a time or overlapping requests and replies). +If multiple commands are sent at once, then the daemon may discard all +but the final command. + +=head1 FILES + +=over 4 + +=item /dev/ttyS1 + +=back + +=head1 SEE ALSO + +L, +L, +L, +L, +L. + +=head1 AUTHORS + +Richard W.M. Jones (C) + +=head1 COPYRIGHT + +Copyright (C) 2009 Red Hat Inc. +L + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. diff --git a/hostinfod/hostinfo.pod b/hostinfod/hostinfo.pod index be9c3f6..3fe8bc0 100644 --- a/hostinfod/hostinfo.pod +++ b/hostinfod/hostinfo.pod @@ -266,7 +266,9 @@ available to it, every N seconds, which means in total there could up to M requests every N seconds per guest. If a guest exceeds the rate at which it is allowed to make requests, -then the daemon sleeps before replying (just for that guest). +then the daemon will after some number of violations stop talking to +the guest. 
The only way to restore service to the guest will be to +reboot the guest or restart the daemon. Setting this to C means there is no limit. Guests can flood the host with requests. @@ -398,9 +400,10 @@ Use the L program in the guest to test this. L, L, -L, +L, L, L, +L, L, L. diff --git a/hostinfod/hostinfod.h b/hostinfod/hostinfod.h index f1da3e4..103783e 100644 --- a/hostinfod/hostinfod.h +++ b/hostinfod/hostinfod.h @@ -19,8 +19,44 @@ #ifndef HOSTINFOD_H #define HOSTINFOD_H +#include + #include #include +#include +#include +#include + +enum guest_state { + guest_state_connecting, /* Connecting to socket. */ + guest_state_request, /* Waiting or reading the request. */ + guest_state_reply, /* Sending the reply. */ + guest_state_dead /* Connection is dead. */ +}; + +struct guest_description { + int counter; /* Number of the last directory scan that saw this socket. */ + apr_pool_t *pool; /* Pool for lifetime of guest connection. */ + const char *name; /* "driver-name" */ + const char *sock_path; /* Full path to socket. */ + int sock; /* Real socket (Unix fd). */ + apr_socket_t *aprsock; /* APR socket wrapping sock. */ + apr_pollfd_t pollfd; /* APR poll descriptor. */ + enum guest_state state; /* State of the connection. */ + + /* Increments every time guest does something bad, decremented once per min; guest_event drops the guest once this reaches 100. */ + unsigned penalty; + time_t last_penalty_decr; /* Time penalty was last decremented. */ + + unsigned request_max; /* Max. length of request buffer. */ + unsigned request_posn; /* Position in request buffer. */ + char *request; /* Request buffer. */ + + unsigned reply_alloc; /* Allocated for reply buffer. */ + unsigned reply_size; /* Size used in reply buffer. */ + unsigned reply_posn; /* Position in reply buffer. */ + char *reply; /* Reply buffer. 
*/ +}; /* main.c */ extern const char *conf_file; @@ -57,4 +93,7 @@ extern void read_main_conf_file (void); extern int sockets_inotify_fd; extern void monitor_socket_dir (void); +/* commands.c */ +extern void execute_command (time_t now, struct guest_description *hval, const char *command); + #endif /* HOSTINFOD_H */ diff --git a/hostinfod/main.c b/hostinfod/main.c index e272210..17da054 100644 --- a/hostinfod/main.c +++ b/hostinfod/main.c @@ -22,7 +22,16 @@ #include #include +#include #include +#include +#include +#include +#include +#include +#include + +#define UNIX_PATH_MAX 108 #include #include @@ -30,15 +39,17 @@ #include #include #include +#include #include #include "hostinfod.h" static void main_loop (void); -static void set_reread_socket_dir (const apr_pollfd_t *); +static void set_reread_socket_dir (const apr_pollfd_t *, void *); static void do_reread_socket_dir (void); - -typedef void (*poll_callback) (const apr_pollfd_t *); +static struct guest_description *guest_added (const char *sock_path, const char *name); +static void guest_removed (struct guest_description *); +static void guest_event (const apr_pollfd_t *, void *); const char *conf_file = DEFAULT_CONF_FILE; char *socket_dir = NULL; @@ -55,7 +66,15 @@ static int reread_socket_dir = 1; static int quit = 0; apr_pool_t *pool = NULL; -static apr_pollset_t *set; +static apr_pollset_t *set = NULL; + +static apr_hash_t *guests = NULL; /* Hash "driver-name" -> guest_description */ + +typedef void (*poll_callback) (const apr_pollfd_t *, void *data); +struct callback_data { + poll_callback callback; /* Called by main_loop when the descriptor has an event. */ + void *data; /* Passed to callback as its second argument. */ +}; static void usage (void) @@ -72,8 +91,7 @@ usage (void) " Configuration file (default: %s)\n" " -f | --foreground\n" " Run in the foreground (don't fork)\n" - " -v Enable verbose messages (sent to syslog, and to\n" - " stderr if -d option is given)\n", + " -v Enable verbose messages (sent to syslog)\n", DEFAULT_CONF_FILE); } @@ -141,6 +159,9 @@ main (int argc, char *argv[]) /* Monitor the 
socket directory. */ monitor_socket_dir (); + /* Create the guests hash. */ + guests = apr_hash_make (pool); + /* Create the initial pollset, just containing inotify socket. */ r = apr_pollset_create (&set, 1024 /* ? */, pool, 0); if (r != APR_SUCCESS) { @@ -159,7 +180,13 @@ main (int argc, char *argv[]) tpollfd->reqevents = APR_POLLIN; tpollfd->rtnevents = 0; tpollfd->desc.s = tsock; - tpollfd->client_data = set_reread_socket_dir; + + struct callback_data *callback_data = + apr_palloc (pool, sizeof *callback_data); + callback_data->callback = set_reread_socket_dir; + callback_data->data = NULL; + tpollfd->client_data = callback_data; + r = apr_pollset_add (set, tpollfd); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_add"); @@ -177,8 +204,9 @@ main (int argc, char *argv[]) messages_to_stderr = 0; } - message (PACKAGE_STRING); + message ("%s started", PACKAGE_STRING); main_loop (); + message ("%s exiting", PACKAGE_STRING); apr_terminate (); return 0; @@ -200,7 +228,7 @@ main_loop (void) } /* Poll. */ - r = apr_pollset_poll (set, 0, &numdescs, &descs); + r = apr_pollset_poll (set, -1, &numdescs, &descs); if (r != APR_SUCCESS) { paprerror (r, "apr_pollset_poll"); exit (1); @@ -208,16 +236,16 @@ main_loop (void) /* Perform the callbacks. 
*/ for (i = 0; i < numdescs; ++i) { - poll_callback callback; + struct callback_data *callback_data; - callback = descs[i].client_data; - callback (&descs[i]); + callback_data = descs[i].client_data; + callback_data->callback (&descs[i], callback_data->data); } } } static void -set_reread_socket_dir (const apr_pollfd_t *_) +set_reread_socket_dir (const apr_pollfd_t *ignored1, void *ignored2) { reread_socket_dir = 1; } @@ -225,24 +253,425 @@ set_reread_socket_dir (const apr_pollfd_t *_) static void do_reread_socket_dir (void) { - char buf[256]; - - debug ("reading socket directory"); + static int count = 0; /* Scan number; stamped into every guest seen during this scan. */ + int added = 0, removed = 0; + char buf[PATH_MAX]; /* Scratch: first drains the inotify fd, then holds each socket path. */ + int r; + DIR *dir; + struct dirent *d; + struct stat statbuf; + struct guest_description *hval; + apr_hash_index_t *hi; + + count++; + debug ("reading socket directory (counter = %d)", count); /* Discard anything which appears on the inotify socket. We will * reread the whole directory each time. */ - while (read (sockets_inotify_fd, buf, sizeof buf) > 0) - ; + do { + r = read (sockets_inotify_fd, buf, sizeof buf); + if (r == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + perrorf ("inotify socket: read"); + exit (1); + } + } + } while (r > 0); + + dir = opendir (socket_dir); + if (dir == NULL) { + perrorf ("%s: failed to open socket directory", socket_dir); + exit (1); + } + + while (errno = 0, (d = readdir (dir)) != NULL) { + /* We expect the name to be "DRIVER-NAME" (where DRIVER + * is the libvirt driver name, and NAME is the name of the + * domain). Skip any dot-entries and anything that doesn't have + * this form. errno is cleared before each readdir call so that an error can be told apart from end-of-directory after the loop. + */ + if (d->d_name[0] == '.') + continue; + if (strlen (d->d_name) < 3 || strchr (&d->d_name[1], '-') == NULL) + continue; + + /* It must be a Unix domain socket - skip anything else. 
*/ + snprintf (buf, sizeof buf, "%s/%s", socket_dir, d->d_name); /* NOTE(review): truncation of an over-long path is not detected here. */ + if (stat (buf, &statbuf) == -1) { + perrorf ("stat: %s", buf); + continue; + } + if (!S_ISSOCK (statbuf.st_mode)) + continue; + + /* See if we have an entry matching this already. */ + hval = (struct guest_description *) + apr_hash_get (guests, d->d_name, APR_HASH_KEY_STRING); + if (!hval) { + hval = guest_added (buf, d->d_name); + if (!hval) + continue; + + /* NB. It's not well documented, but the hash table + * implementation DOES NOT copy the key internally. Therefore + * we have to use hval->name (ie. our copy) as the key, NOT + * d->d_name, even though they are the same string. + */ + apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, hval); + added++; + } + + hval->counter = count; /* Mark this guest as seen in the current scan. */ + } + if (errno != 0) { + perrorf ("%s: error reading socket directory", socket_dir); + exit (1); + } + + if (closedir (dir) == -1) { + perrorf ("%s: error closing socket directory", socket_dir); + exit (1); + } + + /* Iterate over the hash and look for any guests which have + * gone away. The guest_description.counter field won't have + * been updated. NOTE(review): apr_hash_first is given the global pool, so each scan presumably allocates a fresh iterator from it - confirm whether passing NULL (the internal iterator) would be acceptable here. + */ + for (hi = apr_hash_first (pool, guests); hi; hi = apr_hash_next (hi)) { + apr_hash_this(hi, NULL, NULL, (void **) &hval); + if (hval->counter != count) { + /* This hash table implementation allows you to delete the + * current entry safely. + */ + apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, NULL); + + /* guest_removed frees hval but does not unregister it from the + * hash. + */ + guest_removed (hval); + removed++; + } + } + + debug ("finished reading socket directory, added %d, removed %d, guests %d", + added, removed, apr_hash_count (guests)); +} + +/* This is called whenever we detect that a guest socket has been + * created in the socket directory. 
+ */ +static struct guest_description * +guest_added (const char *sock_path, const char *name) +{ + struct guest_description *hval = NULL; + int sock; + int r; + unsigned retries = 0, tns; /* tns: back-off delay in nanoseconds. */ + enum guest_state state; + apr_pool_t *guest_pool; + struct sockaddr_un addr; + struct timespec ts; + + sock = socket (AF_UNIX, SOCK_STREAM, 0); + if (sock == -1) { + perrorf ("socket"); + return NULL; + } + + if (fcntl (sock, F_SETFL, O_NONBLOCK) == -1) { + perrorf ("fcntl: O_NONBLOCK"); + close (sock); + return NULL; + } + if (fcntl (sock, F_SETFD, FD_CLOEXEC) == -1) { + perrorf ("fcntl: FD_CLOEXEC"); + close (sock); + return NULL; + } + + addr.sun_family = AF_UNIX; + strncpy (addr.sun_path, sock_path, UNIX_PATH_MAX); + addr.sun_path[UNIX_PATH_MAX-1] = '\0'; /* A sock_path longer than UNIX_PATH_MAX-1 bytes is silently truncated here. */ + + again: + r = connect (sock, (struct sockaddr *) &addr, sizeof addr); + if (r == -1) { + /* Nasty race condition: The moment the listener binds the socket, + * we see it in the directory and can try to connect to it. + * However the listener might not have called listen(2) yet, which + * means if we are faster than the other end, we will get + * ECONNREFUSED. If this happens, sleep a bit and try again a few + * times. The delay is (1 << retries) nanoseconds, doubling from 1ns to 1024ns over at most 11 retries. NOTE(review): these delays look very short - confirm nanoseconds was the intended unit. + */ + if (errno == ECONNREFUSED) { + if (retries <= 10) { + tns = 1 << retries; + ts.tv_sec = tns / 1000000000; + ts.tv_nsec = tns % 1000000000; + nanosleep (&ts, NULL); + retries++; + goto again; + } + } + + if (errno != EINPROGRESS) { + /* Dead socket - cull these dead sockets from the directory. */ + perrorf ("connect: %s", sock_path); + close (sock); + unlink (sock_path); + return NULL; + } + state = guest_state_connecting; /* Non-blocking connect still in progress; guest_event finishes it. */ + } + else + state = guest_state_request; + + /* Create a pool which can be used for allocations + * during the lifetime of this guest connection. + */ + apr_pool_create (&guest_pool, pool); + + hval = apr_pcalloc (guest_pool, sizeof *hval); + hval->pool = guest_pool; + + /* Create the remaining hash fields. 
*/ + hval->state = state; + hval->name = apr_pstrdup (hval->pool, name); + hval->sock_path = apr_pstrdup (hval->pool, sock_path); + hval->sock = sock; + hval->request_max = 4096; /* Same figure as the 4096-character request limit in hostinfo-protocol(5). */ + hval->request = apr_palloc (hval->pool, hval->request_max); + + /* Convert Unix fd into APR socket type. */ + r = apr_os_sock_put (&hval->aprsock, &sock, hval->pool); + if (r != APR_SUCCESS) { + paprerror (r, "apr_os_sock_put: %s", sock_path); + exit (1); + } + + /* Register the socket in the pollset. While connecting we wait for writability, otherwise for incoming data. */ + hval->pollfd.p = hval->pool; + hval->pollfd.desc_type = APR_POLL_SOCKET; + if (hval->state == guest_state_connecting) + hval->pollfd.reqevents = APR_POLLOUT; + else + hval->pollfd.reqevents = APR_POLLIN; + hval->pollfd.rtnevents = 0; + hval->pollfd.desc.s = hval->aprsock; + + struct callback_data *callback_data = + apr_palloc (hval->pool, sizeof *callback_data); + callback_data->callback = guest_event; + callback_data->data = hval; + hval->pollfd.client_data = callback_data; + + r = apr_pollset_add (set, &hval->pollfd); + if (r != APR_SUCCESS) { + paprerror (r, "apr_pollset_add: %s", sock_path); + exit (1); + } + + message ("new guest added: %s", hval->name); + return hval; +} + +/* This is called whenever we detect that a guest socket has been + * removed from the socket directory. The guest_description parameter + * is freed after this call and must not be used again. This function does not touch the guests hash; callers remove hval from it themselves. + */ +static void +guest_removed (struct guest_description *hval) +{ + apr_status_t r; + + message ("guest removed: %s", hval->name); + + /* Unregister the socket from the pollset. */ + r = apr_pollset_remove (set, &hval->pollfd); + if (r != APR_SUCCESS) + paprerror (r, "apr_pollset_remove for %s", hval->name); + + if (close (hval->sock) == -1) + pwarningf ("close: %s", hval->sock_path); + + /* This also frees hval and all related data. */ + apr_pool_destroy (hval->pool); +} + +/* Forcibly remove a guest, removing the socket from the + * socket directory and cleaning up any resources used in + * the daemon. 
The guest_description parameter is freed + * after this call and must not be used again. + */ +static void +guest_force_close (struct guest_description *hval) +{ + debug ("forcibly closing guest: %s", hval->name); + + apr_hash_set (guests, hval->name, APR_HASH_KEY_STRING, NULL); + unlink (hval->sock_path); + guest_removed (hval); +} + +/* This is called when there is some event from the guest, eg. + * connection finished, read, write or closed. hvalv is the struct guest_description that guest_added stored as the callback data; it may already have been freed (by guest_force_close) by the time this function returns. + */ +static void +guest_event (const apr_pollfd_t *pollfd, void *hvalv) +{ + struct guest_description *hval = hvalv; + int err, max, r, extra; + socklen_t len; + char *p; + time_t now; + + time (&now); + + /* If the guest keeps doing bad stuff, eventually lose patience with it. */ + if (hval->penalty >= 100) { + error ("%s: guest did too much bad stuff, so we stopped talking to it", + hval->name); + guest_force_close (hval); + return; + } + /* Decrement the penalty once a minute, so the guest can recover. */ + if (hval->penalty > 0 && now - hval->last_penalty_decr >= 60) { + hval->penalty--; + hval->last_penalty_decr = now; + } + + switch (hval->state) { + case guest_state_connecting: + /* Once we get a write event, we know the socket has + * connected, or there is an error. SO_ERROR tells us which. + */ + err = 0; + len = sizeof err; + getsockopt (hval->sock, SOL_SOCKET, SO_ERROR, &err, &len); /* NOTE(review): the return value is not checked. */ + if (err == 0) + hval->state = guest_state_request; + else { + errno = err; + perrorf ("connect: %s", hval->sock_path); + guest_force_close (hval); + return; + } + break; + + case guest_state_request: + /* Reading the guest's request, a single line terminated by \r?\n */ + max = hval->request_max - hval->request_posn; + if (max <= 0) { /* Request too long w/o termination. */ + hval->penalty++; + hval->request_posn = 0; + break; + } + r = read (hval->sock, &hval->request[hval->request_posn], max); + if (r == 0) { /* Socket closed. 
*/ + guest_force_close (hval); + return; + } + if (r == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + perrorf ("read: %s", hval->sock_path); + guest_force_close (hval); + return; + } + break; + } + + hval->request_posn += r; + + /* Have we got a terminating \n character in the buffer yet? Note + * the buffer is not NUL-terminated which is why we use memchr. + */ + again: + p = memchr (hval->request, '\n', hval->request_posn); + if (p == NULL) + break; + /* Is there more after the \n char? Normal guests shouldn't do + * this, but it can be an attempt to reestablish synchronization. + * It's documented that we throw away all but the last command sent, + * so let's do that. Each discarded line costs the guest one penalty point. + */ + extra = &hval->request[hval->request_posn]-(p+1); + if (extra > 0) { + hval->penalty++; + memmove (hval->request, p+1, extra); + hval->request_posn = extra; + goto again; + } + /* Looks like we've got ourselves a command. Remove trailing + * \r?\n char(s) and NUL-terminate the command string. + */ + assert (*p == '\n'); + assert (hval->request_posn >= 1); + assert (p == &hval->request[hval->request_posn-1]); + hval->request_posn--; + p--; + + if (hval->request_posn > 0 && *p == '\r') { + hval->request_posn--; + p--; + } + *(p+1) = '\0'; + execute_command (now, hval, hval->request); /* NOTE(review): nothing in this function switches to guest_state_reply; presumably execute_command is meant to - confirm. */ + hval->request_posn = 0; + break; + + case guest_state_reply: + /* Keep writing out the reply buffer until we've sent + * the whole thing. + */ + max = hval->reply_size - hval->reply_posn; + if (max <= 0) { + hval->state = guest_state_request; + break; + } + r = write (hval->sock, &hval->reply[hval->reply_posn], max); + if (r == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + perrorf ("write: %s", hval->sock_path); + guest_force_close (hval); + return; + } + break; + } + hval->reply_posn += r; + if (hval->reply_posn >= hval->reply_size) + hval->state = guest_state_request; + break; + case guest_state_dead: + /* We shouldn't get an event here. 
*/ + hval->penalty++; + } + /* Depending on the (new) state we want to set the + * events that we would like poll to give us next time. NOTE(review): this only changes hval->pollfd; nothing here calls apr_pollset_remove or apr_pollset_add again, so confirm whether the pollset actually picks up the new reqevents. + */ + switch (hval->state) { + case guest_state_connecting: + hval->pollfd.reqevents = APR_POLLOUT; + break; + case guest_state_request: + hval->pollfd.reqevents = APR_POLLIN; + break; + case guest_state_reply: + hval->pollfd.reqevents = APR_POLLOUT; + break; + case guest_state_dead: + hval->pollfd.reqevents = 0; + break; + } } -- 1.8.3.1