miniexpect.pod

   1 =encoding utf8
   2
   3 =head1 NAME
   4
   5 miniexpect - A very simple expect library for C.
   6
   7 =head1 SYNOPSIS
   8
   9  #include <errno.h>
  10  #include <sys/wait.h>
  11  #include <pcre.h>
  12  #include <miniexpect.h>
  13
  14  mexp_h *h;
  15  h = mexp_spawnl ("ssh", "ssh", "host", NULL);
  16  switch (mexp_expect (h, regexps, ovector, ovecsize)) {
  17    ...
  18  }
  19  mexp_close (h);
  20
  21  cc prog.c -o prog -lminiexpect -lpcre
  22
  23 =head1 DESCRIPTION
  24
  25 Miniexpect is a very simple expect-like library for C.  Expect is a
  26 way to control an external program that wants to be run interactively.
  27
  28 Miniexpect has a saner interface than libexpect, and doesn't depend on
  29 Tcl.  It is also thread safe, const-correct and uses modern C
  30 standards.
  31
  32 Miniexpect is a standalone library, except for a single dependency: it
  33 requires the PCRE (Perl Compatible Regular Expressions) library from
  34 L<http://www.pcre.org/>.  The PCRE dependency is fundamental because
  35 we want to offer the most powerful regular expression syntax to match
  36 on, but more importantly because PCRE has a convenient way to detect
  37 partial matches which made this library very simple to implement.
  38
  39 This manual page documents the API.  Examples of how to use the API
  40 can be found in the source directory.
  41
  42 =head1 CONCEPTS
  43
  44 Miniexpect lets you start up an external program, control it (by
  45 sending commands to it), and close it down gracefully.  Two things
  46 make this different from other APIs like L<popen(3)> and L<system(3)>:
  47 Firstly miniexpect creates a pseudoterminal (pty).  Secondly
  48 miniexpect lets you match the output of the program using regular
  49 expressions.  Both of these are handy for controlling interactive
  50 programs that might (for example) ask for passwords, but you can use
  51 miniexpect on just about any external program.
  52
  53 You can control multiple programs at the same time.
  54
  55 =head1 SPAWNING THE SUBPROCESS
  56
  57 There are four calls for creating a subprocess:
  58
  59 B<mexp_h *mexp_spawnl (const char *file, const char *arg, ...);>
  60
  61 This creates a subprocess running the external program C<file> (the
  62 current C<$PATH> is searched unless you give an absolute path).
  63 C<arg, ...> are the arguments to the program.  You should terminate
  64 the list of arguments with C<NULL>.  Usually the first argument should
  65 be the name of the program.
  66
  67 The return value is a handle (see next section).
  68
  69 If there was an error running the subprocess, C<NULL> is returned and
  70 the error is available in C<errno>.
  71
  72 For example, to run an ssh subprocess you could do:
  73
  74  h = mexp_spawnl ("ssh", "ssh", "-l", "root", "host", NULL);
  75
  76 or to run a particular ssh binary:
  77
  78  h = mexp_spawnl ("/usr/local/bin/ssh", "ssh", "-l", "root", "host", NULL);
  79
  80 An alternative to C<mexp_spawnl> is:
  81
  82 B<mexp_h *mexp_spawnv (const char *file, char **argv);>
  83
  84 This is the same as C<mexp_spawnl> except that you pass the arguments
  85 in a NULL-terminated array.
  86
  87 There are also two versions of the above calls which take flags:
  88
  89 B<mexp_h *mexp_spawnlf (unsigned flags, const char *file, const char *arg, ...);>
  90
  91 B<mexp_h *mexp_spawnvf (unsigned flags, const char *file, char **argv);>
  92
  93 The flags may contain the following values, logically ORed together:
  94
  95 =over 4
  96
  97 =item B<MEXP_SPAWN_KEEP_SIGNALS>
  98
  99 Do not reset signal handlers to C<SIG_DFL> in the subprocess.
 100
 101 =item B<MEXP_SPAWN_KEEP_FDS>
 102
 103 Do not close file descriptors E<ge> 3 in the subprocess.
 104
 105 =item B<MEXP_SPAWN_COOKED_MODE> or B<MEXP_SPAWN_RAW_MODE>
 106
 107 Configure the pty in cooked mode or raw mode.  Raw mode is the
 108 default.
 109
 110 =back
 111
 112 =head1 HANDLES
 113
 114 After spawning a subprocess, you get back a handle which is a pointer
 115 to a struct:
 116
 117  struct mexp_h;
 118  typedef struct mexp_h mexp_h;
 119
 120 Various methods can be used on the handle:
 121
 122 B<int mexp_get_fd (mexp_h *h);>
 123
 124 Return the file descriptor of the pty of the subprocess.  You can read
 125 and write to this if you want, although convenience functions are also
 126 provided (see below).
 127
 128 B<pid_t mexp_get_pid (mexp_h *h);>
 129
 130 Return the process ID of the subprocess.  You can send it signals if
 131 you want.
 132
 133 B<int mexp_get_timeout_ms (mexp_h *h);>
 134
 135 B<void mexp_set_timeout_ms (mexp_h *h, int millisecs);>
 136
 137 B<void mexp_set_timeout (mexp_h *h, int secs);>
 138
 139 Get or set the timeout used by C<mexp_expect> [see below].  The
 140 resolution is milliseconds (1/1000th of a second).  Set this before
 141 calling C<mexp_expect>.  Passing -1 to either of the C<set_> methods
 142 means no timeout.  The default setting is 60000 milliseconds (60
 143 seconds).
 144
  145 B<size_t mexp_get_read_size (mexp_h *h);>
 146
  147 B<void mexp_set_read_size (mexp_h *h, size_t read_size);>
 148
 149 Get or set the natural size (in bytes) for reads from the subprocess.
 150 The default is 1024.  Most callers will not need to change this.
 151
  152 B<int mexp_get_pcre_error (mexp_h *h);>
 153
 154 When C<mexp_expect> [see below] calls the PCRE function
 155 L<pcre_exec(3)>, it stashes the return value in the C<pcre_error>
 156 field in the handle, and that field is returned by this method.
 157
 158 There are two uses for this:
 159
 160 =over 4
 161
 162 =item 1.
 163
 164 If C<mexp_expect> returns C<MEXP_PCRE_ERROR>, then the actual PCRE
 165 error code returned by L<pcre_exec(3)> is available by calling this
 166 method.  For a list of PCRE error codes, see L<pcreapi(3)>.
 167
 168 =item 2.
 169
 170 A more unusual use is if you ever need to get the captured substrings
 171 from your regular expression (calling L<pcre_get_substring(3)>).  The
 172 third parameter of that function (C<stringcount>) is the value
 173 returned by L<pcre_exec(3)>, and so you can call it like this:
 174
 175  pcre_get_substring (h->buffer, ovector,
 176                      mexp_get_pcre_error (h), 1, &matched);
 177
 178 =back
 179
 180 The following fields in the handle do not have methods, but can be
 181 accessed directly instead:
 182
 183  char *buffer;
 184  size_t len;
 185  size_t alloc;
 186
 187 If C<mexp_expect> returns a match then these variables contain the
 188 read buffer.  Note this buffer does not contain the full input from
 189 the process, but it will contain at least the part matched by the
 190 regular expression (and maybe some more).  C<buffer> is the read
 191 buffer and C<len> is the number of bytes of data in the buffer.
 192
 193  ssize_t next_match;
 194
 195 If C<mexp_expect> returns a match, then C<next_match> points to the
 196 first byte in the buffer I<after> the fully matched expression.  (It
 197 may be C<-1> which means it is invalid).  The next time that
 198 C<mexp_expect> is called, it will start by consuming the data
 199 C<buffer[next_match...len-1]>.  Callers may also need to read from
 200 that point in the buffer before calling L<read(2)> on the file
 201 descriptor.  Callers may also set this, for example setting it to
 202 C<-1> in order to ignore the remainder of the buffer.  In most cases
 203 callers can ignore this field, and C<mexp_expect> will just do the
 204 right thing when called repeatedly.
 205
 206  void *user1;
 207  void *user2;
 208  void *user3;
 209
 210 Opaque pointers for use by the caller.  The library will not touch
 211 these.
 212
 213 =head1 CLOSING THE HANDLE
 214
 215 To close the handle and clean up the subprocess, call:
 216
 217 B<int mexp_close (mexp_h *h);>
 218
 219 This returns the status code from the subprocess.  This is in the form
 220 of a L<waitpid(2)>/L<system(3)> status so you have to use the macros
 221 C<WIFEXITED>, C<WEXITSTATUS>, C<WIFSIGNALED>, C<WTERMSIG> etc defined
 222 in C<E<lt>sys/wait.hE<gt>> to parse it.
 223
 224 If there was a system call error, then C<-1> is returned.  The error
 225 will be in C<errno>.
 226
 227 Notes:
 228
 229 =over 4
 230
 231 =item *
 232
 233 Even in error cases, the handle is always closed and its memory is
 234 freed by this call.
 235
 236 =item *
 237
 238 It is normal for the kernel to send SIGHUP to the subprocess.
 239
 240 If the subprocess doesn't catch the SIGHUP, then it will die
 241 with status:
 242
 243  WIFSIGNALED (status) && WTERMSIG (status) == SIGHUP
 244
 245 This case should not necessarily be considered an error.
 246
 247 =back
 248
 249 This is how code should check for and print errors from C<mexp_close>:
 250
 251   status = mexp_close (h);
 252   if (status == -1) {
 253     perror ("mexp_close");
 254     return -1;
 255   }
 256   if (WIFSIGNALED (status) && WTERMSIG (status) == SIGHUP)
 257     goto ignore; /* not an error */
  258   if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) {
 259     /* You could use the W* macros to print a better error message. */
  260     fprintf (stderr, "error: subprocess failed, status = %d\n", status);
 261     return -1;
 262   }
 263  ignore:
 264   /* no error case */
 265
 266 =head1 EXPECT FUNCTION
 267
 268 Miniexpect contains a powerful regular expression matching function
 269 based on L<pcre(3)>:
 270
 271 B<int mexp_expect (mexp_h *h, const mexp_regexp *regexps,
 272 int *ovector, int ovecsize);>
 273
 274 The output of the subprocess is matched against the list of PCRE
 275 regular expressions in C<regexps>.  C<regexps> is a list of regular
 276 expression structures:
 277
 278  struct mexp_regexp {
 279    int r;
 280    const pcre *re;
 281    const pcre_extra *extra;
 282    int options;
 283  };
 284  typedef struct mexp_regexp mexp_regexp;
 285
 286 C<r> is the integer code returned from C<mexp_expect> if this regular
 287 expression matches.  It B<must> be E<gt> 0.  C<r == 0> indicates the
 288 end of the list of regular expressions.  C<re> is the compiled regular
 289 expression.
 290
 291 Possible return values are:
 292
 293 =over 4
 294
 295 =item C<MEXP_TIMEOUT>
 296
 297 No input matched before the timeout (C<h-E<gt>timeout>) was
 298 reached.
 299
 300 =item C<MEXP_EOF>
 301
 302 The subprocess closed the connection.
 303
 304 =item C<MEXP_ERROR>
 305
 306 There was a system call error (eg. from the read call).  The error is
 307 returned in C<errno>.
 308
 309 =item C<MEXP_PCRE_ERROR>
 310
 311 There was a C<pcre_exec> error.  C<h-E<gt>pcre_error> is set to the
 312 error code.  See L<pcreapi(3)> for a list of the C<PCRE_*> error codes
 313 and what they mean.
 314
 315 =item C<r> E<gt> 0
 316
 317 If any regexp matches, the associated integer code (C<regexps[].r>)
 318 is returned.
 319
 320 =back
 321
 322 Notes:
 323
 324 =over 4
 325
 326 =item *
 327
 328 C<regexps> may be NULL or an empty list, which means we don't match
 329 against a regular expression.  This is useful if you just want to wait
 330 for EOF or timeout.
 331
 332 =item *
 333
 334 C<regexps[].re>, C<regexps[].extra>, C<regexps[].options>, C<ovector>
 335 and C<ovecsize> are passed through to the L<pcre_exec(3)> function.
 336
 337 =item *
 338
 339 If multiple regular expressions are passed, then they are checked in
 340 turn and the I<first> regular expression that matches is returned
 341 I<even if the match happens later in the input than another regular
 342 expression>.
 343
 344 For example if the input is C<"hello world"> and you pass the two
 345 regular expressions:
 346
 347  regexps[0].re = world
 348  regexps[1].re = hello
 349
 350 then the first regular expression (C<"world">) may match and the
 351 C<"hello"> part of the input may be ignored.
 352
 353 In some cases this can even lead to unpredictable matching.  In the
 354 case above, if we only happened to read C<"hello wor">, then the
 355 second regular expression (C<"hello">) I<would> match.
 356
 357 If this is a concern, combine your regular expressions into a single
 358 one, eg. C<(hello)|(world)>.
 359
 360 =back
 361
 362 =head2 mexp_expect example
 363
 364 It is easier to understand C<mexp_expect> by considering a simple
 365 example.
 366
 367 In this example we are waiting for ssh to either send us a password
 368 prompt, or (if no password was required) a command prompt, and based
 369 on the output we will either send back a password or a command.
 370
 371 The unusual C<(mexp_regexp[]){...}> syntax is called a "compound
 372 literal" and is available in C99.  If you need to use an older
 373 compiler, you can just use a local variable instead.
 374
 375  mexp_h *h;
  376  const char *errptr;
 377  int offset;
 378  pcre *password_re, *prompt_re;
 379  const int ovecsize = 12;
 380  int ovector[ovecsize];
 381
 382  password_re = pcre_compile ("assword", 0, &errptr, &offset, NULL);
 383  prompt_re = pcre_compile ("[$#] ", 0, &errptr, &offset, NULL);
 384
 385  switch (mexp_expect (h,
 386                       (mexp_regexp[]) {
 387                         { 100, .re = password_re },
 388                         { 101, .re = prompt_re },
 389                         { 0 },
 390                       }, ovector, ovecsize)) {
 391   case 100:
 392     /* here you would send a password */
 393     break;
 394   case 101:
 395     /* here you would send a command */
 396     break;
 397   case MEXP_EOF:
 398     fprintf (stderr, "error: ssh closed the connection unexpectedly\n");
 399     exit (EXIT_FAILURE);
 400   case MEXP_TIMEOUT:
 401     fprintf (stderr, "error: timeout before reaching the prompt\n");
 402     exit (EXIT_FAILURE);
 403   case MEXP_ERROR:
 404     perror ("mexp_expect");
 405     exit (EXIT_FAILURE);
 406   case MEXP_PCRE_ERROR:
 407     fprintf (stderr, "error: PCRE error: %d\n", h->pcre_error);
 408     exit (EXIT_FAILURE);
 409  }
 410
 411 =head1 SENDING COMMANDS TO THE SUBPROCESS
 412
 413 You can write to the subprocess simply by writing to C<h-E<gt>fd>.
 414 However we also provide a convenience function:
 415
 416 B<int mexp_printf (mexp_h *h, const char *fs, ...);>
 417
  418 This returns the number of bytes written, if the whole message was written OK.
 419 If there was an error, -1 is returned and the error is available in
 420 C<errno>.
 421
 422 Notes:
 423
 424 =over 4
 425
 426 =item *
 427
 428 C<mexp_printf> will not do a partial write.  If it cannot write all
 429 the data, then it will return an error.
 430
 431 =item *
 432
 433 This function does not write a newline automatically.  If you want to
 434 send a command followed by a newline you have to do something like:
 435
 436  mexp_printf (h, "exit\n");
 437
 438 =back
 439
 440 B<int mexp_send_interrupt (mexp_h *h);>
 441
 442 Send the interrupt character (C<^C>, Ctrl-C, C<\003>).  This is like
 443 pressing C<^C> - the subprocess (or remote process, if using C<ssh>)
 444 is gracefully killed.
 445
 446 Note this only works if the pty is in cooked mode
 447 (ie. C<MEXP_SPAWN_COOKED_MODE> was passed to C<mexp_spawnlf> or
 448 C<mexp_spawnvf>).  In raw mode, all characters are passed through
 449 without any special interpretation.
 450
 451 =head1 SOURCE
 452
 453 Source is available from:
 454 L<http://git.annexia.org/?p=miniexpect.git;a=summary>
 455
 456 =head1 SEE ALSO
 457
 458 L<pcre(3)>,
 459 L<pcre_exec(3)>,
 460 L<pcreapi(3)>,
 461 L<waitpid(2)>,
 462 L<system(3)>.
 463
 464 =head1 AUTHORS
 465
 466 Richard W.M. Jones (C<rjones at redhat dot com>)
 467
 468 =head1 LICENSE
 469
 470 The library is released under the Library GPL (LGPL) version 2 or at
 471 your option any later version.
 472
 473 =head1 COPYRIGHT
 474
 475 Copyright (C) 2014 Red Hat Inc.