miniexpect.pod

   1 =encoding utf8
   2
   3 =head1 NAME
   4
   5 miniexpect - A very simple expect library for C.
   6
   7 =head1 SYNOPSIS
   8
   9  #include <errno.h>
  10  #include <sys/wait.h>
  11  #include <pcre.h>
  12  #include <miniexpect.h>
  13
  14  mexp_h *h;
  15  h = mexp_spawnl ("ssh", "ssh", "host");
  16  switch (mexp_expect (h, regexps, ovector, ovecsize)) {
  17    ...
  18  }
  19  mexp_close (h);
  20
  21  cc prog.c -o prog -lminiexpect -lpcre
  22
  23 =head1 DESCRIPTION
  24
  25 Miniexpect is a very simple expect-like library for C.  Expect is a
  26 way to control an external program that wants to be run interactively.
  27
  28 Miniexpect has a saner interface than libexpect, and doesn't depend on
  29 Tcl.  It is also thread safe, const-correct and uses modern C
  30 standards.
  31
  32 Miniexpect is a standalone library, except for a single dependency: it
  33 requires the PCRE (Perl Compatible Regular Expressions) library from
  34 L<http://www.pcre.org/>.  The PCRE dependency is fundamental because
  35 we want to offer the most powerful regular expression syntax to match
  36 on, but more importantly because PCRE has a convenient way to detect
  37 partial matches which made this library very simple to implement.
  38
  39 This manual page documents the API.  Examples of how to use the API
  40 can be found in the source directory.
  41
  42 =head1 CONCEPTS
  43
  44 Miniexpect lets you start up an external program, control it (by
  45 sending commands to it), and close it down gracefully.  Two things
  46 make this different from other APIs like L<popen(3)> and L<system(3)>:
  47 Firstly miniexpect creates a pseudoterminal (pty).  Secondly
  48 miniexpect lets you match the output of the program using regular
  49 expressions.  Both of these are handy for controlling interactive
  50 programs that might (for example) ask for passwords, but you can use
  51 miniexpect on just about any external program.
  52
  53 You can control multiple programs at the same time.
  54
  55 =head1 SPAWNING THE SUBPROCESS
  56
  57 There are four calls for creating a subprocess:
  58
  59 B<mexp_h *mexp_spawnl (const char *file, const char *arg, ...);>
  60
  61 This creates a subprocess running the external program C<file> (the
  62 current C<$PATH> is searched unless you give an absolute path).
  63 C<arg, ...> are the arguments to the program.  Usually the first
  64 argument should be the name of the program.
  65
  66 The return value is a handle (see next section).
  67
  68 If there was an error running the subprocess, C<NULL> is returned and
  69 the error is available in C<errno>.
  70
  71 For example, to run an ssh subprocess you could do:
  72
  73  h = mexp_spawnl ("ssh", "ssh", "-l", "root", "host");
  74
  75 or to run a particular ssh binary:
  76
  77  h = mexp_spawnl ("/usr/local/bin/ssh", "ssh", "-l", "root", "host");
  78
  79 An alternative to C<mexp_spawnl> is:
  80
  81 B<mexp_h *mexp_spawnv (const char *file, char **argv);>
  82
  83 This is the same as C<mexp_spawnl> except that you pass the arguments
  84 in a NULL-terminated array.
  85
  86 There are also two versions of the above calls which take flags:
  87
  88 B<mexp_h *mexp_spawnlf (unsigned flags, const char *file, const char *arg, ...);>
  89
  90 B<mexp_h *mexp_spawnvf (unsigned flags, const char *file, char **argv);>
  91
  92 The flags may contain the following values, logically ORed together:
  93
  94 =over 4
  95
   96 =item B<MEXP_SPAWN_KEEP_SIGNALS>
  97
  98 Do not reset signal handlers to C<SIG_DFL> in the subprocess.
  99
 100 =item B<MEXP_SPAWN_KEEP_FDS>
 101
 102 Do not close file descriptors E<ge> 3 in the subprocess.
 103
 104 =item B<MEXP_SPAWN_COOKED_MODE> or B<MEXP_SPAWN_RAW_MODE>
 105
 106 Configure the pty in cooked mode or raw mode.  Raw mode is the
 107 default.
 108
 109 =back
 110
 111 =head1 HANDLES
 112
 113 After spawning a subprocess, you get back a handle which is a pointer
 114 to a struct:
 115
 116  struct mexp_h;
 117  typedef struct mexp_h mexp_h;
 118
 119 Various methods can be used on the handle:
 120
 121 B<int mexp_get_fd (mexp_h *h);>
 122
 123 Return the file descriptor of the pty of the subprocess.  You can read
 124 and write to this if you want, although convenience functions are also
 125 provided (see below).
 126
 127 B<pid_t mexp_get_pid (mexp_h *h);>
 128
 129 Return the process ID of the subprocess.  You can send it signals if
 130 you want.
 131
 132 B<int mexp_get_timeout_ms (mexp_h *h);>
 133
 134 B<void mexp_set_timeout_ms (mexp_h *h, int millisecs);>
 135
 136 B<void mexp_set_timeout (mexp_h *h, int secs);>
 137
 138 Get or set the timeout used by C<mexp_expect> [see below].  The
 139 resolution is milliseconds (1/1000th of a second).  Set this before
 140 calling C<mexp_expect>.  Passing -1 to either of the C<set_> methods
 141 means no timeout.  The default setting is 60000 milliseconds (60
 142 seconds).
 143
  144 B<size_t mexp_get_read_size (mexp_h *h);>
 145
  146 B<void mexp_set_read_size (mexp_h *h, size_t read_size);>
 147
 148 Get or set the natural size (in bytes) for reads from the subprocess.
 149 The default is 1024.  Most callers will not need to change this.
 150
  151 B<int mexp_get_pcre_error (mexp_h *h);>
 152
 153 When C<mexp_expect> [see below] calls the PCRE function
 154 L<pcre_exec(3)>, it stashes the return value in the C<pcre_error>
 155 field in the handle, and that field is returned by this method.
 156
 157 There are two uses for this:
 158
 159 =over 4
 160
 161 =item 1.
 162
 163 If C<mexp_expect> returns C<MEXP_PCRE_ERROR>, then the actual PCRE
 164 error code returned by L<pcre_exec(3)> is available by calling this
 165 method.  For a list of PCRE error codes, see L<pcreapi(3)>.
 166
 167 =item 2.
 168
 169 A more unusual use is if you ever need to get the captured substrings
 170 from your regular expression (calling L<pcre_get_substring(3)>).  The
 171 third parameter of that function (C<stringcount>) is the value
 172 returned by L<pcre_exec(3)>, and so you can call it like this:
 173
 174  pcre_get_substring (h->buffer, ovector,
 175                      mexp_get_pcre_error (h), 1, &matched);
 176
 177 =back
 178
 179 The following fields in the handle do not have methods, but can be
 180 accessed directly instead:
 181
 182  char *buffer;
 183  size_t len;
 184  size_t alloc;
 185
 186 If C<mexp_expect> returns a match then these variables contain the
 187 read buffer.  Note this buffer does not contain the full input from
 188 the process, but it will contain at least the part matched by the
 189 regular expression (and maybe some more).  C<buffer> is the read
 190 buffer and C<len> is the number of bytes of data in the buffer.
 191
 192  ssize_t next_match;
 193
 194 If C<mexp_expect> returns a match, then C<next_match> points to the
 195 first byte in the buffer I<after> the fully matched expression.  (It
 196 may be C<-1> which means it is invalid).  The next time that
 197 C<mexp_expect> is called, it will start by consuming the data
 198 C<buffer[next_match...len-1]>.  Callers may also need to read from
 199 that point in the buffer before calling L<read(2)> on the file
 200 descriptor.  Callers may also set this, for example setting it to
 201 C<-1> in order to ignore the remainder of the buffer.  In most cases
 202 callers can ignore this field, and C<mexp_expect> will just do the
 203 right thing when called repeatedly.
 204
 205  void *user1;
 206  void *user2;
 207  void *user3;
 208
 209 Opaque pointers for use by the caller.  The library will not touch
 210 these.
 211
 212 =head1 CLOSING THE HANDLE
 213
 214 To close the handle and clean up the subprocess, call:
 215
 216 B<int mexp_close (mexp_h *h);>
 217
 218 This returns the status code from the subprocess.  This is in the form
 219 of a L<waitpid(2)>/L<system(3)> status so you have to use the macros
 220 C<WIFEXITED>, C<WEXITSTATUS>, C<WIFSIGNALED>, C<WTERMSIG> etc defined
 221 in C<E<lt>sys/wait.hE<gt>> to parse it.
 222
 223 If there was a system call error, then C<-1> is returned.  The error
 224 will be in C<errno>.
 225
 226 Notes:
 227
 228 =over 4
 229
 230 =item *
 231
 232 Even in error cases, the handle is always closed and its memory is
 233 freed by this call.
 234
 235 =item *
 236
 237 It is normal for the kernel to send SIGHUP to the subprocess.
 238
 239 If the subprocess doesn't catch the SIGHUP, then it will die
 240 with status:
 241
 242  WIFSIGNALED (status) && WTERMSIG (status) == SIGHUP
 243
 244 This case should not necessarily be considered an error.
 245
 246 =back
 247
 248 This is how code should check for and print errors from C<mexp_close>:
 249
 250   status = mexp_close (h);
 251   if (status == -1) {
 252     perror ("mexp_close");
 253     return -1;
 254   }
 255   if (WIFSIGNALED (status) && WTERMSIG (status) == SIGHUP)
 256     goto ignore; /* not an error */
  257   if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) {
 258     /* You could use the W* macros to print a better error message. */
 259     fprintf (stderr, "error: subprocess failed, status = %d", status);
 260     return -1;
 261   }
 262  ignore:
 263   /* no error case */
 264
 265 =head1 EXPECT FUNCTION
 266
 267 Miniexpect contains a powerful regular expression matching function
 268 based on L<pcre(3)>:
 269
 270 B<int mexp_expect (mexp_h *h, const mexp_regexp *regexps,
 271 int *ovector, int ovecsize);>
 272
 273 The output of the subprocess is matched against the list of PCRE
 274 regular expressions in C<regexps>.  C<regexps> is a list of regular
 275 expression structures:
 276
 277  struct mexp_regexp {
 278    int r;
 279    const pcre *re;
 280    const pcre_extra *extra;
 281    int options;
 282  };
 283  typedef struct mexp_regexp mexp_regexp;
 284
 285 C<r> is the integer code returned from C<mexp_expect> if this regular
 286 expression matches.  It B<must> be E<gt> 0.  C<r == 0> indicates the
 287 end of the list of regular expressions.  C<re> is the compiled regular
 288 expression.
 289
 290 Possible return values are:
 291
 292 =over 4
 293
 294 =item C<MEXP_TIMEOUT>
 295
 296 No input matched before the timeout (C<h-E<gt>timeout>) was
 297 reached.
 298
 299 =item C<MEXP_EOF>
 300
 301 The subprocess closed the connection.
 302
 303 =item C<MEXP_ERROR>
 304
 305 There was a system call error (eg. from the read call).  The error is
 306 returned in C<errno>.
 307
 308 =item C<MEXP_PCRE_ERROR>
 309
 310 There was a C<pcre_exec> error.  C<h-E<gt>pcre_error> is set to the
 311 error code.  See L<pcreapi(3)> for a list of the C<PCRE_*> error codes
 312 and what they mean.
 313
 314 =item C<r> E<gt> 0
 315
 316 If any regexp matches, the associated integer code (C<regexps[].r>)
 317 is returned.
 318
 319 =back
 320
 321 Notes:
 322
 323 =over 4
 324
 325 =item *
 326
 327 C<regexps> may be NULL or an empty list, which means we don't match
 328 against a regular expression.  This is useful if you just want to wait
 329 for EOF or timeout.
 330
 331 =item *
 332
 333 C<regexps[].re>, C<regexps[].extra>, C<regexps[].options>, C<ovector>
 334 and C<ovecsize> are passed through to the L<pcre_exec(3)> function.
 335
 336 =item *
 337
 338 If multiple regular expressions are passed, then they are checked in
 339 turn and the I<first> regular expression that matches is returned
 340 I<even if the match happens later in the input than another regular
 341 expression>.
 342
 343 For example if the input is C<"hello world"> and you pass the two
 344 regular expressions:
 345
 346  regexps[0].re = world
 347  regexps[1].re = hello
 348
 349 then the first regular expression (C<"world">) may match and the
 350 C<"hello"> part of the input may be ignored.
 351
 352 In some cases this can even lead to unpredictable matching.  In the
 353 case above, if we only happened to read C<"hello wor">, then the
 354 second regular expression (C<"hello">) I<would> match.
 355
 356 If this is a concern, combine your regular expressions into a single
 357 one, eg. C<(hello)|(world)>.
 358
 359 =back
 360
 361 =head2 mexp_expect example
 362
 363 It is easier to understand C<mexp_expect> by considering a simple
 364 example.
 365
 366 In this example we are waiting for ssh to either send us a password
 367 prompt, or (if no password was required) a command prompt, and based
 368 on the output we will either send back a password or a command.
 369
 370 The unusual C<(mexp_regexp[]){...}> syntax is called a "compound
 371 literal" and is available in C99.  If you need to use an older
 372 compiler, you can just use a local variable instead.
 373
 374  mexp_h *h;
  375  const char *errptr;
 376  int offset;
 377  pcre *password_re, *prompt_re;
 378  const int ovecsize = 12;
 379  int ovector[ovecsize];
 380
 381  password_re = pcre_compile ("assword", 0, &errptr, &offset, NULL);
 382  prompt_re = pcre_compile ("[$#] ", 0, &errptr, &offset, NULL);
 383
 384  switch (mexp_expect (h,
 385                       (mexp_regexp[]) {
 386                         { 100, .re = password_re },
 387                         { 101, .re = prompt_re },
 388                         { 0 },
 389                       }, ovector, ovecsize)) {
 390   case 100:
 391     /* here you would send a password */
 392     break;
 393   case 101:
 394     /* here you would send a command */
 395     break;
 396   case MEXP_EOF:
 397     fprintf (stderr, "error: ssh closed the connection unexpectedly\n");
 398     exit (EXIT_FAILURE);
 399   case MEXP_TIMEOUT:
 400     fprintf (stderr, "error: timeout before reaching the prompt\n");
 401     exit (EXIT_FAILURE);
 402   case MEXP_ERROR:
 403     perror ("mexp_expect");
 404     exit (EXIT_FAILURE);
 405   case MEXP_PCRE_ERROR:
 406     fprintf (stderr, "error: PCRE error: %d\n", h->pcre_error);
 407     exit (EXIT_FAILURE);
 408  }
 409
 410 =head1 SENDING COMMANDS TO THE SUBPROCESS
 411
 412 You can write to the subprocess simply by writing to C<h-E<gt>fd>.
 413 However we also provide a convenience function:
 414
 415 B<int mexp_printf (mexp_h *h, const char *fs, ...);>
 416
  417 This returns the number of bytes written, if the whole message was
  418 written OK.  If there was an error, -1 is returned and the error is
  419 available in C<errno>.
 420
 421 Notes:
 422
 423 =over 4
 424
 425 =item *
 426
 427 C<mexp_printf> will not do a partial write.  If it cannot write all
 428 the data, then it will return an error.
 429
 430 =item *
 431
 432 This function does not write a newline automatically.  If you want to
 433 send a command followed by a newline you have to do something like:
 434
 435  mexp_printf (h, "exit\n");
 436
 437 =back
 438
 439 B<int mexp_send_interrupt (mexp_h *h);>
 440
 441 Send the interrupt character (C<^C>, Ctrl-C, C<\003>).  This is like
 442 pressing C<^C> - the subprocess (or remote process, if using C<ssh>)
 443 is gracefully killed.
 444
 445 Note this only works if the pty is in cooked mode
 446 (ie. C<MEXP_SPAWN_COOKED_MODE> was passed to C<mexp_spawnlf> or
 447 C<mexp_spawnvf>).  In raw mode, all characters are passed through
 448 without any special interpretation.
 449
 450 =head1 SOURCE
 451
 452 Source is available from:
 453 L<http://git.annexia.org/?p=miniexpect.git;a=summary>
 454
 455 =head1 SEE ALSO
 456
 457 L<pcre(3)>,
 458 L<pcre_exec(3)>,
 459 L<pcreapi(3)>,
 460 L<waitpid(2)>,
 461 L<system(3)>.
 462
 463 =head1 AUTHORS
 464
 465 Richard W.M. Jones (C<rjones at redhat dot com>)
 466
 467 =head1 LICENSE
 468
 469 The library is released under the Library GPL (LGPL) version 2 or at
 470 your option any later version.
 471
 472 =head1 COPYRIGHT
 473
 474 Copyright (C) 2014 Red Hat Inc.