miniexpect.pod

   1 =encoding utf8
   2
   3 =head1 NAME
   4
   5 miniexpect - A very simple expect library for C.
   6
   7 =head1 SYNOPSIS
   8
   9  #include <errno.h>
  10  #include <sys/wait.h>
  11  #define PCRE2_CODE_UNIT_WIDTH 8
  12  #include <pcre2.h>
  13  #include <miniexpect.h>
  14
  15  mexp_h *h;
  16  h = mexp_spawnl ("ssh", "ssh", "host", NULL);
  17  switch (mexp_expect (h, regexps, match_data)) {
  18    ...
  19  }
  20  mexp_close (h);
  21
  22  cc prog.c -o prog -lminiexpect -lpcre2-8
  23
  24 =head1 DESCRIPTION
  25
  26 Miniexpect is a very simple expect-like library for C.  Expect is a
  27 way to control an external program that wants to be run interactively.
  28
  29 Miniexpect has a saner interface than libexpect, and doesn't depend on
  30 Tcl.  It is also thread safe, const-correct and uses modern C
  31 standards.
  32
  33 Miniexpect is a standalone library, except for a single dependency: it
  34 requires the PCRE2 (Perl Compatible Regular Expressions) library from
  35 L<http://www.pcre.org/>.  The PCRE2 dependency is fundamental because
  36 we want to offer the most powerful regular expression syntax to match
  37 on, but more importantly because PCRE2 has a convenient way to detect
  38 partial matches which made this library very simple to implement.
  39
  40 This manual page documents the API.  Examples of how to use the API
  41 can be found in the source directory.
  42
  43 =head1 CONCEPTS
  44
  45 Miniexpect lets you start up an external program, control it (by
  46 sending commands to it), and close it down gracefully.  Two things
  47 make this different from other APIs like L<popen(3)> and L<system(3)>:
  48 Firstly miniexpect creates a pseudoterminal (pty).  Secondly
  49 miniexpect lets you match the output of the program using regular
  50 expressions.  Both of these are handy for controlling interactive
  51 programs that might (for example) ask for passwords, but you can use
  52 miniexpect on just about any external program.
  53
  54 You can control multiple programs at the same time.
  55
  56 =head1 SPAWNING THE SUBPROCESS
  57
  58 There are four calls for creating a subprocess:
  59
  60 B<mexp_h *mexp_spawnl (const char *file, const char *arg, ...);>
  61
  62 This creates a subprocess running the external program C<file> (the
  63 current C<$PATH> is searched unless you give an absolute path).
  64 C<arg, ...> are the arguments to the program.  You should terminate
  65 the list of arguments with C<NULL>.  Usually the first argument should
  66 be the name of the program.
  67
  68 The return value is a handle (see next section).
  69
  70 If there was an error running the subprocess, C<NULL> is returned and
  71 the error is available in C<errno>.
  72
  73 For example, to run an ssh subprocess you could do:
  74
  75  h = mexp_spawnl ("ssh", "ssh", "-l", "root", "host", NULL);
  76
  77 or to run a particular ssh binary:
  78
  79  h = mexp_spawnl ("/usr/local/bin/ssh", "ssh", "-l", "root", "host", NULL);
  80
  81 An alternative to C<mexp_spawnl> is:
  82
  83 B<mexp_h *mexp_spawnv (const char *file, char **argv);>
  84
  85 This is the same as C<mexp_spawnl> except that you pass the arguments
  86 in a NULL-terminated array.
  87
  88 There are also two versions of the above calls which take flags:
  89
  90 B<mexp_h *mexp_spawnlf (unsigned flags, const char *file, const char *arg, ...);>
  91
  92 B<mexp_h *mexp_spawnvf (unsigned flags, const char *file, char **argv);>
  93
  94 The flags may contain the following values, logically ORed together:
  95
  96 =over 4
  97
  98 =item B<MEXP_SPAWN_KEEP_SIGNALS>
  99
 100 Do not reset signal handlers to C<SIG_DFL> in the subprocess.
 101
 102 =item B<MEXP_SPAWN_KEEP_FDS>
 103
 104 Do not close file descriptors E<ge> 3 in the subprocess.
 105
 106 =item B<MEXP_SPAWN_COOKED_MODE> or B<MEXP_SPAWN_RAW_MODE>
 107
 108 Configure the pty in cooked mode or raw mode.  Raw mode is the
 109 default.
 110
 111 =back
 112
 113 =head1 HANDLES
 114
 115 After spawning a subprocess, you get back a handle which is a pointer
 116 to a struct:
 117
 118  struct mexp_h;
 119  typedef struct mexp_h mexp_h;
 120
 121 Various methods can be used on the handle:
 122
 123 B<int mexp_get_fd (mexp_h *h);>
 124
 125 Return the file descriptor of the pty of the subprocess.  You can read
 126 and write to this if you want, although convenience functions are also
 127 provided (see below).
 128
 129 B<pid_t mexp_get_pid (mexp_h *h);>
 130
 131 Return the process ID of the subprocess.  You can send it signals if
 132 you want.
 133
 134 B<int mexp_get_timeout_ms (mexp_h *h);>
 135
 136 B<void mexp_set_timeout_ms (mexp_h *h, int millisecs);>
 137
 138 B<void mexp_set_timeout (mexp_h *h, int secs);>
 139
 140 Get or set the timeout used by C<mexp_expect> [see below].  The
 141 resolution is milliseconds (1/1000th of a second).  Set this before
 142 calling C<mexp_expect>.  Passing -1 to either of the C<set_> methods
 143 means no timeout.  The default setting is 60000 milliseconds (60
 144 seconds).
 145
 146 B<size_t mexp_get_read_size (mexp_h *h);>
 147
 148 B<void mexp_set_read_size (mexp_h *h, size_t read_size);>
 149
 150 Get or set the natural size (in bytes) for reads from the subprocess.
 151 The default is 1024.  Most callers will not need to change this.
 152
 153 B<int mexp_get_pcre_error (mexp_h *h);>
 154
 155 When C<mexp_expect> [see below] calls the PCRE function
 156 L<pcre2_match(3)>, it stashes the return value in the C<pcre_error>
 157 field in the handle, and that field is returned by this method.
 158
 159 If C<mexp_expect> returns C<MEXP_PCRE_ERROR>, then the actual PCRE
 160 error code returned by L<pcre2_match(3)> is available by calling this
 161 method.  For a list of PCRE error codes, see L<pcre2api(3)>.
 162
 163 B<void mexp_set_debug_file (mexp_h *h, FILE *fp);>
 164
 165 B<FILE *mexp_get_debug_file (mexp_h *h);>
 166
 167 Set or get the debug file of the handle.  To enable debugging, pass a
 168 non-C<NULL> file handle, eg. C<stderr>.  To disable debugging, pass
 169 C<NULL>.  Debugging messages are printed on the file handle.
 170
 171 Note that all output and input gets printed, including passwords.  To
 172 prevent passwords from being printed, modify your code to call
 173 C<mexp_printf_password> instead of C<mexp_printf>.
 174
 175 The following fields in the handle do not have methods, but can be
 176 accessed directly instead:
 177
 178  char *buffer;
 179  size_t len;
 180  size_t alloc;
 181
 182 If C<mexp_expect> returns a match then these variables contain the
 183 read buffer.  Note this buffer does not contain the full input from
 184 the process, but it will contain at least the part matched by the
 185 regular expression (and maybe some more).  C<buffer> is the read
 186 buffer and C<len> is the number of bytes of data in the buffer.
 187
 188  ssize_t next_match;
 189
 190 If C<mexp_expect> returns a match, then C<next_match> points to the
 191 first byte in the buffer I<after> the fully matched expression.  (It
 192 may be C<-1> which means it is invalid).  The next time that
 193 C<mexp_expect> is called, it will start by consuming the data
 194 C<buffer[next_match...len-1]>.  Callers may also need to read from
 195 that point in the buffer before calling L<read(2)> on the file
 196 descriptor.  Callers may also set this, for example setting it to
 197 C<-1> in order to ignore the remainder of the buffer.  In most cases
 198 callers can ignore this field, and C<mexp_expect> will just do the
 199 right thing when called repeatedly.
 200
 201  void *user1;
 202  void *user2;
 203  void *user3;
 204
 205 Opaque pointers for use by the caller.  The library will not touch
 206 these.
 207
 208 =head1 CLOSING THE HANDLE
 209
 210 To close the handle and clean up the subprocess, call:
 211
 212 B<int mexp_close (mexp_h *h);>
 213
 214 This returns the status code from the subprocess.  This is in the form
 215 of a L<waitpid(2)>/L<system(3)> status so you have to use the macros
 216 C<WIFEXITED>, C<WEXITSTATUS>, C<WIFSIGNALED>, C<WTERMSIG> etc defined
 217 in C<E<lt>sys/wait.hE<gt>> to parse it.
 218
 219 If there was a system call error, then C<-1> is returned.  The error
 220 will be in C<errno>.
 221
 222 Notes:
 223
 224 =over 4
 225
 226 =item *
 227
 228 Even in error cases, the handle is always closed and its memory is
 229 freed by this call.
 230
 231 =item *
 232
 233 It is normal for the kernel to send SIGHUP to the subprocess.
 234
 235 If the subprocess doesn't catch the SIGHUP, then it will die
 236 with status:
 237
 238  WIFSIGNALED (status) && WTERMSIG (status) == SIGHUP
 239
 240 This case should not necessarily be considered an error.
 241
 242 =back
 243
 244 This is how code should check for and print errors from C<mexp_close>:
 245
 246   status = mexp_close (h);
 247   if (status == -1) {
 248     perror ("mexp_close");
 249     return -1;
 250   }
 251   if (WIFSIGNALED (status) && WTERMSIG (status) == SIGHUP)
 252     goto ignore; /* not an error */
 253   if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) {
 254     /* You could use the W* macros to print a better error message. */
 255     fprintf (stderr, "error: subprocess failed, status = %d\n", status);
 256     return -1;
 257   }
 258  ignore:
 259   /* no error case */
 260
 261 =head1 EXPECT FUNCTION
 262
 263 Miniexpect contains a powerful regular expression matching function
 264 based on L<pcre2(3)>:
 265
 266 B<int mexp_expect (mexp_h *h, const mexp_regexp *regexps,
 267 pcre2_match_data *match_data);>
 268
 269 The output of the subprocess is matched against the list of PCRE
 270 regular expressions in C<regexps>.  C<regexps> is a list of regular
 271 expression structures:
 272
 273  struct mexp_regexp {
 274    int r;
 275    const pcre2_code *re;
 276    int options;
 277  };
 278  typedef struct mexp_regexp mexp_regexp;
 279
 280 C<r> is the integer code returned from C<mexp_expect> if this regular
 281 expression matches.  It B<must> be E<gt> 0.  C<r == 0> indicates the
 282 end of the list of regular expressions.  C<re> is the compiled regular
 283 expression.
 284
 285 Possible return values are:
 286
 287 =over 4
 288
 289 =item C<MEXP_TIMEOUT>
 290
 291 No input matched before the timeout (C<h-E<gt>timeout>) was
 292 reached.
 293
 294 =item C<MEXP_EOF>
 295
 296 The subprocess closed the connection.
 297
 298 =item C<MEXP_ERROR>
 299
 300 There was a system call error (eg. from the read call).  The error is
 301 returned in C<errno>.
 302
 303 =item C<MEXP_PCRE_ERROR>
 304
 305 There was a L<pcre2_match(3)> error.  C<h-E<gt>pcre_error> is set to
 306 the error code.  See L<pcre2api(3)> for a list of the
 307 C<PCRE2_ERROR_*> error codes and what they mean.
 308
 309 =item C<r> E<gt> 0
 310
 311 If any regexp matches, the associated integer code (C<regexps[].r>)
 312 is returned.
 313
 314 =back
 315
 316 Notes:
 317
 318 =over 4
 319
 320 =item *
 321
 322 C<regexps> may be NULL or an empty list, which means we don't match
 323 against a regular expression.  This is useful if you just want to wait
 324 for EOF or timeout.
 325
 326 =item *
 327
 328 C<regexps[].re>, C<regexps[].options> and C<match_data> are passed
 329 through to the L<pcre2_match(3)> function.
 330
 331 =item *
 332
 333 If multiple regular expressions are passed, then they are checked in
 334 turn and the I<first> regular expression that matches is returned
 335 I<even if the match happens later in the input than another regular
 336 expression>.
 337
 338 For example if the input is C<"hello world"> and you pass the two
 339 regular expressions:
 340
 341  regexps[0].re = world
 342  regexps[1].re = hello
 343
 344 then the first regular expression (C<"world">) may match and the
 345 C<"hello"> part of the input may be ignored.
 346
 347 In some cases this can even lead to unpredictable matching.  In the
 348 case above, if we only happened to read C<"hello wor">, then the
 349 second regular expression (C<"hello">) I<would> match.
 350
 351 If this is a concern, combine your regular expressions into a single
 352 one, eg. C<(hello)|(world)>.
 353
 354 =back
 355
 356 =head2 mexp_expect example
 357
 358 It is easier to understand C<mexp_expect> by considering a simple
 359 example.
 360
 361 In this example we are waiting for ssh to either send us a password
 362 prompt, or (if no password was required) a command prompt, and based
 363 on the output we will either send back a password or a command.
 364
 365 The unusual C<(mexp_regexp[]){...}> syntax is called a "compound
 366 literal" and is available in C99.  If you need to use an older
 367 compiler, you can just use a local variable instead.
 368
 369  mexp_h *h;
 370  int errcode;
 371  PCRE2_SIZE offset;
 372  pcre2_code *password_re, *prompt_re;
 373  pcre2_match_data *match_data = pcre2_match_data_create (4, NULL);
 374
 375  password_re = pcre2_compile ((PCRE2_SPTR) "assword", PCRE2_ZERO_TERMINATED, 0, &errcode, &offset, NULL);
 376  prompt_re = pcre2_compile ((PCRE2_SPTR) "[$#] ", PCRE2_ZERO_TERMINATED, 0, &errcode, &offset, NULL);
 377
 378  switch (mexp_expect (h,
 379                       (mexp_regexp[]) {
 380                         { 100, .re = password_re },
 381                         { 101, .re = prompt_re },
 382                         { 0 },
 383                       }, match_data)) {
 384   case 100:
 385     /* here you would send a password */
 386     break;
 387   case 101:
 388     /* here you would send a command */
 389     break;
 390   case MEXP_EOF:
 391     fprintf (stderr, "error: ssh closed the connection unexpectedly\n");
 392     exit (EXIT_FAILURE);
 393   case MEXP_TIMEOUT:
 394     fprintf (stderr, "error: timeout before reaching the prompt\n");
 395     exit (EXIT_FAILURE);
 396   case MEXP_ERROR:
 397     perror ("mexp_expect");
 398     exit (EXIT_FAILURE);
 399   case MEXP_PCRE_ERROR:
 400     fprintf (stderr, "error: PCRE error: %d\n", h->pcre_error);
 401     exit (EXIT_FAILURE);
 402  }
 403
 404 =head1 SENDING COMMANDS TO THE SUBPROCESS
 405
 406 You can write to the subprocess simply by writing to C<h-E<gt>fd>.
 407 However we also provide a convenience function:
 408
 409 B<int mexp_printf (mexp_h *h, const char *fs, ...);>
 410
 411 B<int mexp_printf_password (mexp_h *h, const char *fs, ...);>
 412
 413 If the whole message was written OK, this returns the number of bytes
 414 written.  If there was an error, -1 is returned and the error is
 415 available in C<errno>.
 416
 417 Notes:
 418
 419 =over 4
 420
 421 =item *
 422
 423 C<mexp_printf> will not do a partial write.  If it cannot write all
 424 the data, then it will return an error.
 425
 426 =item *
 427
 428 This function does not write a newline automatically.  If you want to
 429 send a command followed by a newline you have to do something like:
 430
 431  mexp_printf (h, "exit\n");
 432
 433 =item *
 434
 435 C<mexp_printf_password> works identically to C<mexp_printf> except
 436 that the output is I<not> sent to the debugging file if debugging is
 437 enabled.  As the name suggests, use this for passwords so that they
 438 don't appear in debugging output.
 439
 440 =back
 441
 442 B<int mexp_send_interrupt (mexp_h *h);>
 443
 444 Send the interrupt character (C<^C>, Ctrl-C, C<\003>).  This is like
 445 pressing C<^C> - the subprocess (or remote process, if using C<ssh>)
 446 is gracefully killed.
 447
 448 Note this only works if the pty is in cooked mode
 449 (ie. C<MEXP_SPAWN_COOKED_MODE> was passed to C<mexp_spawnlf> or
 450 C<mexp_spawnvf>).  In raw mode, all characters are passed through
 451 without any special interpretation.
 452
 453 =head1 SOURCE
 454
 455 Source is available from:
 456 L<http://git.annexia.org/?p=miniexpect.git;a=summary>
 457
 458 =head1 SEE ALSO
 459
 460 L<pcre2(3)>,
 461 L<pcre2_match(3)>,
 462 L<pcre2api(3)>,
 463 L<waitpid(2)>,
 464 L<system(3)>.
 465
 466 =head1 AUTHORS
 467
 468 Richard W.M. Jones (C<rjones at redhat dot com>)
 469
 470 =head1 LICENSE
 471
 472 The library is released under the Library GPL (LGPL) version 2 or at
 473 your option any later version.
 474
 475 =head1 COPYRIGHT
 476
 477 Copyright (C) 2014-2022 Red Hat Inc.