1 /* String functions which allocate strings on the pool.
2 * By Richard W.M. Jones <rich@annexia.org>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 * $Id: pstring.h,v 1.16 2002/12/04 21:03:25 rich Exp $
32 /* Function: pstrdup - duplicate a string or area of memory
36 * @code{pstrdup} duplicates string @code{s}, allocating new memory for the
37 * string in pool @code{pool}.
39 * @code{pstrndup} duplicates just the first @code{n} characters of the string @code{s}.
42 * @code{pmemdup} duplicates an arbitrary area of memory of size
43 * @code{size} bytes starting at address @code{data}.
/* Duplication helpers. The first argument of each is the pool.
 *   pstrdup  - duplicate the whole string s, allocating the copy in the pool.
 *   pstrndup - duplicate just the first n characters of s.
 *   pmemdup  - duplicate size bytes starting at address data.
 * NOTE(review): handling of a NULL source pointer or a negative n is not
 * specified in this header -- confirm against the implementation.
 */
45 extern char *pstrdup (pool, const char *s);
46 extern char *pstrndup (pool, const char *s, int n);
47 extern void *pmemdup (pool, const void *data, size_t size);
49 /* Function: pstrsplit - split a string on a character, string or regexp.
50 * Function: pstrcsplit
51 * Function: pstrresplit
52 * Function: pstrsplit2
53 * Function: pstrcsplit2
54 * Function: pstrresplit2
56 * These functions split string @code{str} on either a string
57 * @code{sep}, a character @code{c} or a regular expression @code{re}.
59 * The result is a vector of newly created substrings.
61 * The @code{*2} variants split the string in the same way as their
62 * plain counterparts, but keep the matching splitting text as
63 * separate elements in the vector. To illustrate this, imagine that
64 * @code{pstrresplit} and @code{pstrresplit2} are called on the string
65 * "This text is <b>bold</b>" with the regular expression @code{[<>]}.
67 * @code{pstrresplit} will return a vector containing:
69 * @code{ ( "This text is ", "b", "bold", "/b" ) }
71 * whereas @code{pstrresplit2} will return:
73 * @code{ ( "This text is ", "<", "b", ">", "bold", "<", "/b", ">" ) }
75 * Note that the first element of the vector might be splitting
76 * text, or might be ordinary text as in the example above. Also
77 * the elements may not be interleaved like this (think about
78 * what would happen if the original string contained @code{"<b></b>"}).
79 * The only way to decide would be to call @code{prematch} on each element.
81 * This turns out to be very useful for certain sorts of simple
82 * parsing, or if you need to reconstruct the original string (just
83 * concatenate all of the elements together using @code{pconcat}).
85 * In common with Perl's @code{split} function, all of these functions
86 * return a zero length vector if @code{str} is the empty string.
88 * See also: @ref{prematch(3)}, @ref{pconcat(3)}.
/* Splitters. Each returns a vector of newly created substrings of str.
 *   pstrsplit   / pstrsplit2   - split on the string sep.
 *   pstrcsplit  / pstrcsplit2  - split on the character c.
 *   pstrresplit / pstrresplit2 - split on the regular expression re
 *                                (passed as a const pcre *).
 * The *2 forms additionally keep the separator text as separate elements
 * of the returned vector. An empty str yields a zero length vector.
 */
90 extern vector pstrsplit (pool, const char *str, const char *sep);
91 extern vector pstrcsplit (pool, const char *str, char c);
92 extern vector pstrresplit (pool, const char *str, const pcre *re);
93 extern vector pstrsplit2 (pool, const char *str, const char *sep);
94 extern vector pstrcsplit2 (pool, const char *str, char c);
95 extern vector pstrresplit2 (pool, const char *str, const pcre *re);
97 /* Function: pconcat - concatenate a vector of strings
100 * @code{pconcat} concatenates a vector of strings to form a string.
102 * @code{pjoin} is similar except that @code{sep} is inserted between
103 * each concatenated string in the output.
105 * @code{pjoin} is kind of the opposite of @ref{pstrsplit(3)}.
/* pconcat - concatenate the strings held in the vector into one string.
 * pjoin   - the same, with sep inserted between each concatenated string.
 * NOTE(review): the result is presumably allocated in the pool argument --
 * confirm against the implementation.
 */
107 extern char *pconcat (pool, vector);
108 extern char *pjoin (pool, vector, const char *sep);
110 /* Function: pchrs - generate a string of n repeated characters or strings
113 * @code{pchrs (pool, 'c', n)} is similar to the Perl expression
114 * @code{'c' x n}. It generates a pool-allocated string of @code{n} copies
115 * of character @code{'c'}.
117 * @code{pstrs (pool, str, n)} is similar to the Perl expression
118 * @code{str x n}. It generates a pool-allocated string of @code{n} copies
119 * of the string @code{str}.
/* pchrs - pool-allocated string made of n copies of the character c.
 * pstrs - pool-allocated string made of n copies of the string str.
 * NOTE(review): n is a signed int; the behaviour for n <= 0 is not
 * documented in this header.
 */
121 extern char *pchrs (pool, char c, int n);
122 extern char *pstrs (pool, const char *str, int n);
124 /* Function: pvector - generate a vector from a list or array of strings
127 * @code{pvector} takes a NULL-terminated list of strings as arguments
128 * and returns a vector of strings. @code{pvectora} takes a pointer to
129 * an array of strings and the number of strings and returns a vector
132 * A typical use of this is to quickly concatenate strings:
134 * @code{s = pconcat (pool, pvector (pool, s1, s2, s3, NULL));}
136 * which is roughly equivalent to:
138 * @code{s = psprintf (pool, "%s%s%s", s1, s2, s3);}
140 * See also: @ref{pconcat(3)}, @ref{psprintf(3)}.
/* pvector  - variadic: the strings follow the pool argument and the
 *            argument list must be terminated by NULL.
 * pvectora - builds the vector from array, which holds n strings.
 */
142 extern vector pvector (pool, ...);
143 extern vector pvectora (pool, const char *array[], int n);
145 /* Function: psort - sort a vector of strings
147 * Sort a vector of strings, using @code{compare_fn} to compare
148 * strings. The vector is sorted in-place.
150 * It is a common mistake to try to use @code{strcmp} directly
151 * as your comparison function. This will not work. See the
152 * C FAQ, section 12, question 12.2
153 * (@code{http://www.lysator.liu.se/c/c-faq/c-12.html}).
/* psort - sort the vector of strings in place (nothing is returned).
 * compare_fn is handed pointers to the elements (const char **) rather
 * than the strings themselves, so it has to dereference its arguments
 * before comparing them.
 */
155 extern void psort (vector, int (*compare_fn) (const char **, const char **));
157 /* Function: pchomp - remove line endings from a string
159 * Remove line endings (either CR, CRLF or LF) from the string argument.
160 * The string is modified in-place and a pointer to the string is returned.
/* pchomp - strip the line ending (CR, CRLF or LF) from line, modifying
 * the buffer in place. No pool argument is taken.
 * NOTE(review): the returned pointer is presumably line itself -- confirm.
 */
163 extern char *pchomp (char *line);
165 /* Function: ptrim - remove whitespace from the ends of a string
166 * Function: ptrimfront
167 * Function: ptrimback
169 * @code{ptrim} modifies a string of text in place, removing any
170 * whitespace characters from the beginning and end of the line.
172 * @code{ptrimfront} is the same as @code{ptrim} but only removes
173 * whitespace from the beginning of a string.
175 * @code{ptrimback} is the same as @code{ptrim} but only removes
176 * whitespace from the end of a string.
/* In-place whitespace trimming; none of these takes a pool argument.
 *   ptrim      - trim both the beginning and the end of str.
 *   ptrimfront - trim only the beginning of str.
 *   ptrimback  - trim only the end of str.
 * NOTE(review): this header does not say what the returned char * points
 * at (presumably the trimmed string) -- confirm against the implementation.
 */
178 extern char *ptrim (char *str);
179 extern char *ptrimfront (char *str);
180 extern char *ptrimback (char *str);
182 /* Function: psprintf - sprintf which allocates the result in a pool
183 * Function: pvsprintf
185 * The @code{psprintf} function is equivalent to @code{sprintf}
186 * but it allocates the result string in @code{pool}.
188 * @code{pvsprintf} works similarly to @code{vsprintf}.
/* psprintf  - format like sprintf and return the result string, which is
 *             allocated in the pool. The format attribute tells GCC that
 *             argument 2 is a printf-style format string and that the
 *             values to check against it start at argument 3.
 * pvsprintf - the va_list counterpart, working similarly to vsprintf.
 *             There are no variadic arguments for the compiler to check,
 *             hence the 0 in its format attribute; the format string
 *             itself is still checked for consistency.
 */
190 extern char *psprintf (pool, const char *format, ...) __attribute__ ((format (printf, 2, 3)));
191 extern char *pvsprintf (pool, const char *format, va_list ap) __attribute__ ((format (printf, 2, 0)));
193 /* Function: pitoa - convert number types to strings
197 * These functions convert a decimal @code{int}, @code{double} or
198 * hexadecimal @code{unsigned} into a string, which is allocated in @code{pool}.
201 * @code{pitoa} is equivalent to @code{psprintf (pool, "%d", i)},
202 * and the other functions have similar equivalents.
/* Number-to-string conversions; each takes the pool and the value.
 *   pitoa - int, in decimal; equivalent to psprintf (pool, "%d", i).
 *   pdtoa - double.
 *   pxtoa - unsigned, in hexadecimal.
 * NOTE(review): the exact formats used by pdtoa and pxtoa are not spelled
 * out in this header -- see the implementation.
 */
204 extern char *pitoa (pool, int);
205 extern char *pdtoa (pool, double);
206 extern char *pxtoa (pool, unsigned);
208 /* Function: pvitostr - convert vectors of numbers to vectors of strings
212 * Promote vector of numbers to vector of strings.
214 * @code{pvitostr} expects a vector of @code{int}.
216 * @code{pvdtostr} expects a vector of @code{double}.
218 * @code{pvxtostr} expects a vector of hexadecimal @code{unsigned}.
220 * All functions return a vector of @code{char *}.
/* Vector conversions; each returns a vector of char *. The element type
 * of the input vector has to match the function being called:
 *   pvitostr - vector of int.
 *   pvdtostr - vector of double.
 *   pvxtostr - vector of unsigned, converted as hexadecimal.
 */
222 extern vector pvitostr (pool, vector);
223 extern vector pvdtostr (pool, vector);
224 extern vector pvxtostr (pool, vector);
226 /* Function: pstrcat - extend a string
229 * @code{str} is a string allocated in @code{pool}.
230 * Append @code{ending} to @code{str}, reallocating
231 * @code{str} if necessary.
233 * Because @code{str} may be reallocated (i.e. moved) you
234 * must invoke this function as follows:
236 * @code{str = pstrcat (pool, str, ending);}
238 * @code{pstrncat} is similar to @code{pstrcat} except that
239 * only the first @code{n} characters of @code{ending}
240 * are appended to @code{str}.
/* pstrcat  - append ending to the pool-allocated string str.
 * pstrncat - append only the first n characters of ending.
 * str may be reallocated (moved), so callers must always continue with
 * the returned pointer instead of the one they passed in.
 */
242 extern char *pstrcat (pool, char *str, const char *ending);
243 extern char *pstrncat (pool, char *str, const char *ending, size_t n);
245 /* Function: psubstr - return a substring of a string
247 * Return the substring starting at @code{offset} and of length
248 * @code{len} of @code{str}, allocated
249 * as a new string. If @code{len} is negative,
250 * everything up to the end of @code{str} is returned.
/* psubstr - return, as a new string, the part of str that starts at
 * offset and is len characters long; a negative len selects everything
 * up to the end of str.
 * NOTE(review): a negative offset, or a range running past the end of
 * str, is not covered by this header -- confirm against the implementation.
 */
253 extern char *psubstr (pool, const char *str, int offset, int len);
255 /* Function: pstrupr - convert a string to upper- or lowercase
258 * Convert a string, in-place, to upper or lowercase by applying
259 * @code{toupper} or @code{tolower} to each character in turn.
/* In-place case conversion; no pool argument is taken.
 *   pstrupr - presumably the toupper (uppercase) variant, going by its name.
 *   pstrlwr - presumably the tolower (lowercase) variant, going by its name.
 * NOTE(review): the returned char * is presumably str itself -- confirm.
 */
261 extern char *pstrupr (char *str);
262 extern char *pstrlwr (char *str);
264 /* Function: pgetline - read a line from a file, optionally removing comments
265 * Function: pgetlinex
266 * Function: pgetlinec
268 * @code{pgetline} reads a single line from a file and returns it. It
269 * allocates enough space to read lines of arbitrary length. Line ending
270 * characters ('\r' and '\n') are automatically removed from the end of the line.
273 * The @code{pool} argument is a pool for allocating the line. The
274 * @code{fp} argument is the C @code{FILE} pointer. The @code{line}
275 * argument is a pointer to a string allocated in pool which will
276 * be reallocated and filled with the contents of the line. You may
277 * pass @code{line} as @code{NULL} to get a newly allocated buffer.
279 * Use @code{pgetline} in one of the following two ways:
281 * @code{line = pgetline (pool, fp, line);}
285 * @code{line = pgetline (pool, fp, NULL);}
287 * @code{pgetlinex} is a more advanced function which reads a line
288 * from a file, optionally removing comments, concatenating together
289 * lines which have been split with a backslash, and ignoring blank
290 * lines. @code{pgetlinex} (and the related macro @code{pgetlinec}) are
291 * very useful for reading lines of input from a configuration file.
293 * The @code{pool} argument is a pool for allocating the line. The
294 * @code{fp} argument is the C @code{FILE} pointer. The @code{line}
295 * argument is a buffer allocated in pool which will be reallocated
296 * and filled with the result. @code{comment_set} is the set of
297 * possible comment characters -- e.g. @code{"#!"} to allow either
298 * @code{#} or @code{!} to be used to introduce comments.
299 * @code{flags} is zero or more of the following flags OR-ed together:
302 * @code{PGETL_NO_CONCAT}: Don't concatenate lines which have been
303 * split with trailing backslash characters.
305 * @code{PGETL_INLINE_COMMENTS}: Treat everything following a comment
306 * character as a comment. The default is to only allow comments which
307 * appear on a line on their own.
309 * @code{pgetlinec} is a helper macro which calls @code{pgetlinex}
310 * with @code{comment_set == "#"} and @code{flags == 0}.
/* pgetline  - read one line from fp. line is either NULL (a new buffer is
 *             allocated) or a buffer previously allocated in the pool,
 *             which will be reallocated; callers continue with the
 *             returned pointer.
 * pgetlinex - as pgetline, with comment handling controlled by
 *             comment_set (the set of characters that introduce a
 *             comment) and flags (zero or more PGETL_* values OR-ed).
 * NOTE(review): the value returned at end of file is not described in the
 * visible documentation -- confirm against the implementation.
 */
312 extern char *pgetline (pool, FILE *fp, char *line);
313 extern char *pgetlinex (pool, FILE *fp, char *line, const char *comment_set, int flags);
/* pgetlinec - shorthand for pgetlinex with "#" as the only comment
 * character and no flags set. Each macro argument appears exactly once
 * in the expansion and is parenthesised.
 */
314 #define pgetlinec(p,fp,line) pgetlinex ((p), (fp), (line), "#", 0)
/* Flag bits for the flags argument of pgetlinex. The values are distinct
 * bits, so they can be combined with bitwise OR.
 *   PGETL_NO_CONCAT       - do not join lines split with a trailing backslash.
 *   PGETL_INLINE_COMMENTS - treat everything after a comment character as
 *                           a comment, not only whole-line comments.
 */
316 #define PGETL_NO_CONCAT 1
317 #define PGETL_INLINE_COMMENTS 2
319 /* Function: pmap - map, search vectors of strings
322 * @code{pmap} takes a @code{vector} of strings (@code{char *}) and
323 * transforms it into another @code{vector} of strings by applying
324 * the function @code{char *map_fn (pool, const char *)} to each element.
327 * @code{pgrep} applies the function @code{int grep_fn (pool, const char *)}
328 * to each element in a @code{vector} of strings, and returns a
329 * new vector of strings containing only those strings where
330 * @code{grep_fn} returns true.
332 * See also: @ref{vector_map_pool(3)}, @ref{vector_grep_pool(3)}.
/* pmap  - build a new vector of strings by applying map_fn to each string
 *         in v.
 * pgrep - build a new vector holding only those strings of v for which
 *         grep_fn returns true (non-zero).
 * Both callbacks take a pool as their first argument and the string under
 * consideration as their second.
 */
334 vector pmap (pool, const vector v, char *(*map_fn) (pool, const char *));
335 vector pgrep (pool, const vector v, int (*grep_fn) (pool, const char *));
337 #endif /* PSTRING_H */