Clarify uxsock logging
[multipath-tools/.git] / libmultipath / regex.c
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5
6    Copyright (C) 1993 Free Software Foundation, Inc.
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21
22 #ifndef _GNU_SOURCE
23 #define _GNU_SOURCE
24 #endif
25
26 #include <sys/types.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #ifndef bcmp
31 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
32 #endif
33 #ifndef bcopy
34 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
35 #endif
36 #ifndef bzero
37 #define bzero(s, n)     memset ((s), 0, (n))
38 #endif
39
40 /* Define the syntax stuff for \<, \>, etc.  */
41
42 #ifndef Sword
43 #define Sword 1
44 #endif
45
46 #define CHAR_SET_SIZE 256
47
48 static char re_syntax_table[CHAR_SET_SIZE];
49
50 static void init_syntax_once(void)
51 {
52         register int c;
53         static int done = 0;
54
55         if (done)
56                 return;
57
58         bzero(re_syntax_table, sizeof re_syntax_table);
59
60         for (c = 'a'; c <= 'z'; c++)
61                 re_syntax_table[c] = Sword;
62
63         for (c = 'A'; c <= 'Z'; c++)
64                 re_syntax_table[c] = Sword;
65
66         for (c = '0'; c <= '9'; c++)
67                 re_syntax_table[c] = Sword;
68
69         re_syntax_table['_'] = Sword;
70
71         done = 1;
72 }
73
74 #define SYNTAX(c) re_syntax_table[c]
75
76 #include "regex.h"
77 #include <ctype.h>
78
79 #ifdef isblank
80 #define ISBLANK(c) (isascii (c) && isblank (c))
81 #else
82 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
83 #endif
84 #ifdef isgraph
85 #define ISGRAPH(c) (isascii (c) && isgraph (c))
86 #else
87 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
88 #endif
89
90 #define ISPRINT(c) (isascii (c) && isprint (c))
91 #define ISDIGIT(c) (isascii (c) && isdigit (c))
92 #define ISALNUM(c) (isascii (c) && isalnum (c))
93 #define ISALPHA(c) (isascii (c) && isalpha (c))
94 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
95 #define ISLOWER(c) (isascii (c) && islower (c))
96 #define ISPUNCT(c) (isascii (c) && ispunct (c))
97 #define ISSPACE(c) (isascii (c) && isspace (c))
98 #define ISUPPER(c) (isascii (c) && isupper (c))
99 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
100
101 #undef SIGN_EXTEND_CHAR
102 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
103
104 #ifndef alloca
105 #ifdef __GNUC__
106 #define alloca __builtin_alloca
107 #endif                          /* not __GNUC__ */
108 #endif                          /* not alloca */
109
110 #define REGEX_ALLOCATE alloca
111
112 /* Assumes a `char *destination' variable.  */
113 #define REGEX_REALLOCATE(source, osize, nsize)                          \
114         (destination = (char *) alloca (nsize),                         \
115          bcopy (source, destination, osize),                            \
116          destination)
117
118 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
119    `string1' or just past its end.  This works if PTR is NULL, which is
120    a good thing.  */
121 #define FIRST_STRING_P(ptr)                                             \
122         (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
123
124 /* (Re)Allocate N items of type T using malloc, or fail.  */
125 #define TALLOC(n, t)         ((t *) malloc ((n) * sizeof (t)))
126 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
127 #define REGEX_TALLOC(n, t)   ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
128
129 #define BYTEWIDTH 8             /* In bits.  */
130
131 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
132
133 #define MAX(a, b) ((a) > (b) ? (a) : (b))
134 #define MIN(a, b) ((a) < (b) ? (a) : (b))
135
136 typedef char boolean;
137 #define false 0
138 #define true 1
139
140 typedef enum {
141         no_op = 0,
142         exactn = 1,
143         anychar,
144         charset,
145         charset_not,
146         start_memory,
147         stop_memory,
148         duplicate,
149         begline,
150         endline,
151         begbuf,
152         endbuf,
153         jump,
154         jump_past_alt,
155         on_failure_jump,
156         on_failure_keep_string_jump,
157         pop_failure_jump,
158         maybe_pop_jump,
159         dummy_failure_jump,
160         push_dummy_failure,
161         succeed_n,
162         jump_n,
163         set_number_at,
164         wordchar,
165         notwordchar,
166         wordbeg,
167         wordend,
168         wordbound,
169         notwordbound
170 } re_opcode_t;
171
172 #define STORE_NUMBER(destination, number)                               \
173   do {                                                                  \
174     (destination)[0] = (number) & 0377;                                 \
175     (destination)[1] = (number) >> 8;                                   \
176   } while (0)
177
178 #define STORE_NUMBER_AND_INCR(destination, number)                      \
179   do {                                                                  \
180     STORE_NUMBER (destination, number);                                 \
181     (destination) += 2;                                                 \
182   } while (0)
183
184 #define EXTRACT_NUMBER(destination, source)                             \
185   do {                                                                  \
186     (destination) = *(source) & 0377;                                   \
187     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
188   } while (0)
189
190 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
191   do {                                                                  \
192     EXTRACT_NUMBER (destination, source);                               \
193     (source) += 2;                                                      \
194   } while (0)
195
196 #undef assert
197 #define assert(e)
198
199 #define DEBUG_STATEMENT(e)
200 #define DEBUG_PRINT1(x)
201 #define DEBUG_PRINT2(x1, x2)
202 #define DEBUG_PRINT3(x1, x2, x3)
203 #define DEBUG_PRINT4(x1, x2, x3, x4)
204 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
205 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
206
207 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
208 reg_syntax_t re_set_syntax(syntax)
209 reg_syntax_t syntax;
210 {
211         reg_syntax_t ret = re_syntax_options;
212
213         re_syntax_options = syntax;
214         return ret;
215 }
216
217 /* This table gives an error message for each of the error codes listed
218    in regex.h.  Obviously the order here has to be same as there.  */
219
220 static const char *re_error_msg[] = { NULL,     /* REG_NOERROR */
221         "No match",             /* REG_NOMATCH */
222         "Invalid regular expression",   /* REG_BADPAT */
223         "Invalid collation character",  /* REG_ECOLLATE */
224         "Invalid character class name", /* REG_ECTYPE */
225         "Trailing backslash",   /* REG_EESCAPE */
226         "Invalid back reference",       /* REG_ESUBREG */
227         "Unmatched [ or [^",    /* REG_EBRACK */
228         "Unmatched ( or \\(",   /* REG_EPAREN */
229         "Unmatched \\{",        /* REG_EBRACE */
230         "Invalid content of \\{\\}",    /* REG_BADBR */
231         "Invalid range end",    /* REG_ERANGE */
232         "Memory exhausted",     /* REG_ESPACE */
233         "Invalid preceding regular expression", /* REG_BADRPT */
234         "Premature end of regular expression",  /* REG_EEND */
235         "Regular expression too big",   /* REG_ESIZE */
236         "Unmatched ) or \\)",   /* REG_ERPAREN */
237 };
238
239 /* Subroutine declarations and macros for regex_compile.  */
240
241 static reg_errcode_t regex_compile (const char *pattern, size_t size,
242                                     reg_syntax_t syntax,
243                                     struct re_pattern_buffer * bufp);
244
245 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
246
247 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
248
249 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg,
250                         unsigned char *end);
251
252 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
253                         unsigned char *end);
254
255 static boolean at_begline_loc_p (const char *pattern, const char *p,
256                                  reg_syntax_t syntax);
257
258 static boolean at_endline_loc_p (const char *p, const char *pend,
259                                  reg_syntax_t syntax);
260
261 static reg_errcode_t compile_range (const char **p_ptr, const char *pend,
262                                     char *translate, reg_syntax_t syntax,
263                                     unsigned char *b);
264
265 /* Fetch the next character in the uncompiled pattern---translating it
266    if necessary.  Also cast from a signed character in the constant
267    string passed to us by the user to an unsigned char that we can use
268    as an array index (in, e.g., `translate').  */
269 #define PATFETCH(c)                                                     \
270   do {if (p == pend) return REG_EEND;                                   \
271     c = (unsigned char) *p++;                                           \
272     if (translate) c = translate[c];                                    \
273   } while (0)
274
275 /* Fetch the next character in the uncompiled pattern, with no
276    translation.  */
277 #define PATFETCH_RAW(c)                                                 \
278   do {if (p == pend) return REG_EEND;                                   \
279     c = (unsigned char) *p++;                                           \
280   } while (0)
281
282 /* Go backwards one character in the pattern.  */
283 #define PATUNFETCH p--
284
285
286 /* If `translate' is non-null, return translate[D], else just D.  We
287    cast the subscript to translate because some data is declared as
288    `char *', to avoid warnings when a string constant is passed.  But
289    when we use a character as a subscript we must make it unsigned.  */
290 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
291
292
293 /* Macros for outputting the compiled pattern into `buffer'.  */
294
295 /* If the buffer isn't allocated when it comes in, use this.  */
296 #define INIT_BUF_SIZE  32
297
298 /* Make sure we have at least N more bytes of space in buffer.  */
299 #define GET_BUFFER_SPACE(n)                                             \
300     while (b - bufp->buffer + (n) > bufp->allocated)                    \
301       EXTEND_BUFFER ()
302
303 /* Make sure we have one more byte of buffer space and then add C to it.  */
304 #define BUF_PUSH(c)                                                     \
305   do {                                                                  \
306     GET_BUFFER_SPACE (1);                                               \
307     *b++ = (unsigned char) (c);                                         \
308   } while (0)
309
310
311 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
312 #define BUF_PUSH_2(c1, c2)                                              \
313   do {                                                                  \
314     GET_BUFFER_SPACE (2);                                               \
315     *b++ = (unsigned char) (c1);                                        \
316     *b++ = (unsigned char) (c2);                                        \
317   } while (0)
318
319
320 /* As with BUF_PUSH_2, except for three bytes.  */
321 #define BUF_PUSH_3(c1, c2, c3)                                          \
322   do {                                                                  \
323     GET_BUFFER_SPACE (3);                                               \
324     *b++ = (unsigned char) (c1);                                        \
325     *b++ = (unsigned char) (c2);                                        \
326     *b++ = (unsigned char) (c3);                                        \
327   } while (0)
328
329
330 /* Store a jump with opcode OP at LOC to location TO.  We store a
331    relative address offset by the three bytes the jump itself occupies.  */
332 #define STORE_JUMP(op, loc, to) \
333   store_op1 (op, loc, (int)((to) - (loc) - 3))
334
335 /* Likewise, for a two-argument jump.  */
336 #define STORE_JUMP2(op, loc, to, arg) \
337   store_op2 (op, loc, (int)((to) - (loc) - 3), arg)
338
339 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
340 #define INSERT_JUMP(op, loc, to) \
341   insert_op1 (op, loc, (int)((to) - (loc) - 3), b)
342
343 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
344 #define INSERT_JUMP2(op, loc, to, arg) \
345   insert_op2 (op, loc, (int)((to) - (loc) - 3), arg, b)
346
347
348 /* This is not an arbitrary limit: the arguments which represent offsets
349    into the pattern are two bytes long.  So if 2^16 bytes turns out to
350    be too small, many things would have to change.  */
351 #define MAX_BUF_SIZE (1L << 16)
352 #define REALLOC realloc
353
354 /* Extend the buffer by twice its current size via realloc and
355    reset the pointers that pointed into the old block to point to the
356    correct places in the new one.  If extending the buffer results in it
357    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
358 #define EXTEND_BUFFER()                                                 \
359   do {                                                                  \
360     unsigned char *old_buffer = bufp->buffer;                           \
361     if (bufp->allocated == MAX_BUF_SIZE)                                \
362       return REG_ESIZE;                                                 \
363     bufp->allocated <<= 1;                                              \
364     if (bufp->allocated > MAX_BUF_SIZE)                                 \
365       bufp->allocated = MAX_BUF_SIZE;                                   \
366     bufp->buffer = (unsigned char *) REALLOC(bufp->buffer, bufp->allocated);\
367     if (bufp->buffer == NULL)                                           \
368       return REG_ESPACE;                                                \
369     /* If the buffer moved, move all the pointers into it.  */          \
370     if (old_buffer != bufp->buffer)                                     \
371       {                                                                 \
372         b = (b - old_buffer) + bufp->buffer;                            \
373         begalt = (begalt - old_buffer) + bufp->buffer;                  \
374         if (fixup_alt_jump)                                             \
375           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
376         if (laststart)                                                  \
377           laststart = (laststart - old_buffer) + bufp->buffer;          \
378         if (pending_exact)                                              \
379           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
380       }                                                                 \
381   } while (0)
382
383
384 /* Since we have one byte reserved for the register number argument to
385    {start,stop}_memory, the maximum number of groups we can report
386    things about is what fits in that byte.  */
387 #define MAX_REGNUM 255
388
389 /* But patterns can have more than `MAX_REGNUM' registers.  We just
390    ignore the excess.  */
391 typedef unsigned regnum_t;
392
393
394 /* Macros for the compile stack.  */
395
396 /* Since offsets can go either forwards or backwards, this type needs to
397    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
398 /* int may be not enough when sizeof(int) == 2                           */
399 typedef long pattern_offset_t;
400
401 typedef struct {
402         pattern_offset_t begalt_offset;
403         pattern_offset_t fixup_alt_jump;
404         pattern_offset_t inner_group_offset;
405         pattern_offset_t laststart_offset;
406         regnum_t regnum;
407 } compile_stack_elt_t;
408
409
410 typedef struct {
411         compile_stack_elt_t *stack;
412         unsigned size;
413         unsigned avail;         /* Offset of next open position.  */
414 } compile_stack_type;
415
416
417 #define INIT_COMPILE_STACK_SIZE 32
418
419 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
420 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
421
422 /* The next available element.  */
423 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
424
425
426 /* Set the bit for character C in a list.  */
427 #define SET_LIST_BIT(c)                               \
428   (b[((unsigned char) (c)) / BYTEWIDTH]               \
429    |= 1 << (((unsigned char) c) % BYTEWIDTH))
430
431
432 /* Get the next unsigned number in the uncompiled pattern.  */
433 #define GET_UNSIGNED_NUMBER(num)                                        \
434   { if (p != pend)                                                      \
435      {                                                                  \
436        PATFETCH (c);                                                    \
437        while (ISDIGIT (c))                                              \
438          {                                                              \
439            if (num < 0)                                                 \
440               num = 0;                                                  \
441            num = num * 10 + c - '0';                                    \
442            if (p == pend)                                               \
443               break;                                                    \
444            PATFETCH (c);                                                \
445          }                                                              \
446        }                                                                \
447     }
448
449 #define CHAR_CLASS_MAX_LENGTH  6        /* Namely, `xdigit'.  */
450
451 #define IS_CHAR_CLASS(string)                                           \
452    (STREQ (string, "alpha") || STREQ (string, "upper")                  \
453     || STREQ (string, "lower") || STREQ (string, "digit")               \
454     || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
455     || STREQ (string, "space") || STREQ (string, "print")               \
456     || STREQ (string, "punct") || STREQ (string, "graph")               \
457     || STREQ (string, "cntrl") || STREQ (string, "blank"))
458
459 static boolean group_in_compile_stack (compile_stack_type
460                                        compile_stack, regnum_t regnum);
461
462 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
463    Returns one of error codes defined in `regex.h', or zero for success  */
464
465 static reg_errcode_t regex_compile(pattern, size, syntax, bufp)
466 const char *pattern;
467 size_t size;
468 reg_syntax_t syntax;
469 struct re_pattern_buffer *bufp;
470 {
471         /* We fetch characters from PATTERN here.  Even though PATTERN is
472            `char *' (i.e., signed), we declare these variables as unsigned, so
473            they can be reliably used as array indices.  */
474         register unsigned char c, c1;
475
476         /* A random tempory spot in PATTERN.  */
477         const char *p1;
478
479         /* Points to the end of the buffer, where we should append.  */
480         register unsigned char *b;
481
482         /* Keeps track of unclosed groups.  */
483         compile_stack_type compile_stack;
484
485         /* Points to the current (ending) position in the pattern.  */
486         const char *p = pattern;
487         const char *pend = pattern + size;
488
489         /* How to translate the characters in the pattern.  */
490         char *translate = bufp->translate;
491
492         /* Address of the count-byte of the most recently inserted `exactn'
493            command.  This makes it possible to tell if a new exact-match
494            character can be added to that command or if the character requires
495            a new `exactn' command.  */
496         unsigned char *pending_exact = 0;
497
498         /* Address of start of the most recently finished expression.
499            This tells, e.g., postfix * where to find the start of its
500            operand.  Reset at the beginning of groups and alternatives.  */
501         unsigned char *laststart = 0;
502
503         /* Address of beginning of regexp, or inside of last group.  */
504         unsigned char *begalt;
505
506         /* Place in the uncompiled pattern (i.e., the {) to
507            which to go back if the interval is invalid.  */
508         const char *beg_interval;
509
510         /* Address of the place where a forward jump should go to the end of
511            the containing expression.  Each alternative of an `or' -- except the
512            last -- ends with a forward jump of this sort.  */
513         unsigned char *fixup_alt_jump = 0;
514
515         /* Counts open-groups as they are encountered.  Remembered for the
516            matching close-group on the compile stack, so the same register
517            number is put in the stop_memory as the start_memory.  */
518         regnum_t regnum = 0;
519
520         /* Initialize the compile stack.  */
521         compile_stack.stack =
522             TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
523         if (compile_stack.stack == NULL)
524                 return REG_ESPACE;
525
526         compile_stack.size = INIT_COMPILE_STACK_SIZE;
527         compile_stack.avail = 0;
528
529         /* Initialize the pattern buffer.  */
530         bufp->syntax = syntax;
531         bufp->fastmap_accurate = 0;
532         bufp->not_bol = bufp->not_eol = 0;
533
534         /* Set `used' to zero, so that if we return an error, the pattern
535            printer (for debugging) will think there's no pattern.  We reset it
536            at the end.  */
537         bufp->used = 0;
538
539         /* Always count groups, whether or not bufp->no_sub is set.  */
540         bufp->re_nsub = 0;
541
542         /* Initialize the syntax table.  */
543         init_syntax_once();
544
545         if (bufp->allocated == 0) {
546                 if (bufp->buffer) {
547                         RETALLOC(bufp->buffer, INIT_BUF_SIZE,
548                                  unsigned char);
549                 } else { /* Caller did not allocate a buffer. Do it for them. */
550                         bufp->buffer =
551                             TALLOC(INIT_BUF_SIZE, unsigned char);
552                 }
553                 if (!bufp->buffer)
554                         return REG_ESPACE;
555
556                 bufp->allocated = INIT_BUF_SIZE;
557         }
558
559         begalt = b = bufp->buffer;
560
561         /* Loop through the uncompiled pattern until we're at the end.  */
562         while (p != pend) {
563                 PATFETCH(c);
564
565                 switch (c) {
566                 case '^':
567                 {
568                         if (p == pattern + 1 ||
569                             syntax & RE_CONTEXT_INDEP_ANCHORS ||
570                             at_begline_loc_p(pattern, p, syntax))
571                                 BUF_PUSH(begline);
572                         else
573                                 goto normal_char;
574                 }
575                 break;
576
577                 case '$':
578                 {
579                         if (p == pend ||
580                             syntax & RE_CONTEXT_INDEP_ANCHORS ||
581                             at_endline_loc_p(p, pend, syntax))
582                                 BUF_PUSH(endline);
583                         else
584                                 goto normal_char;
585                 }
586                 break;
587
588                 case '+':
589
590                 case '?':
591                 if ((syntax & RE_BK_PLUS_QM) ||
592                     (syntax & RE_LIMITED_OPS))
593                         goto normal_char;
594                 handle_plus:
595
596                 case '*':
597                 /* If there is no previous pattern... */
598                 if (!laststart) {
599                         if (syntax & RE_CONTEXT_INVALID_OPS)
600                                 return REG_BADRPT;
601                         else if (!(syntax & RE_CONTEXT_INDEP_OPS))
602                                 goto normal_char;
603                 }
604
605                 {
606                         /* Are we optimizing this jump?  */
607                         boolean keep_string_p = false;
608
609                         /* 1 means zero (many) matches is allowed.  */
610                         char zero_times_ok = 0, many_times_ok = 0;
611
612                         for (;;) {
613                                 zero_times_ok |= c != '+';
614                                 many_times_ok |= c != '?';
615
616                                 if (p == pend)
617                                         break;
618
619                                 PATFETCH(c);
620
621                                 if (c == '*' || (!(syntax & RE_BK_PLUS_QM) &&
622                                     (c == '+' || c == '?')));
623
624                                 else if (syntax & RE_BK_PLUS_QM && c == '\\') {
625                                         if (p == pend)
626                                                 return REG_EESCAPE;
627
628                                         PATFETCH(c1);
629                                         if (!(c1 == '+' || c1 == '?')) {
630                                                 PATUNFETCH;
631                                                 PATUNFETCH;
632                                                 break;
633                                         }
634
635                                         c = c1;
636                                 } else {
637                                         PATUNFETCH;
638                                         break;
639                                 }
640                         }
641
642                         if (!laststart)
643                                 break;
644
645                         if (many_times_ok) {
646                                 assert(p - 1 > pattern);
647
648                                 /* Allocate the space for the jump.  */
649                                 GET_BUFFER_SPACE(3);
650
651                                 if (TRANSLATE(*(p - 2)) == TRANSLATE('.') &&
652                                     zero_times_ok && p < pend &&
653                                     TRANSLATE(*p) == TRANSLATE('\n') &&
654                                     !(syntax & RE_DOT_NEWLINE)) {
655                                         /* We have .*\n.  */
656                                         STORE_JUMP(jump, b, laststart);
657                                         keep_string_p = true;
658                                 } else
659                                         STORE_JUMP(maybe_pop_jump, b,
660                                                    laststart - 3);
661
662                                         b += 3;
663                                 }
664
665                                 GET_BUFFER_SPACE(3);
666                                 INSERT_JUMP(keep_string_p ?
667                                             on_failure_keep_string_jump :
668                                             on_failure_jump, laststart,
669                                             b + 3);
670                                 pending_exact = 0;
671                                 b += 3;
672
673                                 if (!zero_times_ok) {
674                                         GET_BUFFER_SPACE(3);
675                                         INSERT_JUMP(dummy_failure_jump,
676                                                     laststart,
677                                                     laststart + 6);
678                                         b += 3;
679                                 }
680                         }
681                         break;
682
683
684                 case '.':
685                 laststart = b;
686                 BUF_PUSH(anychar);
687                 break;
688
689                 case '[':
690                 {
691                         boolean had_char_class = false;
692
693                         if (p == pend)
694                                 return REG_EBRACK;
695
696                         GET_BUFFER_SPACE(34);
697
698                         laststart = b;
699
700                         /* We test `*p == '^' twice, instead of using an if
701                            statement, so we only need one BUF_PUSH.  */
702                         BUF_PUSH(*p == '^' ? charset_not : charset);
703                         if (*p == '^')
704                                 p++;
705
706                         p1 = p;
707
708                         /* Push the number of bytes in the bitmap.  */
709                         BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
710
711                         /* Clear the whole map.  */
712                         bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH);
713
714                         if ((re_opcode_t) b[-2] == charset_not
715                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
716                                 SET_LIST_BIT('\n');
717
718                         /* Read in characters and ranges, setting map bits.  */
719                         for (;;) {
720                                 if (p == pend)
721                                         return REG_EBRACK;
722
723                                 PATFETCH(c);
724
725                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
726                                     c == '\\') {
727                                         if (p == pend)
728                                                 return REG_EESCAPE;
729
730                                         PATFETCH(c1);
731                                         SET_LIST_BIT(c1);
732                                         continue;
733                                 }
734
735                                 if (c == ']' && p != p1 + 1)
736                                         break;
737
738                                 if (had_char_class && c == '-' && *p != ']')
739                                         return REG_ERANGE;
740
741                                 if (c == '-' && !(p - 2 >= pattern &&
742                                     p[-2] == '[') && !(p - 3 >= pattern &&
743                                     p[-3] == '[' && p[-2] == '^') &&
744                                     *p != ']') {
745                                         reg_errcode_t ret =
746                                             compile_range(&p, pend, translate,
747                                                           syntax, b);
748                                         if (ret != REG_NOERROR)
749                                                 return ret;
750                                 }
751
752                                 else if (p[0] == '-' && p[1] != ']') {
753                                         reg_errcode_t ret;
754
755                                         /* Move past the `-'.  */
756                                         PATFETCH(c1);
757
758                                         ret = compile_range(&p, pend, translate,
759                                                             syntax, b);
760                                         if (ret != REG_NOERROR)
761                                                 return ret;
762                                 }
763
764                                 else if (syntax & RE_CHAR_CLASSES &&
765                                          c == '[' && *p == ':') {
766                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
767
768                                         PATFETCH(c);
769                                         c1 = 0;
770
771                                         /* If pattern is `[[:'.  */
772                                         if (p == pend)
773                                                 return REG_EBRACK;
774
775                                         for (;;) {
776                                                 PATFETCH(c);
777                                                 if (c == ':' || c == ']' ||
778                                                     p == pend || c1 ==
779                                                     CHAR_CLASS_MAX_LENGTH)
780                                                         break;
781                                                 str[c1++] = c;
782                                         }
783                                         str[c1] = '\0';
784
785                                         if (c == ':' && *p == ']') {
786                                                 int ch;
787                                                 boolean is_alnum =
788                                                     STREQ(str, "alnum");
789                                                 boolean is_alpha =
790                                                     STREQ(str, "alpha");
791                                                 boolean is_blank =
792                                                     STREQ(str, "blank");
793                                                 boolean is_cntrl =
794                                                     STREQ(str, "cntrl");
795                                                 boolean is_digit =
796                                                     STREQ(str, "digit");
797                                                 boolean is_graph =
798                                                     STREQ(str, "graph");
799                                                 boolean is_lower =
800                                                     STREQ(str, "lower");
801                                                 boolean is_print =
802                                                     STREQ(str, "print");
803                                                 boolean is_punct =
804                                                     STREQ(str, "punct");
805                                                 boolean is_space =
806                                                     STREQ(str, "space");
807                                                 boolean is_upper =
808                                                     STREQ(str, "upper");
809                                                 boolean is_xdigit =
810                                                     STREQ(str, "xdigit");
811
812                                                 if (!IS_CHAR_CLASS(str))
813                                                         return REG_ECTYPE;
814
815                                                 PATFETCH(c);
816
817                                                 if (p == pend)
818                                                         return REG_EBRACK;
819
820                                                 for (ch = 0; ch < 1 <<
821                                                      BYTEWIDTH; ch++) {
822                                                         if ((is_alnum &&
823                                                              ISALNUM(ch)) ||
824                                                             (is_alpha &&
825                                                              ISALPHA(ch)) ||
826                                                             (is_blank &&
827                                                              ISBLANK(ch)) ||
828                                                             (is_cntrl &&
829                                                              ISCNTRL(ch)) ||
830                                                             (is_digit &&
831                                                              ISDIGIT(ch)) ||
832                                                             (is_graph &&
833                                                              ISGRAPH(ch)) ||
834                                                             (is_lower &&
835                                                              ISLOWER(ch)) ||
836                                                             (is_print &&
837                                                              ISPRINT(ch)) ||
838                                                             (is_punct &&
839                                                              ISPUNCT(ch)) ||
840                                                             (is_space &&
841                                                              ISSPACE(ch)) ||
842                                                             (is_upper &&
843                                                              ISUPPER(ch)) ||
844                                                             (is_xdigit &&
845                                                              ISXDIGIT(ch)))
846                                                                 SET_LIST_BIT(ch);
847                                                 }
848                                                 had_char_class =
849                                                     true;
850                                         } else {
851                                                 c1++;
852                                                 while (c1--)
853                                                         PATUNFETCH;
854                                                 SET_LIST_BIT('[');
855                                                 SET_LIST_BIT(':');
856                                                 had_char_class = false;
857                                         }
858                                 } else {
859                                         had_char_class = false;
860                                         SET_LIST_BIT(c);
861                                 }
862                         }
863
864                         while ((int) b[-1] > 0
865                                && b[b[-1] - 1] == 0)
866                                 b[-1]--;
867                         b += b[-1];
868                 }
869                 break;
870
871                 case '(':
872                 if (syntax & RE_NO_BK_PARENS)
873                         goto handle_open;
874                 else
875                         goto normal_char;
876
877
878                 case ')':
879                 if (syntax & RE_NO_BK_PARENS)
880                         goto handle_close;
881                 else
882                         goto normal_char;
883
884
885                 case '\n':
886                 if (syntax & RE_NEWLINE_ALT)
887                         goto handle_alt;
888                 else
889                         goto normal_char;
890
891
892                 case '|':
893                 if (syntax & RE_NO_BK_VBAR)
894                         goto handle_alt;
895                 else
896                         goto normal_char;
897
898
899                 case '{':
900                 if (syntax & RE_INTERVALS
901                     && syntax & RE_NO_BK_BRACES)
902                         goto handle_interval;
903                 else
904                         goto normal_char;
905
906
907                 case '\\':
908                 if (p == pend)
909                         return REG_EESCAPE;
910
911                 PATFETCH_RAW(c);
912
913                 switch (c) {
914                         case '(':
915                         if (syntax & RE_NO_BK_PARENS)
916                                 goto normal_backslash;
917
918                       handle_open:
919                         bufp->re_nsub++;
920                         regnum++;
921
922                         if (COMPILE_STACK_FULL) {
923                                 RETALLOC(compile_stack.stack,
924                                          compile_stack.size << 1,
925                                          compile_stack_elt_t);
926                                 if (compile_stack.stack == NULL)
927                                         return REG_ESPACE;
928
929                                 compile_stack.size <<= 1;
930                         }
931
932                         COMPILE_STACK_TOP.begalt_offset =
933                             begalt - bufp->buffer;
934                         COMPILE_STACK_TOP.fixup_alt_jump =
935                             fixup_alt_jump ? fixup_alt_jump -
936                             bufp->buffer + 1 : 0;
937                         COMPILE_STACK_TOP.laststart_offset =
938                             b - bufp->buffer;
939                         COMPILE_STACK_TOP.regnum = regnum;
940
941                         if (regnum <= MAX_REGNUM) {
942                                 COMPILE_STACK_TOP.inner_group_offset =
943                                     b - bufp->buffer + 2;
944                                 BUF_PUSH_3(start_memory, regnum, 0);
945                         }
946
947                         compile_stack.avail++;
948
949                         fixup_alt_jump = 0;
950                         laststart = 0;
951                         begalt = b;
952                         pending_exact = 0;
953                         break;
954
955                         case ')':
956                         if (syntax & RE_NO_BK_PARENS)
957                                 goto normal_backslash;
958
959                         if (COMPILE_STACK_EMPTY) {
960                                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
961                                         goto normal_backslash;
962                                 else
963                                         return REG_ERPAREN;
964                         }
965
966                       handle_close:
967                         if (fixup_alt_jump) {
968                                 BUF_PUSH(push_dummy_failure);
969                                 STORE_JUMP(jump_past_alt,
970                                            fixup_alt_jump, b - 1);
971                         }
972
973                         if (COMPILE_STACK_EMPTY) {
974                                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
975                                         goto normal_char;
976                                 else
977                                         return REG_ERPAREN;
978                         }
979
980                         assert(compile_stack.avail != 0);
981                         {
982                                 regnum_t this_group_regnum;
983
984                                 compile_stack.avail--;
985                                 begalt = bufp->buffer +
986                                             COMPILE_STACK_TOP.begalt_offset;
987                                 fixup_alt_jump =
988                                             COMPILE_STACK_TOP.fixup_alt_jump ?
989                                             bufp->buffer + COMPILE_STACK_TOP.
990                                             fixup_alt_jump - 1 : 0;
991                                 laststart = bufp->buffer +
992                                             COMPILE_STACK_TOP.laststart_offset;
993                                 this_group_regnum = COMPILE_STACK_TOP.regnum;
994                                 pending_exact = 0;
995
996                                 if (this_group_regnum <= MAX_REGNUM) {
997                                         unsigned char
998                                         *inner_group_loc = bufp->buffer +
999                                                 COMPILE_STACK_TOP.
1000                                                 inner_group_offset;
1001
1002                                         *inner_group_loc = regnum -
1003                                                 this_group_regnum;
1004                                         BUF_PUSH_3(stop_memory,
1005                                                    this_group_regnum,
1006                                                    regnum - this_group_regnum);
1007                                 }
1008                         }
1009                         break;
1010
1011
1012                         case '|':       /* `\|'.  */
1013                         if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1014                                 goto normal_backslash;
1015                       handle_alt:
1016                         if (syntax & RE_LIMITED_OPS)
1017                                 goto normal_char;
1018
1019                         GET_BUFFER_SPACE(3);
1020                         INSERT_JUMP(on_failure_jump, begalt, b + 6);
1021                         pending_exact = 0;
1022                         b += 3;
1023
1024                         if (fixup_alt_jump)
1025                                 STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
1026
1027                         fixup_alt_jump = b;
1028                         GET_BUFFER_SPACE(3);
1029                         b += 3;
1030
1031                         laststart = 0;
1032                         begalt = b;
1033                         break;
1034
1035
1036                         case '{':
1037                         /* If \{ is a literal.  */
1038                         if (!(syntax & RE_INTERVALS) || ((syntax & RE_INTERVALS)
1039                             && (syntax & RE_NO_BK_BRACES))
1040                             || (p - 2 == pattern && p == pend))
1041                                 goto normal_backslash;
1042
1043                       handle_interval:
1044                         {
1045                                 int lower_bound = -1, upper_bound = -1;
1046                                 beg_interval = p - 1;
1047
1048                                 if (p == pend) {
1049                                         if (syntax & RE_NO_BK_BRACES)
1050                                                 goto unfetch_interval;
1051                                         else
1052                                                 return REG_EBRACE;
1053                                 }
1054
1055                                 GET_UNSIGNED_NUMBER(lower_bound);
1056
1057                                 if (c == ',') {
1058                                         GET_UNSIGNED_NUMBER(upper_bound);
1059                                         if (upper_bound < 0)
1060                                                 upper_bound = RE_DUP_MAX;
1061                                 } else
1062                                         upper_bound = lower_bound;
1063
1064                                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1065                                     || lower_bound > upper_bound) {
1066                                         if (syntax & RE_NO_BK_BRACES)
1067                                                 goto unfetch_interval;
1068                                         else
1069                                                 return REG_BADBR;
1070                                 }
1071
1072                                 if (!(syntax & RE_NO_BK_BRACES)) {
1073                                         if (c != '\\')
1074                                                 return REG_EBRACE;
1075
1076                                         PATFETCH(c);
1077                                 }
1078
1079                                 if (c != '}') {
1080                                         if (syntax & RE_NO_BK_BRACES)
1081                                                 goto unfetch_interval;
1082                                         else
1083                                                 return REG_BADBR;
1084                                 }
1085
1086                                 if (!laststart) {
1087                                         if (syntax & RE_CONTEXT_INVALID_OPS)
1088                                                 return REG_BADRPT;
1089                                         else if (syntax & RE_CONTEXT_INDEP_OPS)
1090                                                 laststart = b;
1091                                         else
1092                                                 goto unfetch_interval;
1093                                 }
1094
1095                                 if (upper_bound == 0) {
1096                                         GET_BUFFER_SPACE(3);
1097                                         INSERT_JUMP(jump, laststart, b + 3);
1098                                         b += 3;
1099                                 }
1100
1101                                 else {
1102                                         unsigned nbytes =
1103                                             10 + (upper_bound > 1) * 10;
1104
1105                                         GET_BUFFER_SPACE(nbytes);
1106
1107                                         INSERT_JUMP2(succeed_n, laststart,
1108                                                      b + 5 + (upper_bound >
1109                                                       1) * 5, lower_bound);
1110                                         b += 5;
1111
1112                                         insert_op2(set_number_at, laststart, 5,
1113                                                    lower_bound, b);
1114                                         b += 5;
1115
1116                                         if (upper_bound > 1) {
1117                                                 STORE_JUMP2(jump_n, b,
1118                                                             laststart + 5,
1119                                                             upper_bound - 1);
1120                                                 b += 5;
1121
1122                                                 insert_op2(set_number_at,
1123                                                            laststart,
1124                                                            b - laststart,
1125                                                            upper_bound - 1, b);
1126                                                 b += 5;
1127                                         }
1128                                 }
1129                                 pending_exact = 0;
1130                                 beg_interval = NULL;
1131                         }
1132                         break;
1133
1134                       unfetch_interval:
1135                         assert(beg_interval);
1136                         p = beg_interval;
1137                         beg_interval = NULL;
1138
1139                         /* normal_char and normal_backslash need `c'.  */
1140                         PATFETCH(c);
1141
1142                         if (!(syntax & RE_NO_BK_BRACES)) {
1143                                 if (p > pattern && p[-1] == '\\')
1144                                         goto normal_backslash;
1145                         }
1146                         goto normal_char;
1147
1148                         case 'w':
1149                                 if (re_syntax_options & RE_NO_GNU_OPS)
1150                                         goto normal_char;
1151                                 laststart = b;
1152                                 BUF_PUSH(wordchar);
1153                                 break;
1154
1155
1156                         case 'W':
1157                                 if (re_syntax_options & RE_NO_GNU_OPS)
1158                                         goto normal_char;
1159                                 laststart = b;
1160                                 BUF_PUSH(notwordchar);
1161                                 break;
1162
1163
1164                         case '<':
1165                                 if (re_syntax_options & RE_NO_GNU_OPS)
1166                                         goto normal_char;
1167                                 BUF_PUSH(wordbeg);
1168                                 break;
1169
1170                         case '>':
1171                                 if (re_syntax_options & RE_NO_GNU_OPS)
1172                                         goto normal_char;
1173                                 BUF_PUSH(wordend);
1174                                 break;
1175
1176                         case 'b':
1177                                 if (re_syntax_options & RE_NO_GNU_OPS)
1178                                         goto normal_char;
1179                                 BUF_PUSH(wordbound);
1180                                 break;
1181
1182                         case 'B':
1183                                 if (re_syntax_options & RE_NO_GNU_OPS)
1184                                         goto normal_char;
1185                                 BUF_PUSH(notwordbound);
1186                                 break;
1187
1188                         case '`':
1189                                 if (re_syntax_options & RE_NO_GNU_OPS)
1190                                         goto normal_char;
1191                                 BUF_PUSH(begbuf);
1192                                 break;
1193
1194                         case '\'':
1195                                 if (re_syntax_options & RE_NO_GNU_OPS)
1196                                         goto normal_char;
1197                                 BUF_PUSH(endbuf);
1198                                 break;
1199
1200                         case '1':
1201                         case '2':
1202                         case '3':
1203                         case '4':
1204                         case '5':
1205                         case '6':
1206                         case '7':
1207                         case '8':
1208                         case '9':
1209                                 if (syntax & RE_NO_BK_REFS)
1210                                         goto normal_char;
1211
1212                                 c1 = c - '0';
1213
1214                                 if (c1 > regnum)
1215                                         return REG_ESUBREG;
1216
1217                                 /* Can't back reference to a subexpression if inside of it.  */
1218                                 if (group_in_compile_stack
1219                                     (compile_stack, (regnum_t) c1))
1220                                         goto normal_char;
1221
1222                                 laststart = b;
1223                                 BUF_PUSH_2(duplicate, c1);
1224                                 break;
1225
1226
1227                         case '+':
1228                         case '?':
1229                                 if (syntax & RE_BK_PLUS_QM)
1230                                         goto handle_plus;
1231                                 else
1232                                         goto normal_backslash;
1233
1234                         default:
1235                               normal_backslash:
1236                                 /* You might think it would be useful for \ to mean
1237                                    not to translate; but if we don't translate it
1238                                    it will never match anything.  */
1239                                 c = TRANSLATE(c);
1240                                 goto normal_char;
1241                         }
1242                         break;
1243
1244
1245                 default:
1246                         /* Expects the character in `c'.  */
1247                       normal_char:
1248                         /* If no exactn currently being built.  */
1249                         if (!pending_exact
1250                             /* If last exactn not at current position.  */
1251                             || pending_exact + *pending_exact + 1 != b
1252                             /* We have only one byte following the exactn for the count.  */
1253                             || *pending_exact == (1 << BYTEWIDTH) - 1
1254                             /* If followed by a repetition operator.  */
1255                             || *p == '*' || *p == '^'
1256                             || ((syntax & RE_BK_PLUS_QM)
1257                                 ? *p == '\\' && (p[1] == '+'
1258                                                  || p[1] == '?')
1259                                 : (*p == '+' || *p == '?'))
1260                             || ((syntax & RE_INTERVALS)
1261                                 && ((syntax & RE_NO_BK_BRACES)
1262                                     ? *p == '{'
1263                                     : (p[0] == '\\' && p[1] == '{')))) {
1264                                 /* Start building a new exactn.  */
1265
1266                                 laststart = b;
1267
1268                                 BUF_PUSH_2(exactn, 0);
1269                                 pending_exact = b - 1;
1270                         }
1271
1272                         BUF_PUSH(c);
1273                         (*pending_exact)++;
1274                         break;
1275                 }               /* switch (c) */
1276         }                       /* while p != pend */
1277
1278
1279         /* Through the pattern now.  */
1280
1281         if (fixup_alt_jump)
1282                 STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
1283
1284         if (!COMPILE_STACK_EMPTY)
1285                 return REG_EPAREN;
1286
1287         free(compile_stack.stack);
1288
1289         /* We have succeeded; set the length of the buffer.  */
1290         bufp->used = b - bufp->buffer;
1291
1292         return REG_NOERROR;
1293 }                               /* regex_compile */
1294
1295 /* Subroutines for `regex_compile'.  */
1296
1297 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
1298
1299 static void store_op1(op, loc, arg)
1300 re_opcode_t op;
1301 unsigned char *loc;
1302 int arg;
1303 {
1304         *loc = (unsigned char) op;
1305         STORE_NUMBER(loc + 1, arg);
1306 }
1307
1308
1309 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
1310
1311 static void store_op2(op, loc, arg1, arg2)
1312 re_opcode_t op;
1313 unsigned char *loc;
1314 int arg1, arg2;
1315 {
1316         *loc = (unsigned char) op;
1317         STORE_NUMBER(loc + 1, arg1);
1318         STORE_NUMBER(loc + 3, arg2);
1319 }
1320
1321
1322 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
1323    for OP followed by two-byte integer parameter ARG.  */
1324
1325 static void insert_op1(op, loc, arg, end)
1326 re_opcode_t op;
1327 unsigned char *loc;
1328 int arg;
1329 unsigned char *end;
1330 {
1331         register unsigned char *pfrom = end;
1332         register unsigned char *pto = end + 3;
1333
1334         while (pfrom != loc)
1335                 *--pto = *--pfrom;
1336
1337         store_op1(op, loc, arg);
1338 }
1339
1340
1341 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
1342
1343 static void insert_op2(op, loc, arg1, arg2, end)
1344 re_opcode_t op;
1345 unsigned char *loc;
1346 int arg1, arg2;
1347 unsigned char *end;
1348 {
1349         register unsigned char *pfrom = end;
1350         register unsigned char *pto = end + 5;
1351
1352         while (pfrom != loc)
1353                 *--pto = *--pfrom;
1354
1355         store_op2(op, loc, arg1, arg2);
1356 }
1357
1358
1359 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
1360    after an alternative or a begin-subexpression.  We assume there is at
1361    least one character before the ^.  */
1362
1363 static boolean at_begline_loc_p(pattern, p, syntax)
1364 const char *pattern, *p;
1365 reg_syntax_t syntax;
1366 {
1367         const char *prev = p - 2;
1368         boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
1369
1370         return
1371             /* After a subexpression?  */
1372             (*prev == '('
1373              && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
1374             /* After an alternative?  */
1375             || (*prev == '|'
1376                 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
1377 }
1378
1379
1380 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
1381    at least one character after the $, i.e., `P < PEND'.  */
1382
1383 static boolean at_endline_loc_p(p, pend, syntax)
1384 const char *p, *pend;
1385 reg_syntax_t syntax;
1386 {
1387         const char *next = p;
1388         boolean next_backslash = *next == '\\';
1389         const char *next_next = p + 1 < pend ? p + 1 : NULL;
1390
1391         return
1392             /* Before a subexpression?  */
1393             (syntax & RE_NO_BK_PARENS ? *next == ')'
1394              : next_backslash && next_next && *next_next == ')')
1395             /* Before an alternative?  */
1396             || (syntax & RE_NO_BK_VBAR ? *next == '|'
1397                 : next_backslash && next_next && *next_next == '|');
1398 }
1399
1400
1401 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
1402    false if it's not.  */
1403
1404 static boolean group_in_compile_stack(compile_stack, regnum)
1405 compile_stack_type compile_stack;
1406 regnum_t regnum;
1407 {
1408         int this_element;
1409
1410         for (this_element = compile_stack.avail - 1;
1411              this_element >= 0; this_element--)
1412                 if (compile_stack.stack[this_element].regnum == regnum)
1413                         return true;
1414
1415         return false;
1416 }
1417
1418
1419 /* Read the ending character of a range (in a bracket expression) from the
1420    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
1421    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
1422    Then we set the translation of all bits between the starting and
1423    ending characters (inclusive) in the compiled pattern B.
1424
1425    Return an error code.
1426
1427    We use these short variable names so we can use the same macros as
1428    `regex_compile' itself.  */
1429
1430 static reg_errcode_t compile_range(p_ptr, pend, translate, syntax, b)
1431 const char **p_ptr, *pend;
1432 char *translate;
1433 reg_syntax_t syntax;
1434 unsigned char *b;
1435 {
1436         unsigned this_char;
1437
1438         const char *p = *p_ptr;
1439         int range_start, range_end;
1440
1441         if (p == pend)
1442                 return REG_ERANGE;
1443
1444         /* Even though the pattern is a signed `char *', we need to fetch
1445            with unsigned char *'s; if the high bit of the pattern character
1446            is set, the range endpoints will be negative if we fetch using a
1447            signed char *.
1448
1449            We also want to fetch the endpoints without translating them; the
1450            appropriate translation is done in the bit-setting loop below.  */
1451         range_start = ((unsigned char *) p)[-2];
1452         range_end = ((unsigned char *) p)[0];
1453
1454         /* Have to increment the pointer into the pattern string, so the
1455            caller isn't still at the ending character.  */
1456         (*p_ptr)++;
1457
1458         /* If the start is after the end, the range is empty.  */
1459         if (range_start > range_end)
1460                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE :
1461                     REG_NOERROR;
1462
1463         /* Here we see why `this_char' has to be larger than an `unsigned
1464            char' -- the range is inclusive, so if `range_end' == 0xff
1465            (assuming 8-bit characters), we would otherwise go into an infinite
1466            loop, since all characters <= 0xff.  */
1467         for (this_char = range_start; this_char <= range_end; this_char++) {
1468                 SET_LIST_BIT(TRANSLATE(this_char));
1469         }
1470         return REG_NOERROR;
1471 }
1472
1473 /* Failure stack declarations and macros; both re_compile_fastmap and
1474    re_match_2 use a failure stack.  These have to be macros because of
1475    REGEX_ALLOCATE.  */
1476
1477
1478 /* Number of failure points for which to initially allocate space
1479    when matching.  If this number is exceeded, we allocate more
1480    space, so it is not a hard limit.  */
1481 #define INIT_FAILURE_ALLOC 5
1482
1483 /* Roughly the maximum number of failure points on the stack.  Would be
1484    exactly that if always used MAX_FAILURE_SPACE each time we failed.
1485    This is a variable only so users of regex can assign to it; we never
1486    change it ourselves.  */
1487 int re_max_failures = 2000;
1488
1489 typedef const unsigned char *fail_stack_elt_t;
1490
1491 typedef struct {
1492         fail_stack_elt_t *stack;
1493         unsigned size;
1494         unsigned avail;         /* Offset of next open position.  */
1495 } fail_stack_type;
1496
1497 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1498 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1499 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1500 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
1501
1502
1503 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
1504
1505 #define INIT_FAIL_STACK()                                               \
1506   do {                                                                  \
1507     fail_stack.stack = (fail_stack_elt_t *)                             \
1508       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
1509                                                                         \
1510     if (fail_stack.stack == NULL)                                       \
1511       return -2;                                                        \
1512                                                                         \
1513     fail_stack.size = INIT_FAILURE_ALLOC;                               \
1514     fail_stack.avail = 0;                                               \
1515   } while (0)
1516
1517
1518 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1519
1520    Return 1 if succeeds, and 0 if either ran out of memory
1521    allocating space for it or it was already too large.
1522
1523    REGEX_REALLOCATE requires `destination' be declared.   */
1524
1525 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1526   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
1527    ? 0                                                                  \
1528    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1529         REGEX_REALLOCATE ((fail_stack).stack,                           \
1530           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1531           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1532                                                                         \
1533       (fail_stack).stack == NULL                                        \
1534       ? 0                                                               \
1535       : ((fail_stack).size <<= 1,                                       \
1536          1)))
1537
1538
1539 /* Push PATTERN_OP on FAIL_STACK.
1540
1541    Return 1 if was able to do so and 0 if ran out of memory allocating
1542    space to do so.  */
1543 #define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
1544   ((FAIL_STACK_FULL ()                                                  \
1545     && !DOUBLE_FAIL_STACK (fail_stack))                                 \
1546     ? 0                                                                 \
1547     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,           \
1548        1))
1549
1550 /* This pushes an item onto the failure stack.  Must be a four-byte
1551    value.  Assumes the variable `fail_stack'.  Probably should only
1552    be called from within `PUSH_FAILURE_POINT'.  */
1553 #define PUSH_FAILURE_ITEM(item)                                         \
1554   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
1555
1556 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
1557 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
1558
1559 /* Used to omit pushing failure point id's when we're not debugging.  */
1560 #define DEBUG_PUSH(item)
1561 #define DEBUG_POP(item_addr)
1562
1563
1564 /* Push the information about the state we will need
1565    if we ever fail back to it.
1566
1567    Requires variables fail_stack, regstart, regend, reg_info, and
1568    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1569    declared.
1570
1571    Does `return FAILURE_CODE' if runs out of memory.  */
1572
1573 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1574   do {                                                                  \
1575     char *destination;                                                  \
1576     /* Must be int, so when we don't save any registers, the arithmetic \
1577        of 0 + -1 isn't done as unsigned.  */                            \
1578     /* Can't be int, since there is not a shred of a guarantee that int \
1579        is wide enough to hold a value of something to which pointer can \
1580        be assigned */                                                   \
1581     s_reg_t this_reg;                                                   \
1582                                                                         \
1583     DEBUG_STATEMENT (failure_id++);                                     \
1584     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
1585     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
1586     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1587     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1588                                                                         \
1589     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
1590     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
1591                                                                         \
1592     /* Ensure we have enough space allocated for what we will push.  */ \
1593     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
1594       {                                                                 \
1595         if (!DOUBLE_FAIL_STACK (fail_stack))                    \
1596           return failure_code;                                          \
1597                                                                         \
1598         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
1599                        (fail_stack).size);                              \
1600         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1601       }
1602
1603 #define PUSH_FAILURE_POINT2(pattern_place, string_place, failure_code)  \
1604     /* Push the info, starting with the registers.  */                  \
1605     DEBUG_PRINT1 ("\n");                                                \
1606                                                                         \
1607     PUSH_FAILURE_POINT_LOOP ();                                         \
1608                                                                         \
1609     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1610     PUSH_FAILURE_ITEM (lowest_active_reg);                              \
1611                                                                         \
1612     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1613     PUSH_FAILURE_ITEM (highest_active_reg);                             \
1614                                                                         \
1615     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
1616     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
1617     PUSH_FAILURE_ITEM (pattern_place);                                  \
1618                                                                         \
1619     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
1620     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1621                                  size2);                                \
1622     DEBUG_PRINT1 ("'\n");                                               \
1623     PUSH_FAILURE_ITEM (string_place);                                   \
1624                                                                         \
1625     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
1626     DEBUG_PUSH (failure_id);                                            \
1627   } while (0)
1628
1629 /*  Pulled out of PUSH_FAILURE_POINT() to shorten the definition
1630     of that macro.  (for VAX C) */
1631 #define PUSH_FAILURE_POINT_LOOP()                                       \
1632     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
1633          this_reg++)                                                    \
1634       {                                                                 \
1635         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
1636         DEBUG_STATEMENT (num_regs_pushed++);                            \
1637                                                                         \
1638         DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
1639         PUSH_FAILURE_ITEM (regstart[this_reg]);                         \
1640                                                                         \
1641         DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
1642         PUSH_FAILURE_ITEM (regend[this_reg]);                           \
1643                                                                         \
1644         DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
1645         DEBUG_PRINT2 (" match_null=%d",                                 \
1646                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
1647         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
1648         DEBUG_PRINT2 (" matched_something=%d",                          \
1649                       MATCHED_SOMETHING (reg_info[this_reg]));          \
1650         DEBUG_PRINT2 (" ever_matched=%d",                               \
1651                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
1652         DEBUG_PRINT1 ("\n");                                            \
1653         PUSH_FAILURE_ITEM (reg_info[this_reg].word);                    \
1654       }
1655
1656 /* This is the number of items that are pushed and popped on the stack
1657    for each register.  */
1658 #define NUM_REG_ITEMS  3
1659
1660 /* Individual items aside from the registers.  */
1661 #define NUM_NONREG_ITEMS 4
1662
1663 /* We push at most this many items on the stack.  */
1664 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1665
1666 /* We actually push this many items.  */
1667 #define NUM_FAILURE_ITEMS                                               \
1668   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
1669     + NUM_NONREG_ITEMS)
1670
1671 /* How many items can still be added to the stack without overflowing it.  */
1672 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1673
1674
1675 /* Pops what PUSH_FAIL_STACK pushes.
1676
1677    We restore into the parameters, all of which should be lvalues:
1678      STR -- the saved data position.
1679      PAT -- the saved pattern position.
1680      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1681      REGSTART, REGEND -- arrays of string positions.
1682      REG_INFO -- array of information about each subexpression.
1683
1684    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1685    `pend', `string1', `size1', `string2', and `size2'.  */
1686
1687 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1688 {                                                                       \
1689   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
1690   s_reg_t this_reg;                                                     \
1691   const unsigned char *string_temp;                                     \
1692                                                                         \
1693   assert (!FAIL_STACK_EMPTY ());                                        \
1694                                                                         \
1695   /* Remove failure points and point to how many regs pushed.  */       \
1696   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1697   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1698   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1699                                                                         \
1700   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
1701                                                                         \
1702   DEBUG_POP (&failure_id);                                              \
1703   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
1704                                                                         \
1705   /* If the saved string location is NULL, it came from an              \
1706      on_failure_keep_string_jump opcode, and we want to throw away the  \
1707      saved NULL, thus retaining our current position in the string.  */ \
1708   string_temp = POP_FAILURE_ITEM ();                                    \
1709   if (string_temp != NULL)                                              \
1710     str = (const char *) string_temp;                                   \
1711                                                                         \
1712   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
1713   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1714   DEBUG_PRINT1 ("'\n");                                                 \
1715                                                                         \
1716   pat = (unsigned char *) POP_FAILURE_ITEM ();                          \
1717   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
1718   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1719                                                                         \
1720   POP_FAILURE_POINT2 (low_reg, high_reg, regstart, regend, reg_info);
1721
1722 /*  Pulled out of POP_FAILURE_POINT() to shorten the definition
1723     of that macro.  (for MSC 5.1) */
1724 #define POP_FAILURE_POINT2(low_reg, high_reg, regstart, regend, reg_info) \
1725                                                                         \
1726   /* Restore register info.  */                                         \
1727   high_reg = (active_reg_t) POP_FAILURE_ITEM ();                        \
1728   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
1729                                                                         \
1730   low_reg = (active_reg_t) POP_FAILURE_ITEM ();                         \
1731   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
1732                                                                         \
1733   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
1734     {                                                                   \
1735       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                 \
1736                                                                         \
1737       reg_info[this_reg].word = POP_FAILURE_ITEM ();                    \
1738       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);          \
1739                                                                         \
1740       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();            \
1741       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);             \
1742                                                                         \
1743       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();          \
1744       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);         \
1745     }                                                                   \
1746                                                                         \
1747   DEBUG_STATEMENT (nfailure_points_popped++);                           \
1748 }                               /* POP_FAILURE_POINT */
1749
1750 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
1751    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
1752    characters can start a string that matches the pattern.  This fastmap
1753    is used by re_search to skip quickly over impossible starting points.
1754
1755    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
1756    area as BUFP->fastmap.
1757
1758    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
1759    the pattern buffer.
1760
1761    Returns 0 if we succeed, -2 if an internal error.   */
1762
1763 int re_compile_fastmap(bufp)
1764 struct re_pattern_buffer *bufp;
1765 {
1766         int j, k;
1767         fail_stack_type fail_stack;
1768         char *destination;
1769         /* We don't push any register information onto the failure stack.  */
1770         unsigned num_regs = 0;
1771
1772         register char *fastmap = bufp->fastmap;
1773         unsigned char *pattern = bufp->buffer;
1774         const unsigned char *p = pattern;
1775         register unsigned char *pend = pattern + bufp->used;
1776
1777         /* Assume that each path through the pattern can be null until
1778            proven otherwise.  We set this false at the bottom of switch
1779            statement, to which we get only if a particular path doesn't
1780            match the empty string.  */
1781         boolean path_can_be_null = true;
1782
1783         /* We aren't doing a `succeed_n' to begin with.  */
1784         boolean succeed_n_p = false;
1785
1786         assert(fastmap != NULL && p != NULL);
1787
1788         INIT_FAIL_STACK();
1789         bzero(fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid.  */
1790         bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
1791         bufp->can_be_null = 0;
1792
1793         while (p != pend || !FAIL_STACK_EMPTY()) {
1794                 if (p == pend) {
1795                         bufp->can_be_null |= path_can_be_null;
1796
1797                         /* Reset for next path.  */
1798                         path_can_be_null = true;
1799
1800                         p = fail_stack.stack[--fail_stack.avail];
1801                 }
1802
1803                 /* We should never be about to go beyond the end of the pattern.  */
1804                 assert(p < pend);
1805
1806                 switch ((re_opcode_t) * p++) {
1807
1808                         /* I guess the idea here is to simply not bother with a fastmap
1809                            if a backreference is used, since it's too hard to figure out
1810                            the fastmap for the corresponding group.  Setting
1811                            `can_be_null' stops `re_search_2' from using the fastmap, so
1812                            that is all we do.  */
1813                 case duplicate:
1814                         bufp->can_be_null = 1;
1815                         return 0;
1816
1817
1818                         /* Following are the cases which match a character.  These end
1819                            with `break'.  */
1820
1821                 case exactn:
1822                         fastmap[p[1]] = 1;
1823                         break;
1824
1825
1826                 case charset:
1827                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
1828                                 if (p[j / BYTEWIDTH] &
1829                                     (1 << (j % BYTEWIDTH)))
1830                                         fastmap[j] = 1;
1831                         break;
1832
1833
1834                 case charset_not:
1835                         /* Chars beyond end of map must be allowed.  */
1836                         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
1837                                 fastmap[j] = 1;
1838
1839                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
1840                                 if (!
1841                                     (p[j / BYTEWIDTH] &
1842                                      (1 << (j % BYTEWIDTH))))
1843                                         fastmap[j] = 1;
1844                         break;
1845
1846
1847                 case wordchar:
1848                         for (j = 0; j < (1 << BYTEWIDTH); j++)
1849                                 if (SYNTAX(j) == Sword)
1850                                         fastmap[j] = 1;
1851                         break;
1852
1853
1854                 case notwordchar:
1855                         for (j = 0; j < (1 << BYTEWIDTH); j++)
1856                                 if (SYNTAX(j) != Sword)
1857                                         fastmap[j] = 1;
1858                         break;
1859
1860
1861                 case anychar:
1862                         /* `.' matches anything ...  */
1863                         for (j = 0; j < (1 << BYTEWIDTH); j++)
1864                                 fastmap[j] = 1;
1865
1866                         /* ... except perhaps newline.  */
1867                         if (!(bufp->syntax & RE_DOT_NEWLINE))
1868                                 fastmap['\n'] = 0;
1869
1870                         /* Return if we have already set `can_be_null'; if we have,
1871                            then the fastmap is irrelevant.  Something's wrong here.  */
1872                         else if (bufp->can_be_null)
1873                                 return 0;
1874
1875                         /* Otherwise, have to check alternative paths.  */
1876                         break;
1877
1878                 case no_op:
1879                 case begline:
1880                 case endline:
1881                 case begbuf:
1882                 case endbuf:
1883                 case wordbound:
1884                 case notwordbound:
1885                 case wordbeg:
1886                 case wordend:
1887                 case push_dummy_failure:
1888                         continue;
1889
1890
1891                 case jump_n:
1892                 case pop_failure_jump:
1893                 case maybe_pop_jump:
1894                 case jump:
1895                 case jump_past_alt:
1896                 case dummy_failure_jump:
1897                         EXTRACT_NUMBER_AND_INCR(j, p);
1898                         p += j;
1899                         if (j > 0)
1900                                 continue;
1901
1902                         /* Jump backward implies we just went through the body of a
1903                            loop and matched nothing.  Opcode jumped to should be
1904                            `on_failure_jump' or `succeed_n'.  Just treat it like an
1905                            ordinary jump.  For a * loop, it has pushed its failure
1906                            point already; if so, discard that as redundant.  */
1907                         if ((re_opcode_t) * p != on_failure_jump
1908                             && (re_opcode_t) * p != succeed_n)
1909                                 continue;
1910
1911                         p++;
1912                         EXTRACT_NUMBER_AND_INCR(j, p);
1913                         p += j;
1914
1915                         /* If what's on the stack is where we are now, pop it.  */
1916                         if (!FAIL_STACK_EMPTY()
1917                             && fail_stack.stack[fail_stack.avail - 1] == p)
1918                                 fail_stack.avail--;
1919
1920                         continue;
1921
1922
1923                 case on_failure_jump:
1924                 case on_failure_keep_string_jump:
1925                       handle_on_failure_jump:
1926                         EXTRACT_NUMBER_AND_INCR(j, p);
1927
1928                         /* For some patterns, e.g., `(a?)?', `p+j' here points to the
1929                            end of the pattern.  We don't want to push such a point,
1930                            since when we restore it above, entering the switch will
1931                            increment `p' past the end of the pattern.  We don't need
1932                            to push such a point since we obviously won't find any more
1933                            fastmap entries beyond `pend'.  Such a pattern can match
1934                            the null string, though.  */
1935                         if (p + j < pend) {
1936                                 if (!PUSH_PATTERN_OP(p + j, fail_stack))
1937                                         return -2;
1938                         } else
1939                                 bufp->can_be_null = 1;
1940
1941                         if (succeed_n_p) {
1942                                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
1943                                 succeed_n_p = false;
1944                         }
1945
1946                         continue;
1947
1948
1949                 case succeed_n:
1950                         /* Get to the number of times to succeed.  */
1951                         p += 2;
1952
1953                         /* Increment p past the n for when k != 0.  */
1954                         EXTRACT_NUMBER_AND_INCR(k, p);
1955                         if (k == 0) {
1956                                 p -= 4;
1957                                 succeed_n_p = true;     /* Spaghetti code alert.  */
1958                                 goto handle_on_failure_jump;
1959                         }
1960                         continue;
1961
1962
1963                 case set_number_at:
1964                         p += 4;
1965                         continue;
1966
1967
1968                 case start_memory:
1969                 case stop_memory:
1970                         p += 2;
1971                         continue;
1972
1973
1974                 default:
1975                         abort();        /* We have listed all the cases.  */
1976                 }               /* switch *p++ */
1977
1978                 /* Getting here means we have found the possible starting
1979                    characters for one path of the pattern -- and that the empty
1980                    string does not match.  We need not follow this path further.
1981                    Instead, look at the next alternative (remembered on the
1982                    stack), or quit if no more.  The test at the top of the loop
1983                    does these things.  */
1984                 path_can_be_null = false;
1985                 p = pend;
1986         }                       /* while p */
1987
1988         /* Set `can_be_null' for the last path (also the first path, if the
1989            pattern is empty).  */
1990         bufp->can_be_null |= path_can_be_null;
1991         return 0;
1992 }                               /* re_compile_fastmap */
1993
1994 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
1995    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
1996    this memory for recording register information.  STARTS and ENDS
1997    must be allocated using the malloc library routine, and must each
1998    be at least NUM_REGS * sizeof (regoff_t) bytes long.
1999
2000    If NUM_REGS == 0, then subsequent matches should allocate their own
2001    register data.
2002
2003    Unless this function is called, the first search or match using
2004    PATTERN_BUFFER will allocate its own register data, without
2005    freeing the old data.  */
2006
2007 void re_set_registers(bufp, regs, num_regs, starts, ends)
2008 struct re_pattern_buffer *bufp;
2009 struct re_registers *regs;
2010 unsigned num_regs;
2011 regoff_t *starts, *ends;
2012 {
2013         if (num_regs) {
2014                 bufp->regs_allocated = REGS_REALLOCATE;
2015                 regs->num_regs = num_regs;
2016                 regs->start = starts;
2017                 regs->end = ends;
2018         } else {
2019                 bufp->regs_allocated = REGS_UNALLOCATED;
2020                 regs->num_regs = 0;
2021                 regs->start = regs->end = 0;
2022         }
2023 }
2024
2025 /* Searching routines.  */
2026
2027 /* Like re_search_2, below, but only one string is specified, and
2028    doesn't let you say where to stop matching. */
2029
2030 int re_search(bufp, string, size, startpos, range, regs)
2031 struct re_pattern_buffer *bufp;
2032 const char *string;
2033 int size, startpos, range;
2034 struct re_registers *regs;
2035 {
2036         return re_search_2(bufp, NULL, 0, string, size, startpos, range,
2037                            regs, size);
2038 }
2039
2040
2041 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2042    virtual concatenation of STRING1 and STRING2, starting first at index
2043    STARTPOS, then at STARTPOS + 1, and so on.
2044
2045    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2046
2047    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2048    only at STARTPOS; in general, the last start tried is STARTPOS +
2049    RANGE.
2050
2051    In REGS, return the indices of the virtual concatenation of STRING1
2052    and STRING2 that matched the entire BUFP->buffer and its contained
2053    subexpressions.
2054
2055    Do not consider matching one past the index STOP in the virtual
2056    concatenation of STRING1 and STRING2.
2057
2058    We return either the position in the strings at which the match was
2059    found, -1 if no match, or -2 if error (such as failure
2060    stack overflow).  */
2061
2062 int
2063 re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs,
2064             stop)
2065 struct re_pattern_buffer *bufp;
2066 const char *string1, *string2;
2067 int size1, size2;
2068 int startpos;
2069 int range;
2070 struct re_registers *regs;
2071 int stop;
2072 {
2073         int val;
2074         register char *fastmap = bufp->fastmap;
2075         register char *translate = bufp->translate;
2076         int total_size = size1 + size2;
2077         int endpos = startpos + range;
2078
2079         /* Check for out-of-range STARTPOS.  */
2080         if (startpos < 0 || startpos > total_size)
2081                 return -1;
2082
2083         /* Fix up RANGE if it might eventually take us outside
2084            the virtual concatenation of STRING1 and STRING2.  */
2085         if (endpos < -1)
2086                 range = -1 - startpos;
2087         else if (endpos > total_size)
2088                 range = total_size - startpos;
2089
2090         /* If the search isn't to be a backwards one, don't waste time in a
2091            search for a pattern that must be anchored.  */
2092         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
2093             && range > 0) {
2094                 if (startpos > 0)
2095                         return -1;
2096                 else
2097                         range = 1;
2098         }
2099
2100         /* Update the fastmap now if not correct already.  */
2101         if (fastmap && !bufp->fastmap_accurate)
2102                 if (re_compile_fastmap(bufp) == -2)
2103                         return -2;
2104
2105         /* Loop through the string, looking for a place to start matching.  */
2106         for (;;) {
2107                 /* If a fastmap is supplied, skip quickly over characters that
2108                    cannot be the start of a match.  If the pattern can match the
2109                    null string, however, we don't need to skip characters; we want
2110                    the first null string.  */
2111                 if (fastmap && startpos < total_size && !bufp->can_be_null) {
2112                         if (range > 0) {        /* Searching forwards.  */
2113                                 register const char *d;
2114                                 register int lim = 0;
2115                                 int irange = range;
2116
2117                                 if (startpos < size1
2118                                     && startpos + range >= size1)
2119                                         lim = range - (size1 - startpos);
2120
2121                                 d = (startpos >=
2122                                      size1 ? string2 - size1 : string1) +
2123                                     startpos;
2124
2125                                 /* Written out as an if-else to avoid testing `translate'
2126                                    inside the loop.  */
2127                                 if (translate)
2128                                         while (range > lim
2129                                                && !fastmap[(unsigned char)
2130                                                            translate[(unsigned char) *d++]])
2131                                                 range--;
2132                                 else
2133                                         while (range > lim
2134                                                && !fastmap[(unsigned char)
2135                                                            *d++])
2136                                                 range--;
2137
2138                                 startpos += irange - range;
2139                         } else {        /* Searching backwards.  */
2140
2141                                 register char c = (size1 == 0
2142                                                    || startpos >=
2143                                                    size1 ? string2[startpos
2144                                                                    - size1]
2145                                                    : string1[startpos]);
2146
2147                                 if (!fastmap[(unsigned char) TRANSLATE(c)])
2148                                         goto advance;
2149                         }
2150                 }
2151
2152                 /* If can't match the null string, and that's all we have left, fail.  */
2153                 if (range >= 0 && startpos == total_size && fastmap
2154                     && !bufp->can_be_null)
2155                         return -1;
2156
2157                 val = re_match_2(bufp, string1, size1, string2, size2,
2158                                  startpos, regs, stop);
2159                 if (val >= 0)
2160                         return startpos;
2161
2162                 if (val == -2)
2163                         return -2;
2164
2165               advance:
2166                 if (!range)
2167                         break;
2168                 else if (range > 0) {
2169                         range--;
2170                         startpos++;
2171                 } else {
2172                         range++;
2173                         startpos--;
2174                 }
2175         }
2176         return -1;
2177 }                               /* re_search_2 */
2178
2179 /* Structure for per-register (a.k.a. per-group) information.
2180    This must not be longer than one word, because we push this value
2181    onto the failure stack.  Other register information, such as the
2182    starting and ending positions (which are addresses), and the list of
2183    inner groups (which is a bits list) are maintained in separate
2184    variables.
2185
2186    We are making a (strictly speaking) nonportable assumption here: that
2187    the compiler will pack our bit fields into something that fits into
2188    the type of `word', i.e., is something that fits into one item on the
2189    failure stack.  */
2190
2191 /* Declarations and macros for re_match_2.  */
2192
2193 typedef union {
2194         fail_stack_elt_t word;
2195         struct {
2196                 /* This field is one if this group can match the empty string,
2197                    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
2198 #define MATCH_NULL_UNSET_VALUE 3
2199                 unsigned match_null_string_p:2;
2200                 unsigned is_active:1;
2201                 unsigned matched_something:1;
2202                 unsigned ever_matched_something:1;
2203         } bits;
2204 } register_info_type;
2205
2206 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
2207 #define IS_ACTIVE(R)  ((R).bits.is_active)
2208 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
2209 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
2210
2211 static boolean group_match_null_string_p (unsigned char **p,
2212                                           unsigned char *end,
2213                                           register_info_type *
2214                                           reg_info);
2215
2216 static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end,
2217                                         register_info_type * reg_info);
2218
2219 static boolean common_op_match_null_string_p (unsigned char **p,
2220                                               unsigned char *end,
2221                                               register_info_type * reg_info);
2222
2223 static int bcmp_translate (const char *s1, const char *s2,
2224                            int len, char *translate);
2225
2226 /* Call this when have matched a real character; it sets `matched' flags
2227    for the subexpressions which we are currently inside.  Also records
2228    that those subexprs have matched.  */
2229 #define SET_REGS_MATCHED()                                              \
2230   do                                                                    \
2231     {                                                                   \
2232       active_reg_t r;                                                   \
2233       for (r = lowest_active_reg; r <= highest_active_reg; r++)         \
2234         {                                                               \
2235           MATCHED_SOMETHING (reg_info[r])                               \
2236             = EVER_MATCHED_SOMETHING (reg_info[r])                      \
2237             = 1;                                                        \
2238         }                                                               \
2239     }                                                                   \
2240   while (0)
2241
2242
2243 /* This converts PTR, a pointer into one of the search strings `string1'
2244    and `string2' into an offset from the beginning of that string.  */
2245 #define POINTER_TO_OFFSET(ptr)                                          \
2246   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
2247
2248 /* Registers are set to a sentinel when they haven't yet matched.  */
2249 #define REG_UNSET_VALUE ((char *) -1)
2250 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
2251
2252
2253 /* Macros for dealing with the split strings in re_match_2.  */
2254
2255 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
2256
2257 /* Call before fetching a character with *d.  This switches over to
2258    string2 if necessary.  */
2259 #define PREFETCH()                                                      \
2260   while (d == dend)                                                     \
2261     {                                                                   \
2262       /* End of string2 => fail.  */                                    \
2263       if (dend == end_match_2)                                          \
2264         goto fail;                                                      \
2265       /* End of string1 => advance to string2.  */                      \
2266       d = string2;                                                      \
2267       dend = end_match_2;                                               \
2268     }
2269
2270
2271 /* Test if at very beginning or at very end of the virtual concatenation
2272    of `string1' and `string2'.  If only one string, it's `string2'.  */
2273 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
2274 #define AT_STRINGS_END(d) ((d) == end2)
2275
2276
2277 /* Test if D points to a character which is word-constituent.  We have
2278    two special cases to check for: if past the end of string1, look at
2279    the first character in string2; and if before the beginning of
2280    string2, look at the last character in string1.  */
2281 #define WORDCHAR_P(d)                                                   \
2282   (SYNTAX ((d) == end1 ? *string2                                       \
2283            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
2284    == Sword)
2285
2286 /* Test if the character before D and the one at D differ with respect
2287    to being word-constituent.  */
2288 #define AT_WORD_BOUNDARY(d)                                             \
2289   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
2290    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
2291
2292
2293 /* Free everything we malloc.  */
2294 #define FREE_VARIABLES() alloca (0)
2295
2296 /* These values must meet several constraints.  They must not be valid
2297    register values; since we have a limit of 255 registers (because
2298    we use only one byte in the pattern for the register number), we can
2299    use numbers larger than 255.  They must differ by 1, because of
2300    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
2301    be larger than the value for the highest register, so we do not try
2302    to actually save any registers when none are active.  */
2303 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
2304 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
2305
2306 /* Matching routines.  */
2307
2308 /* re_match is like re_match_2 except it takes only a single string.  */
2309
2310 int re_match(bufp, string, size, pos, regs)
2311 struct re_pattern_buffer *bufp;
2312 const char *string;
2313 int size, pos;
2314 struct re_registers *regs;
2315 {
2316         return re_match_2(bufp, NULL, 0, string, size, pos, regs, size);
2317 }
2318
2319 /* re_match_2 matches the compiled pattern in BUFP against the
2320    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
2321    and SIZE2, respectively).  We start matching at POS, and stop
2322    matching at STOP.
2323
2324    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
2325    store offsets for the substring each group matched in REGS.  See the
2326    documentation for exactly how many groups we fill.
2327
2328    We return -1 if no match, -2 if an internal error (such as the
2329    failure stack overflowing).  Otherwise, we return the length of the
2330    matched substring.  */
2331
2332 int re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop)
2333 struct re_pattern_buffer *bufp;
2334 const char *string1, *string2;
2335 int size1, size2;
2336 int pos;
2337 struct re_registers *regs;
2338 int stop;
2339 {
2340         /* General temporaries.  */
2341         int mcnt;
2342         unsigned char *p1;
2343
2344         /* Just past the end of the corresponding string.  */
2345         const char *end1, *end2;
2346
2347         /* Pointers into string1 and string2, just past the last characters in
2348            each to consider matching.  */
2349         const char *end_match_1, *end_match_2;
2350
2351         /* Where we are in the data, and the end of the current string.  */
2352         const char *d, *dend;
2353
2354         /* Where we are in the pattern, and the end of the pattern.  */
2355         unsigned char *p = bufp->buffer;
2356         register unsigned char *pend = p + bufp->used;
2357
2358         /* We use this to map every character in the string.  */
2359         char *translate = bufp->translate;
2360
2361         /* Failure point stack.  Each place that can handle a failure further
2362            down the line pushes a failure point on this stack.  It consists of
2363            restart, regend, and reg_info for all registers corresponding to
2364            the subexpressions we're currently inside, plus the number of such
2365            registers, and, finally, two char *'s.  The first char * is where
2366            to resume scanning the pattern; the second one is where to resume
2367            scanning the strings.  If the latter is zero, the failure point is
2368            a ``dummy''; if a failure happens and the failure point is a dummy,
2369            it gets discarded and the next next one is tried.  */
2370         fail_stack_type fail_stack;
2371
2372         /* We fill all the registers internally, independent of what we
2373            return, for use in backreferences.  The number here includes
2374            an element for register zero.  */
2375         size_t num_regs = bufp->re_nsub + 1;
2376
2377         /* The currently active registers.  */
2378         active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
2379         active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
2380
2381         /* Information on the contents of registers. These are pointers into
2382            the input strings; they record just what was matched (on this
2383            attempt) by a subexpression part of the pattern, that is, the
2384            regnum-th regstart pointer points to where in the pattern we began
2385            matching and the regnum-th regend points to right after where we
2386            stopped matching the regnum-th subexpression.  (The zeroth register
2387            keeps track of what the whole pattern matches.)  */
2388         const char **regstart = 0, **regend = 0;
2389
2390         /* If a group that's operated upon by a repetition operator fails to
2391            match anything, then the register for its start will need to be
2392            restored because it will have been set to wherever in the string we
2393            are when we last see its open-group operator.  Similarly for a
2394            register's end.  */
2395         const char **old_regstart = 0, **old_regend = 0;
2396
2397         /* The is_active field of reg_info helps us keep track of which (possibly
2398            nested) subexpressions we are currently in. The matched_something
2399            field of reg_info[reg_num] helps us tell whether or not we have
2400            matched any of the pattern so far this time through the reg_num-th
2401            subexpression.  These two fields get reset each time through any
2402            loop their register is in.  */
2403         register_info_type *reg_info = 0;
2404
2405         /* The following record the register info as found in the above
2406            variables when we find a match better than any we've seen before.
2407            This happens as we backtrack through the failure points, which in
2408            turn happens only if we have not yet matched the entire string. */
2409         unsigned best_regs_set = false;
2410         const char **best_regstart = 0, **best_regend = 0;
2411
2412         /* Logically, this is `best_regend[0]'.  But we don't want to have to
2413            allocate space for that if we're not allocating space for anything
2414            else (see below).  Also, we never need info about register 0 for
2415            any of the other register vectors, and it seems rather a kludge to
2416            treat `best_regend' differently than the rest.  So we keep track of
2417            the end of the best match so far in a separate variable.  We
2418            initialize this to NULL so that when we backtrack the first time
2419            and need to test it, it's not garbage.  */
2420         const char *match_end = NULL;
2421
2422         /* Used when we pop values we don't care about.  */
2423         const char **reg_dummy = 0;
2424         register_info_type *reg_info_dummy = 0;
2425
2426         DEBUG_PRINT1("\n\nEntering re_match_2.\n");
2427
2428         INIT_FAIL_STACK();
2429
2430         /* Do not bother to initialize all the register variables if there are
2431            no groups in the pattern, as it takes a fair amount of time.  If
2432            there are groups, we include space for register 0 (the whole
2433            pattern), even though we never use it, since it simplifies the
2434            array indexing.  We should fix this.  */
2435         if (bufp->re_nsub) {
2436                 regstart = REGEX_TALLOC(num_regs, const char *);
2437                 regend = REGEX_TALLOC(num_regs, const char *);
2438                 old_regstart = REGEX_TALLOC(num_regs, const char *);
2439                 old_regend = REGEX_TALLOC(num_regs, const char *);
2440                 best_regstart = REGEX_TALLOC(num_regs, const char *);
2441                 best_regend = REGEX_TALLOC(num_regs, const char *);
2442                 reg_info = REGEX_TALLOC(num_regs, register_info_type);
2443                 reg_dummy = REGEX_TALLOC(num_regs, const char *);
2444                 reg_info_dummy =
2445                     REGEX_TALLOC(num_regs, register_info_type);
2446
2447                 if (!
2448                     (regstart && regend && old_regstart && old_regend
2449                      && reg_info && best_regstart && best_regend
2450                      && reg_dummy && reg_info_dummy)) {
2451                         FREE_VARIABLES();
2452                         return -2;
2453                 }
2454         }
2455
2456         /* The starting position is bogus.  */
2457         if (pos < 0 || pos > size1 + size2) {
2458                 FREE_VARIABLES();
2459                 return -1;
2460         }
2461
2462         /* Initialize subexpression text positions to -1 to mark ones that no
2463            start_memory/stop_memory has been seen for. Also initialize the
2464            register information struct.  */
2465         for (mcnt = 1; mcnt < num_regs; mcnt++) {
2466                 regstart[mcnt] = regend[mcnt]
2467                     = old_regstart[mcnt] = old_regend[mcnt] =
2468                     REG_UNSET_VALUE;
2469
2470                 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
2471                     MATCH_NULL_UNSET_VALUE;
2472                 IS_ACTIVE(reg_info[mcnt]) = 0;
2473                 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
2474                 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
2475         }
2476
2477         /* We move `string1' into `string2' if the latter's empty -- but not if
2478            `string1' is null.  */
2479         if (size2 == 0 && string1 != NULL) {
2480                 string2 = string1;
2481                 size2 = size1;
2482                 string1 = 0;
2483                 size1 = 0;
2484         }
2485         end1 = string1 + size1;
2486         end2 = string2 + size2;
2487
2488         /* Compute where to stop matching, within the two strings.  */
2489         if (stop <= size1) {
2490                 end_match_1 = string1 + stop;
2491                 end_match_2 = string2;
2492         } else {
2493                 end_match_1 = end1;
2494                 end_match_2 = string2 + stop - size1;
2495         }
2496
2497         /* `p' scans through the pattern as `d' scans through the data.
2498            `dend' is the end of the input string that `d' points within.  `d'
2499            is advanced into the following input string whenever necessary, but
2500            this happens before fetching; therefore, at the beginning of the
2501            loop, `d' can be pointing at the end of a string, but it cannot
2502            equal `string2'.  */
2503         if (size1 > 0 && pos <= size1) {
2504                 d = string1 + pos;
2505                 dend = end_match_1;
2506         } else {
2507                 d = string2 + pos - size1;
2508                 dend = end_match_2;
2509         }
2510
2511         DEBUG_PRINT1("The compiled pattern is: ");
2512         DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
2513         DEBUG_PRINT1("The string to match is: `");
2514         DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
2515         DEBUG_PRINT1("'\n");
2516
2517         /* This loops over pattern commands.  It exits by returning from the
2518            function if the match is complete, or it drops through if the match
2519            fails at this starting point in the input data.  */
2520         for (;;) {
2521                 DEBUG_PRINT2("\n0x%x: ", p);
2522
2523                 if (p == pend) {        /* End of pattern means we might have succeeded.  */
2524                         DEBUG_PRINT1("end of pattern ... ");
2525
2526                         /* If we haven't matched the entire string, and we want the
2527                            longest match, try backtracking.  */
2528                         if (d != end_match_2) {
2529                                 DEBUG_PRINT1("backtracking.\n");
2530
2531                                 if (!FAIL_STACK_EMPTY()) {      /* More failure points to try.  */
2532                                         boolean same_str_p =
2533                                             (FIRST_STRING_P(match_end)
2534                                              == MATCHING_IN_FIRST_STRING);
2535
2536                                         /* If exceeds best match so far, save it.  */
2537                                         if (!best_regs_set
2538                                             || (same_str_p
2539                                                 && d > match_end)
2540                                             || (!same_str_p
2541                                                 &&
2542                                                 !MATCHING_IN_FIRST_STRING))
2543                                         {
2544                                                 best_regs_set = true;
2545                                                 match_end = d;
2546
2547                                                 DEBUG_PRINT1
2548                                                     ("\nSAVING match as best so far.\n");
2549
2550                                                 for (mcnt = 1;
2551                                                      mcnt < num_regs;
2552                                                      mcnt++) {
2553                                                         best_regstart[mcnt]
2554                                                             =
2555                                                             regstart[mcnt];
2556                                                         best_regend[mcnt] =
2557                                                             regend[mcnt];
2558                                                 }
2559                                         }
2560                                         goto fail;
2561                                 }
2562
2563                                 /* If no failure points, don't restore garbage.  */
2564                                 else if (best_regs_set) {
2565                                       restore_best_regs:
2566                                         /* Restore best match.  It may happen that `dend ==
2567                                            end_match_1' while the restored d is in string2.
2568                                            For example, the pattern `x.*y.*z' against the
2569                                            strings `x-' and `y-z-', if the two strings are
2570                                            not consecutive in memory.  */
2571                                         DEBUG_PRINT1
2572                                             ("Restoring best registers.\n");
2573
2574                                         d = match_end;
2575                                         dend = ((d >= string1 && d <= end1)
2576                                                 ? end_match_1 :
2577                                                 end_match_2);
2578
2579                                         for (mcnt = 1; mcnt < num_regs;
2580                                              mcnt++) {
2581                                                 regstart[mcnt] =
2582                                                     best_regstart[mcnt];
2583                                                 regend[mcnt] =
2584                                                     best_regend[mcnt];
2585                                         }
2586                                 }
2587                         }
2588                         /* d != end_match_2 */
2589                         DEBUG_PRINT1("Accepting match.\n");
2590
2591                         /* If caller wants register contents data back, do it.  */
2592                         if (regs && !bufp->no_sub) {
2593                                 /* Have the register data arrays been allocated?  */
2594                                 if (bufp->regs_allocated == REGS_UNALLOCATED) { /* No.  So allocate them with malloc.  We need one
2595                                                                                    extra element beyond `num_regs' for the `-1' marker
2596                                                                                    GNU code uses.  */
2597                                         regs->num_regs =
2598                                             MAX(RE_NREGS, num_regs + 1);
2599                                         regs->start =
2600                                             TALLOC(regs->num_regs,
2601                                                    regoff_t);
2602                                         regs->end =
2603                                             TALLOC(regs->num_regs,
2604                                                    regoff_t);
2605                                         if (regs->start == NULL
2606                                             || regs->end == NULL)
2607                                                 return -2;
2608                                         bufp->regs_allocated =
2609                                             REGS_REALLOCATE;
2610                                 } else if (bufp->regs_allocated == REGS_REALLOCATE) {   /* Yes.  If we need more elements than were already
2611                                                                                            allocated, reallocate them.  If we need fewer, just
2612                                                                                            leave it alone.  */
2613                                         if (regs->num_regs < num_regs + 1) {
2614                                                 regs->num_regs =
2615                                                     num_regs + 1;
2616                                                 RETALLOC(regs->start,
2617                                                          regs->num_regs,
2618                                                          regoff_t);
2619                                                 RETALLOC(regs->end,
2620                                                          regs->num_regs,
2621                                                          regoff_t);
2622                                                 if (regs->start == NULL
2623                                                     || regs->end == NULL)
2624                                                         return -2;
2625                                         }
2626                                 } else {
2627                                         /* These braces fend off a "empty body in an else-statement"
2628                                            warning under GCC when assert expands to nothing.  */
2629                                         assert(bufp->regs_allocated ==
2630                                                REGS_FIXED);
2631                                 }
2632
2633                                 /* Convert the pointer data in `regstart' and `regend' to
2634                                    indices.  Register zero has to be set differently,
2635                                    since we haven't kept track of any info for it.  */
2636                                 if (regs->num_regs > 0) {
2637                                         regs->start[0] = pos;
2638                                         regs->end[0] =
2639                                             (MATCHING_IN_FIRST_STRING ? d -
2640                                              string1 : d - string2 +
2641                                              size1);
2642                                 }
2643
2644                                 /* Go through the first `min (num_regs, regs->num_regs)'
2645                                    registers, since that is all we initialized.  */
2646                                 for (mcnt = 1;
2647                                      mcnt < MIN(num_regs, regs->num_regs);
2648                                      mcnt++) {
2649                                         if (REG_UNSET(regstart[mcnt])
2650                                             || REG_UNSET(regend[mcnt]))
2651                                                 regs->start[mcnt] =
2652                                                     regs->end[mcnt] = -1;
2653                                         else {
2654                                                 regs->start[mcnt] =
2655                                                     POINTER_TO_OFFSET
2656                                                     (regstart[mcnt]);
2657                                                 regs->end[mcnt] =
2658                                                     POINTER_TO_OFFSET
2659                                                     (regend[mcnt]);
2660                                         }
2661                                 }
2662
2663                                 /* If the regs structure we return has more elements than
2664                                    were in the pattern, set the extra elements to -1.  If
2665                                    we (re)allocated the registers, this is the case,
2666                                    because we always allocate enough to have at least one
2667                                    -1 at the end.  */
2668                                 for (mcnt = num_regs;
2669                                      mcnt < regs->num_regs; mcnt++)
2670                                         regs->start[mcnt] =
2671                                             regs->end[mcnt] = -1;
2672                         }
2673                         /* regs && !bufp->no_sub */
2674                         FREE_VARIABLES();
2675                         DEBUG_PRINT4
2676                             ("%u failure points pushed, %u popped (%u remain).\n",
2677                              nfailure_points_pushed,
2678                              nfailure_points_popped,
2679                              nfailure_points_pushed -
2680                              nfailure_points_popped);
2681                         DEBUG_PRINT2("%u registers pushed.\n",
2682                                      num_regs_pushed);
2683
2684                         mcnt = d - pos - (MATCHING_IN_FIRST_STRING
2685                                           ? string1 : string2 - size1);
2686
2687                         DEBUG_PRINT2("Returning %d from re_match_2.\n",
2688                                      mcnt);
2689
2690                         return mcnt;
2691                 }
2692
2693                 /* Otherwise match next pattern command.  */
2694                 switch ((re_opcode_t) * p++) {
2695                         /* Ignore these.  Used to ignore the n of succeed_n's which
2696                            currently have n == 0.  */
2697                 case no_op:
2698                         DEBUG_PRINT1("EXECUTING no_op.\n");
2699                         break;
2700
2701
2702                         /* Match the next n pattern characters exactly.  The following
2703                            byte in the pattern defines n, and the n bytes after that
2704                            are the characters to match.  */
2705                 case exactn:
2706                         mcnt = *p++;
2707                         DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
2708
2709                         /* This is written out as an if-else so we don't waste time
2710                            testing `translate' inside the loop.  */
2711                         if (translate) {
2712                                 do {
2713                                         PREFETCH();
2714                                         if (translate[(unsigned char) *d++]
2715                                             != (char) *p++)
2716                                                 goto fail;
2717                                 }
2718                                 while (--mcnt);
2719                         } else {
2720                                 do {
2721                                         PREFETCH();
2722                                         if (*d++ != (char) *p++)
2723                                                 goto fail;
2724                                 }
2725                                 while (--mcnt);
2726                         }
2727                         SET_REGS_MATCHED();
2728                         break;
2729
2730
2731                         /* Match any character except possibly a newline or a null.  */
2732                 case anychar:
2733                         DEBUG_PRINT1("EXECUTING anychar.\n");
2734
2735                         PREFETCH();
2736
2737                         if ((!(bufp->syntax & RE_DOT_NEWLINE)
2738                              && TRANSLATE(*d) == '\n')
2739                             || (bufp->syntax & RE_DOT_NOT_NULL
2740                                 && TRANSLATE(*d) == '\000'))
2741                                 goto fail;
2742
2743                         SET_REGS_MATCHED();
2744                         DEBUG_PRINT2("  Matched `%d'.\n", *d);
2745                         d++;
2746                         break;
2747
2748
2749                 case charset:
2750                 case charset_not:
2751                         {
2752                                 register unsigned char c;
2753                                 boolean not =
2754                                     (re_opcode_t) * (p - 1) == charset_not;
2755
2756                                 DEBUG_PRINT2("EXECUTING charset%s.\n",
2757                                              not ? "_not" : "");