1 /* Extended regular expression matching and search library,
3 (Implements POSIX draft P10003.2/D11.2, except for
4 internationalization features.)
6 Copyright (C) 1993 Free Software Foundation, Inc.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #include <sys/types.h>
31 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
34 #define bcopy(s, d, n) memcpy ((d), (s), (n))
37 #define bzero(s, n) memset ((s), 0, (n))
40 /* Define the syntax stuff for \<, \>, etc. */
46 #define CHAR_SET_SIZE 256
48 static char re_syntax_table[CHAR_SET_SIZE];
50 static void init_syntax_once(void)
58 bzero(re_syntax_table, sizeof re_syntax_table);
60 for (c = 'a'; c <= 'z'; c++)
61 re_syntax_table[c] = Sword;
63 for (c = 'A'; c <= 'Z'; c++)
64 re_syntax_table[c] = Sword;
66 for (c = '0'; c <= '9'; c++)
67 re_syntax_table[c] = Sword;
69 re_syntax_table['_'] = Sword;
74 #define SYNTAX(c) re_syntax_table[c]
80 #define ISBLANK(c) (isascii (c) && isblank (c))
82 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
85 #define ISGRAPH(c) (isascii (c) && isgraph (c))
87 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
90 #define ISPRINT(c) (isascii (c) && isprint (c))
91 #define ISDIGIT(c) (isascii (c) && isdigit (c))
92 #define ISALNUM(c) (isascii (c) && isalnum (c))
93 #define ISALPHA(c) (isascii (c) && isalpha (c))
94 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
95 #define ISLOWER(c) (isascii (c) && islower (c))
96 #define ISPUNCT(c) (isascii (c) && ispunct (c))
97 #define ISSPACE(c) (isascii (c) && isspace (c))
98 #define ISUPPER(c) (isascii (c) && isupper (c))
99 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
101 #undef SIGN_EXTEND_CHAR
102 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
106 #define alloca __builtin_alloca
107 #endif /* not __GNUC__ */
108 #endif /* not alloca */
110 #define REGEX_ALLOCATE alloca
112 /* Assumes a `char *destination' variable. */
113 #define REGEX_REALLOCATE(source, osize, nsize) \
114 (destination = (char *) alloca (nsize), \
115 bcopy (source, destination, osize), \
118 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
119 `string1' or just past its end. This works if PTR is NULL, which is
121 #define FIRST_STRING_P(ptr) \
122 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
124 /* (Re)Allocate N items of type T using malloc, or fail. */
125 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
126 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
127 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
129 #define BYTEWIDTH 8 /* In bits. */
131 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
133 #define MAX(a, b) ((a) > (b) ? (a) : (b))
134 #define MIN(a, b) ((a) < (b) ? (a) : (b))
136 typedef char boolean;
156 on_failure_keep_string_jump,
172 #define STORE_NUMBER(destination, number) \
174 (destination)[0] = (number) & 0377; \
175 (destination)[1] = (number) >> 8; \
178 #define STORE_NUMBER_AND_INCR(destination, number) \
180 STORE_NUMBER (destination, number); \
181 (destination) += 2; \
184 #define EXTRACT_NUMBER(destination, source) \
186 (destination) = *(source) & 0377; \
187 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
190 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
192 EXTRACT_NUMBER (destination, source); \
199 #define DEBUG_STATEMENT(e)
200 #define DEBUG_PRINT1(x)
201 #define DEBUG_PRINT2(x1, x2)
202 #define DEBUG_PRINT3(x1, x2, x3)
203 #define DEBUG_PRINT4(x1, x2, x3, x4)
204 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
205 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
207 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
208 reg_syntax_t re_set_syntax(syntax)
211 reg_syntax_t ret = re_syntax_options;
213 re_syntax_options = syntax;
217 /* This table gives an error message for each of the error codes listed
218 in regex.h. Obviously the order here has to be same as there. */
220 static const char *re_error_msg[] = { NULL, /* REG_NOERROR */
221 "No match", /* REG_NOMATCH */
222 "Invalid regular expression", /* REG_BADPAT */
223 "Invalid collation character", /* REG_ECOLLATE */
224 "Invalid character class name", /* REG_ECTYPE */
225 "Trailing backslash", /* REG_EESCAPE */
226 "Invalid back reference", /* REG_ESUBREG */
227 "Unmatched [ or [^", /* REG_EBRACK */
228 "Unmatched ( or \\(", /* REG_EPAREN */
229 "Unmatched \\{", /* REG_EBRACE */
230 "Invalid content of \\{\\}", /* REG_BADBR */
231 "Invalid range end", /* REG_ERANGE */
232 "Memory exhausted", /* REG_ESPACE */
233 "Invalid preceding regular expression", /* REG_BADRPT */
234 "Premature end of regular expression", /* REG_EEND */
235 "Regular expression too big", /* REG_ESIZE */
236 "Unmatched ) or \\)", /* REG_ERPAREN */
239 /* Subroutine declarations and macros for regex_compile. */
241 static reg_errcode_t regex_compile (const char *pattern, size_t size,
243 struct re_pattern_buffer * bufp);
245 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
247 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
249 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg,
252 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
255 static boolean at_begline_loc_p (const char *pattern, const char *p,
256 reg_syntax_t syntax);
258 static boolean at_endline_loc_p (const char *p, const char *pend,
259 reg_syntax_t syntax);
261 static reg_errcode_t compile_range (const char **p_ptr, const char *pend,
262 char *translate, reg_syntax_t syntax,
265 /* Fetch the next character in the uncompiled pattern---translating it
266 if necessary. Also cast from a signed character in the constant
267 string passed to us by the user to an unsigned char that we can use
268 as an array index (in, e.g., `translate'). */
269 #define PATFETCH(c) \
270 do {if (p == pend) return REG_EEND; \
271 c = (unsigned char) *p++; \
272 if (translate) c = translate[c]; \
275 /* Fetch the next character in the uncompiled pattern, with no
277 #define PATFETCH_RAW(c) \
278 do {if (p == pend) return REG_EEND; \
279 c = (unsigned char) *p++; \
282 /* Go backwards one character in the pattern. */
283 #define PATUNFETCH p--
286 /* If `translate' is non-null, return translate[D], else just D. We
287 cast the subscript to translate because some data is declared as
288 `char *', to avoid warnings when a string constant is passed. But
289 when we use a character as a subscript we must make it unsigned. */
290 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
293 /* Macros for outputting the compiled pattern into `buffer'. */
295 /* If the buffer isn't allocated when it comes in, use this. */
296 #define INIT_BUF_SIZE 32
298 /* Make sure we have at least N more bytes of space in buffer. */
299 #define GET_BUFFER_SPACE(n) \
300 while (b - bufp->buffer + (n) > bufp->allocated) \
303 /* Make sure we have one more byte of buffer space and then add C to it. */
304 #define BUF_PUSH(c) \
306 GET_BUFFER_SPACE (1); \
307 *b++ = (unsigned char) (c); \
311 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */
312 #define BUF_PUSH_2(c1, c2) \
314 GET_BUFFER_SPACE (2); \
315 *b++ = (unsigned char) (c1); \
316 *b++ = (unsigned char) (c2); \
320 /* As with BUF_PUSH_2, except for three bytes. */
321 #define BUF_PUSH_3(c1, c2, c3) \
323 GET_BUFFER_SPACE (3); \
324 *b++ = (unsigned char) (c1); \
325 *b++ = (unsigned char) (c2); \
326 *b++ = (unsigned char) (c3); \
330 /* Store a jump with opcode OP at LOC to location TO. We store a
331 relative address offset by the three bytes the jump itself occupies. */
332 #define STORE_JUMP(op, loc, to) \
333 store_op1 (op, loc, (int)((to) - (loc) - 3))
335 /* Likewise, for a two-argument jump. */
336 #define STORE_JUMP2(op, loc, to, arg) \
337 store_op2 (op, loc, (int)((to) - (loc) - 3), arg)
339 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
340 #define INSERT_JUMP(op, loc, to) \
341 insert_op1 (op, loc, (int)((to) - (loc) - 3), b)
343 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
344 #define INSERT_JUMP2(op, loc, to, arg) \
345 insert_op2 (op, loc, (int)((to) - (loc) - 3), arg, b)
348 /* This is not an arbitrary limit: the arguments which represent offsets
349 into the pattern are two bytes long. So if 2^16 bytes turns out to
350 be too small, many things would have to change. */
351 #define MAX_BUF_SIZE (1L << 16)
352 #define REALLOC realloc
354 /* Extend the buffer by twice its current size via realloc and
355 reset the pointers that pointed into the old block to point to the
356 correct places in the new one. If extending the buffer results in it
357 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
358 #define EXTEND_BUFFER() \
360 unsigned char *old_buffer = bufp->buffer; \
361 if (bufp->allocated == MAX_BUF_SIZE) \
363 bufp->allocated <<= 1; \
364 if (bufp->allocated > MAX_BUF_SIZE) \
365 bufp->allocated = MAX_BUF_SIZE; \
366 bufp->buffer = (unsigned char *) REALLOC(bufp->buffer, bufp->allocated);\
367 if (bufp->buffer == NULL) \
369 /* If the buffer moved, move all the pointers into it. */ \
370 if (old_buffer != bufp->buffer) \
372 b = (b - old_buffer) + bufp->buffer; \
373 begalt = (begalt - old_buffer) + bufp->buffer; \
374 if (fixup_alt_jump) \
375 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
377 laststart = (laststart - old_buffer) + bufp->buffer; \
379 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
384 /* Since we have one byte reserved for the register number argument to
385 {start,stop}_memory, the maximum number of groups we can report
386 things about is what fits in that byte. */
387 #define MAX_REGNUM 255
389 /* But patterns can have more than `MAX_REGNUM' registers. We just
390 ignore the excess. */
391 typedef unsigned regnum_t;
394 /* Macros for the compile stack. */
396 /* Since offsets can go either forwards or backwards, this type needs to
397 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
398 /* int may be not enough when sizeof(int) == 2 */
399 typedef long pattern_offset_t;
402 pattern_offset_t begalt_offset;
403 pattern_offset_t fixup_alt_jump;
404 pattern_offset_t inner_group_offset;
405 pattern_offset_t laststart_offset;
407 } compile_stack_elt_t;
411 compile_stack_elt_t *stack;
413 unsigned avail; /* Offset of next open position. */
414 } compile_stack_type;
417 #define INIT_COMPILE_STACK_SIZE 32
419 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
420 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
422 /* The next available element. */
423 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
426 /* Set the bit for character C in a list. */
427 #define SET_LIST_BIT(c) \
428 (b[((unsigned char) (c)) / BYTEWIDTH] \
429 |= 1 << (((unsigned char) c) % BYTEWIDTH))
432 /* Get the next unsigned number in the uncompiled pattern. */
433 #define GET_UNSIGNED_NUMBER(num) \
437 while (ISDIGIT (c)) \
441 num = num * 10 + c - '0'; \
449 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
451 #define IS_CHAR_CLASS(string) \
452 (STREQ (string, "alpha") || STREQ (string, "upper") \
453 || STREQ (string, "lower") || STREQ (string, "digit") \
454 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
455 || STREQ (string, "space") || STREQ (string, "print") \
456 || STREQ (string, "punct") || STREQ (string, "graph") \
457 || STREQ (string, "cntrl") || STREQ (string, "blank"))
459 static boolean group_in_compile_stack (compile_stack_type
460 compile_stack, regnum_t regnum);
462 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
463 Returns one of error codes defined in `regex.h', or zero for success */
465 static reg_errcode_t regex_compile(pattern, size, syntax, bufp)
469 struct re_pattern_buffer *bufp;
471 /* We fetch characters from PATTERN here. Even though PATTERN is
472 `char *' (i.e., signed), we declare these variables as unsigned, so
473 they can be reliably used as array indices. */
474 register unsigned char c, c1;
476 /* A random tempory spot in PATTERN. */
479 /* Points to the end of the buffer, where we should append. */
480 register unsigned char *b;
482 /* Keeps track of unclosed groups. */
483 compile_stack_type compile_stack;
485 /* Points to the current (ending) position in the pattern. */
486 const char *p = pattern;
487 const char *pend = pattern + size;
489 /* How to translate the characters in the pattern. */
490 char *translate = bufp->translate;
492 /* Address of the count-byte of the most recently inserted `exactn'
493 command. This makes it possible to tell if a new exact-match
494 character can be added to that command or if the character requires
495 a new `exactn' command. */
496 unsigned char *pending_exact = 0;
498 /* Address of start of the most recently finished expression.
499 This tells, e.g., postfix * where to find the start of its
500 operand. Reset at the beginning of groups and alternatives. */
501 unsigned char *laststart = 0;
503 /* Address of beginning of regexp, or inside of last group. */
504 unsigned char *begalt;
506 /* Place in the uncompiled pattern (i.e., the {) to
507 which to go back if the interval is invalid. */
508 const char *beg_interval;
510 /* Address of the place where a forward jump should go to the end of
511 the containing expression. Each alternative of an `or' -- except the
512 last -- ends with a forward jump of this sort. */
513 unsigned char *fixup_alt_jump = 0;
515 /* Counts open-groups as they are encountered. Remembered for the
516 matching close-group on the compile stack, so the same register
517 number is put in the stop_memory as the start_memory. */
520 /* Initialize the compile stack. */
521 compile_stack.stack =
522 TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
523 if (compile_stack.stack == NULL)
526 compile_stack.size = INIT_COMPILE_STACK_SIZE;
527 compile_stack.avail = 0;
529 /* Initialize the pattern buffer. */
530 bufp->syntax = syntax;
531 bufp->fastmap_accurate = 0;
532 bufp->not_bol = bufp->not_eol = 0;
534 /* Set `used' to zero, so that if we return an error, the pattern
535 printer (for debugging) will think there's no pattern. We reset it
539 /* Always count groups, whether or not bufp->no_sub is set. */
542 /* Initialize the syntax table. */
545 if (bufp->allocated == 0) {
547 RETALLOC(bufp->buffer, INIT_BUF_SIZE,
549 } else { /* Caller did not allocate a buffer. Do it for them. */
551 TALLOC(INIT_BUF_SIZE, unsigned char);
556 bufp->allocated = INIT_BUF_SIZE;
559 begalt = b = bufp->buffer;
561 /* Loop through the uncompiled pattern until we're at the end. */
568 if (p == pattern + 1 ||
569 syntax & RE_CONTEXT_INDEP_ANCHORS ||
570 at_begline_loc_p(pattern, p, syntax))
580 syntax & RE_CONTEXT_INDEP_ANCHORS ||
581 at_endline_loc_p(p, pend, syntax))
591 if ((syntax & RE_BK_PLUS_QM) ||
592 (syntax & RE_LIMITED_OPS))
597 /* If there is no previous pattern... */
599 if (syntax & RE_CONTEXT_INVALID_OPS)
601 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
606 /* Are we optimizing this jump? */
607 boolean keep_string_p = false;
609 /* 1 means zero (many) matches is allowed. */
610 char zero_times_ok = 0, many_times_ok = 0;
613 zero_times_ok |= c != '+';
614 many_times_ok |= c != '?';
621 if (c == '*' || (!(syntax & RE_BK_PLUS_QM) &&
622 (c == '+' || c == '?')));
624 else if (syntax & RE_BK_PLUS_QM && c == '\\') {
629 if (!(c1 == '+' || c1 == '?')) {
646 assert(p - 1 > pattern);
648 /* Allocate the space for the jump. */
651 if (TRANSLATE(*(p - 2)) == TRANSLATE('.') &&
652 zero_times_ok && p < pend &&
653 TRANSLATE(*p) == TRANSLATE('\n') &&
654 !(syntax & RE_DOT_NEWLINE)) {
656 STORE_JUMP(jump, b, laststart);
657 keep_string_p = true;
659 STORE_JUMP(maybe_pop_jump, b,
666 INSERT_JUMP(keep_string_p ?
667 on_failure_keep_string_jump :
668 on_failure_jump, laststart,
673 if (!zero_times_ok) {
675 INSERT_JUMP(dummy_failure_jump,
691 boolean had_char_class = false;
696 GET_BUFFER_SPACE(34);
700 /* We test `*p == '^' twice, instead of using an if
701 statement, so we only need one BUF_PUSH. */
702 BUF_PUSH(*p == '^' ? charset_not : charset);
708 /* Push the number of bytes in the bitmap. */
709 BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
711 /* Clear the whole map. */
712 bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH);
714 if ((re_opcode_t) b[-2] == charset_not
715 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
718 /* Read in characters and ranges, setting map bits. */
725 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
735 if (c == ']' && p != p1 + 1)
738 if (had_char_class && c == '-' && *p != ']')
741 if (c == '-' && !(p - 2 >= pattern &&
742 p[-2] == '[') && !(p - 3 >= pattern &&
743 p[-3] == '[' && p[-2] == '^') &&
746 compile_range(&p, pend, translate,
748 if (ret != REG_NOERROR)
752 else if (p[0] == '-' && p[1] != ']') {
755 /* Move past the `-'. */
758 ret = compile_range(&p, pend, translate,
760 if (ret != REG_NOERROR)
764 else if (syntax & RE_CHAR_CLASSES &&
765 c == '[' && *p == ':') {
766 char str[CHAR_CLASS_MAX_LENGTH + 1];
771 /* If pattern is `[[:'. */
777 if (c == ':' || c == ']' ||
779 CHAR_CLASS_MAX_LENGTH)
785 if (c == ':' && *p == ']') {
810 STREQ(str, "xdigit");
812 if (!IS_CHAR_CLASS(str))
820 for (ch = 0; ch < 1 <<
856 had_char_class = false;
859 had_char_class = false;
864 while ((int) b[-1] > 0
865 && b[b[-1] - 1] == 0)
872 if (syntax & RE_NO_BK_PARENS)
879 if (syntax & RE_NO_BK_PARENS)
886 if (syntax & RE_NEWLINE_ALT)
893 if (syntax & RE_NO_BK_VBAR)
900 if (syntax & RE_INTERVALS
901 && syntax & RE_NO_BK_BRACES)
902 goto handle_interval;
915 if (syntax & RE_NO_BK_PARENS)
916 goto normal_backslash;
922 if (COMPILE_STACK_FULL) {
923 RETALLOC(compile_stack.stack,
924 compile_stack.size << 1,
925 compile_stack_elt_t);
926 if (compile_stack.stack == NULL)
929 compile_stack.size <<= 1;
932 COMPILE_STACK_TOP.begalt_offset =
933 begalt - bufp->buffer;
934 COMPILE_STACK_TOP.fixup_alt_jump =
935 fixup_alt_jump ? fixup_alt_jump -
936 bufp->buffer + 1 : 0;
937 COMPILE_STACK_TOP.laststart_offset =
939 COMPILE_STACK_TOP.regnum = regnum;
941 if (regnum <= MAX_REGNUM) {
942 COMPILE_STACK_TOP.inner_group_offset =
943 b - bufp->buffer + 2;
944 BUF_PUSH_3(start_memory, regnum, 0);
947 compile_stack.avail++;
956 if (syntax & RE_NO_BK_PARENS)
957 goto normal_backslash;
959 if (COMPILE_STACK_EMPTY) {
960 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
961 goto normal_backslash;
967 if (fixup_alt_jump) {
968 BUF_PUSH(push_dummy_failure);
969 STORE_JUMP(jump_past_alt,
970 fixup_alt_jump, b - 1);
973 if (COMPILE_STACK_EMPTY) {
974 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
980 assert(compile_stack.avail != 0);
982 regnum_t this_group_regnum;
984 compile_stack.avail--;
985 begalt = bufp->buffer +
986 COMPILE_STACK_TOP.begalt_offset;
988 COMPILE_STACK_TOP.fixup_alt_jump ?
989 bufp->buffer + COMPILE_STACK_TOP.
990 fixup_alt_jump - 1 : 0;
991 laststart = bufp->buffer +
992 COMPILE_STACK_TOP.laststart_offset;
993 this_group_regnum = COMPILE_STACK_TOP.regnum;
996 if (this_group_regnum <= MAX_REGNUM) {
998 *inner_group_loc = bufp->buffer +
1002 *inner_group_loc = regnum -
1004 BUF_PUSH_3(stop_memory,
1006 regnum - this_group_regnum);
1012 case '|': /* `\|'. */
1013 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1014 goto normal_backslash;
1016 if (syntax & RE_LIMITED_OPS)
1019 GET_BUFFER_SPACE(3);
1020 INSERT_JUMP(on_failure_jump, begalt, b + 6);
1025 STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
1028 GET_BUFFER_SPACE(3);
1037 /* If \{ is a literal. */
1038 if (!(syntax & RE_INTERVALS) || ((syntax & RE_INTERVALS)
1039 && (syntax & RE_NO_BK_BRACES))
1040 || (p - 2 == pattern && p == pend))
1041 goto normal_backslash;
1045 int lower_bound = -1, upper_bound = -1;
1046 beg_interval = p - 1;
1049 if (syntax & RE_NO_BK_BRACES)
1050 goto unfetch_interval;
1055 GET_UNSIGNED_NUMBER(lower_bound);
1058 GET_UNSIGNED_NUMBER(upper_bound);
1059 if (upper_bound < 0)
1060 upper_bound = RE_DUP_MAX;
1062 upper_bound = lower_bound;
1064 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1065 || lower_bound > upper_bound) {
1066 if (syntax & RE_NO_BK_BRACES)
1067 goto unfetch_interval;
1072 if (!(syntax & RE_NO_BK_BRACES)) {
1080 if (syntax & RE_NO_BK_BRACES)
1081 goto unfetch_interval;
1087 if (syntax & RE_CONTEXT_INVALID_OPS)
1089 else if (syntax & RE_CONTEXT_INDEP_OPS)
1092 goto unfetch_interval;
1095 if (upper_bound == 0) {
1096 GET_BUFFER_SPACE(3);
1097 INSERT_JUMP(jump, laststart, b + 3);
1103 10 + (upper_bound > 1) * 10;
1105 GET_BUFFER_SPACE(nbytes);
1107 INSERT_JUMP2(succeed_n, laststart,
1108 b + 5 + (upper_bound >
1109 1) * 5, lower_bound);
1112 insert_op2(set_number_at, laststart, 5,
1116 if (upper_bound > 1) {
1117 STORE_JUMP2(jump_n, b,
1122 insert_op2(set_number_at,
1125 upper_bound - 1, b);
1130 beg_interval = NULL;
1135 assert(beg_interval);
1137 beg_interval = NULL;
1139 /* normal_char and normal_backslash need `c'. */
1142 if (!(syntax & RE_NO_BK_BRACES)) {
1143 if (p > pattern && p[-1] == '\\')
1144 goto normal_backslash;
1149 if (re_syntax_options & RE_NO_GNU_OPS)
1157 if (re_syntax_options & RE_NO_GNU_OPS)
1160 BUF_PUSH(notwordchar);
1165 if (re_syntax_options & RE_NO_GNU_OPS)
1171 if (re_syntax_options & RE_NO_GNU_OPS)
1177 if (re_syntax_options & RE_NO_GNU_OPS)
1179 BUF_PUSH(wordbound);
1183 if (re_syntax_options & RE_NO_GNU_OPS)
1185 BUF_PUSH(notwordbound);
1189 if (re_syntax_options & RE_NO_GNU_OPS)
1195 if (re_syntax_options & RE_NO_GNU_OPS)
1209 if (syntax & RE_NO_BK_REFS)
1217 /* Can't back reference to a subexpression if inside of it. */
1218 if (group_in_compile_stack
1219 (compile_stack, (regnum_t) c1))
1223 BUF_PUSH_2(duplicate, c1);
1229 if (syntax & RE_BK_PLUS_QM)
1232 goto normal_backslash;
1236 /* You might think it would be useful for \ to mean
1237 not to translate; but if we don't translate it
1238 it will never match anything. */
1246 /* Expects the character in `c'. */
1248 /* If no exactn currently being built. */
1250 /* If last exactn not at current position. */
1251 || pending_exact + *pending_exact + 1 != b
1252 /* We have only one byte following the exactn for the count. */
1253 || *pending_exact == (1 << BYTEWIDTH) - 1
1254 /* If followed by a repetition operator. */
1255 || *p == '*' || *p == '^'
1256 || ((syntax & RE_BK_PLUS_QM)
1257 ? *p == '\\' && (p[1] == '+'
1259 : (*p == '+' || *p == '?'))
1260 || ((syntax & RE_INTERVALS)
1261 && ((syntax & RE_NO_BK_BRACES)
1263 : (p[0] == '\\' && p[1] == '{')))) {
1264 /* Start building a new exactn. */
1268 BUF_PUSH_2(exactn, 0);
1269 pending_exact = b - 1;
1276 } /* while p != pend */
1279 /* Through the pattern now. */
1282 STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
1284 if (!COMPILE_STACK_EMPTY)
1287 free(compile_stack.stack);
1289 /* We have succeeded; set the length of the buffer. */
1290 bufp->used = b - bufp->buffer;
1293 } /* regex_compile */
1295 /* Subroutines for `regex_compile'. */
1297 /* Store OP at LOC followed by two-byte integer parameter ARG. */
1299 static void store_op1(op, loc, arg)
1304 *loc = (unsigned char) op;
1305 STORE_NUMBER(loc + 1, arg);
1309 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
1311 static void store_op2(op, loc, arg1, arg2)
1316 *loc = (unsigned char) op;
1317 STORE_NUMBER(loc + 1, arg1);
1318 STORE_NUMBER(loc + 3, arg2);
1322 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
1323 for OP followed by two-byte integer parameter ARG. */
1325 static void insert_op1(op, loc, arg, end)
1331 register unsigned char *pfrom = end;
1332 register unsigned char *pto = end + 3;
1334 while (pfrom != loc)
1337 store_op1(op, loc, arg);
1341 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
1343 static void insert_op2(op, loc, arg1, arg2, end)
1349 register unsigned char *pfrom = end;
1350 register unsigned char *pto = end + 5;
1352 while (pfrom != loc)
1355 store_op2(op, loc, arg1, arg2);
1359 /* P points to just after a ^ in PATTERN. Return true if that ^ comes
1360 after an alternative or a begin-subexpression. We assume there is at
1361 least one character before the ^. */
1363 static boolean at_begline_loc_p(pattern, p, syntax)
1364 const char *pattern, *p;
1365 reg_syntax_t syntax;
1367 const char *prev = p - 2;
1368 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
1371 /* After a subexpression? */
1373 && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
1374 /* After an alternative? */
1376 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
1380 /* The dual of at_begline_loc_p. This one is for $. We assume there is
1381 at least one character after the $, i.e., `P < PEND'. */
1383 static boolean at_endline_loc_p(p, pend, syntax)
1384 const char *p, *pend;
1385 reg_syntax_t syntax;
1387 const char *next = p;
1388 boolean next_backslash = *next == '\\';
1389 const char *next_next = p + 1 < pend ? p + 1 : NULL;
1392 /* Before a subexpression? */
1393 (syntax & RE_NO_BK_PARENS ? *next == ')'
1394 : next_backslash && next_next && *next_next == ')')
1395 /* Before an alternative? */
1396 || (syntax & RE_NO_BK_VBAR ? *next == '|'
1397 : next_backslash && next_next && *next_next == '|');
1401 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
1402 false if it's not. */
1404 static boolean group_in_compile_stack(compile_stack, regnum)
1405 compile_stack_type compile_stack;
1410 for (this_element = compile_stack.avail - 1;
1411 this_element >= 0; this_element--)
1412 if (compile_stack.stack[this_element].regnum == regnum)
1419 /* Read the ending character of a range (in a bracket expression) from the
1420 uncompiled pattern *P_PTR (which ends at PEND). We assume the
1421 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
1422 Then we set the translation of all bits between the starting and
1423 ending characters (inclusive) in the compiled pattern B.
1425 Return an error code.
1427 We use these short variable names so we can use the same macros as
1428 `regex_compile' itself. */
1430 static reg_errcode_t compile_range(p_ptr, pend, translate, syntax, b)
1431 const char **p_ptr, *pend;
1433 reg_syntax_t syntax;
1438 const char *p = *p_ptr;
1439 int range_start, range_end;
1444 /* Even though the pattern is a signed `char *', we need to fetch
1445 with unsigned char *'s; if the high bit of the pattern character
1446 is set, the range endpoints will be negative if we fetch using a
1449 We also want to fetch the endpoints without translating them; the
1450 appropriate translation is done in the bit-setting loop below. */
1451 range_start = ((unsigned char *) p)[-2];
1452 range_end = ((unsigned char *) p)[0];
1454 /* Have to increment the pointer into the pattern string, so the
1455 caller isn't still at the ending character. */
1458 /* If the start is after the end, the range is empty. */
1459 if (range_start > range_end)
1460 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE :
1463 /* Here we see why `this_char' has to be larger than an `unsigned
1464 char' -- the range is inclusive, so if `range_end' == 0xff
1465 (assuming 8-bit characters), we would otherwise go into an infinite
1466 loop, since all characters <= 0xff. */
1467 for (this_char = range_start; this_char <= range_end; this_char++) {
1468 SET_LIST_BIT(TRANSLATE(this_char));
1473 /* Failure stack declarations and macros; both re_compile_fastmap and
1474 re_match_2 use a failure stack. These have to be macros because of
1478 /* Number of failure points for which to initially allocate space
1479 when matching. If this number is exceeded, we allocate more
1480 space, so it is not a hard limit. */
1481 #define INIT_FAILURE_ALLOC 5
1483 /* Roughly the maximum number of failure points on the stack. Would be
1484 exactly that if always used MAX_FAILURE_SPACE each time we failed.
1485 This is a variable only so users of regex can assign to it; we never
1486 change it ourselves. */
1487 int re_max_failures = 2000;
1489 typedef const unsigned char *fail_stack_elt_t;
1492 fail_stack_elt_t *stack;
1494 unsigned avail; /* Offset of next open position. */
1497 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1498 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1499 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1500 #define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
1503 /* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
1505 #define INIT_FAIL_STACK() \
1507 fail_stack.stack = (fail_stack_elt_t *) \
1508 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1510 if (fail_stack.stack == NULL) \
1513 fail_stack.size = INIT_FAILURE_ALLOC; \
1514 fail_stack.avail = 0; \
1518 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1520 Return 1 if succeeds, and 0 if either ran out of memory
1521 allocating space for it or it was already too large.
1523 REGEX_REALLOCATE requires `destination' be declared. */
1525 #define DOUBLE_FAIL_STACK(fail_stack) \
1526 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
1528 : ((fail_stack).stack = (fail_stack_elt_t *) \
1529 REGEX_REALLOCATE ((fail_stack).stack, \
1530 (fail_stack).size * sizeof (fail_stack_elt_t), \
1531 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
1533 (fail_stack).stack == NULL \
1535 : ((fail_stack).size <<= 1, \
1539 /* Push PATTERN_OP on FAIL_STACK.
1541 Return 1 if was able to do so and 0 if ran out of memory allocating
1543 #define PUSH_PATTERN_OP(pattern_op, fail_stack) \
1544 ((FAIL_STACK_FULL () \
1545 && !DOUBLE_FAIL_STACK (fail_stack)) \
1547 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
1550 /* This pushes an item onto the failure stack. Must be a four-byte
1551 value. Assumes the variable `fail_stack'. Probably should only
1552 be called from within `PUSH_FAILURE_POINT'. */
1553 #define PUSH_FAILURE_ITEM(item) \
1554 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
1556 /* The complement operation. Assumes `fail_stack' is nonempty. */
1557 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
1559 /* Used to omit pushing failure point id's when we're not debugging. */
1560 #define DEBUG_PUSH(item)
1561 #define DEBUG_POP(item_addr)
1564 /* Push the information about the state we will need
1565 if we ever fail back to it.
1567 Requires variables fail_stack, regstart, regend, reg_info, and
1568 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
1571 Does `return FAILURE_CODE' if runs out of memory. */
1573 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1575 char *destination; \
1576 /* Must be int, so when we don't save any registers, the arithmetic \
1577 of 0 + -1 isn't done as unsigned. */ \
1578 /* Can't be int, since there is not a shred of a guarantee that int \
1579 is wide enough to hold a value of something to which pointer can \
1583 DEBUG_STATEMENT (failure_id++); \
1584 DEBUG_STATEMENT (nfailure_points_pushed++); \
1585 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1586 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1587 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1589 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
1590 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1592 /* Ensure we have enough space allocated for what we will push. */ \
1593 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1595 if (!DOUBLE_FAIL_STACK (fail_stack)) \
1596 return failure_code; \
1598 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1599 (fail_stack).size); \
1600 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1603 #define PUSH_FAILURE_POINT2(pattern_place, string_place, failure_code) \
1604 /* Push the info, starting with the registers. */ \
1605 DEBUG_PRINT1 ("\n"); \
1607 PUSH_FAILURE_POINT_LOOP (); \
1609 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
1610 PUSH_FAILURE_ITEM (lowest_active_reg); \
1612 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
1613 PUSH_FAILURE_ITEM (highest_active_reg); \
1615 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1616 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1617 PUSH_FAILURE_ITEM (pattern_place); \
1619 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1620 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1622 DEBUG_PRINT1 ("'\n"); \
1623 PUSH_FAILURE_ITEM (string_place); \
1625 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1626 DEBUG_PUSH (failure_id); \
1629 /* Pulled out of PUSH_FAILURE_POINT() to shorten the definition
1630 of that macro. (for VAX C) */
1631 #define PUSH_FAILURE_POINT_LOOP() \
1632 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1635 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
1636 DEBUG_STATEMENT (num_regs_pushed++); \
1638 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1639 PUSH_FAILURE_ITEM (regstart[this_reg]); \
1641 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1642 PUSH_FAILURE_ITEM (regend[this_reg]); \
1644 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
1645 DEBUG_PRINT2 (" match_null=%d", \
1646 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1647 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1648 DEBUG_PRINT2 (" matched_something=%d", \
1649 MATCHED_SOMETHING (reg_info[this_reg])); \
1650 DEBUG_PRINT2 (" ever_matched=%d", \
1651 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1652 DEBUG_PRINT1 ("\n"); \
1653 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
1656 /* This is the number of items that are pushed and popped on the stack
1657 for each register. */
1658 #define NUM_REG_ITEMS 3
1660 /* Individual items aside from the registers. */
1661 #define NUM_NONREG_ITEMS 4
1663 /* We push at most this many items on the stack. */
1664 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1666 /* We actually push this many items. */
1667 #define NUM_FAILURE_ITEMS \
1668 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
1671 /* How many items can still be added to the stack without overflowing it. */
1672 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1675 /* Pops what PUSH_FAIL_STACK pushes.
1677 We restore into the parameters, all of which should be lvalues:
1678 STR -- the saved data position.
1679 PAT -- the saved pattern position.
1680 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1681 REGSTART, REGEND -- arrays of string positions.
1682 REG_INFO -- array of information about each subexpression.
1684 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1685 `pend', `string1', `size1', `string2', and `size2'. */
1687 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1689 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1691 const unsigned char *string_temp; \
1693 assert (!FAIL_STACK_EMPTY ()); \
1695 /* Remove failure points and point to how many regs pushed. */ \
1696 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1697 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1698 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1700 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1702 DEBUG_POP (&failure_id); \
1703 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1705 /* If the saved string location is NULL, it came from an \
1706 on_failure_keep_string_jump opcode, and we want to throw away the \
1707 saved NULL, thus retaining our current position in the string. */ \
1708 string_temp = POP_FAILURE_ITEM (); \
1709 if (string_temp != NULL) \
1710 str = (const char *) string_temp; \
1712 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
1713 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1714 DEBUG_PRINT1 ("'\n"); \
1716 pat = (unsigned char *) POP_FAILURE_ITEM (); \
1717 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
1718 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1720 POP_FAILURE_POINT2 (low_reg, high_reg, regstart, regend, reg_info);
1722 /* Pulled out of POP_FAILURE_POINT() to shorten the definition
1723 of that macro. (for MSC 5.1) */
1724 #define POP_FAILURE_POINT2(low_reg, high_reg, regstart, regend, reg_info) \
1726 /* Restore register info. */ \
1727 high_reg = (active_reg_t) POP_FAILURE_ITEM (); \
1728 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
1730 low_reg = (active_reg_t) POP_FAILURE_ITEM (); \
1731 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1733 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1735 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1737 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
1738 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1740 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
1741 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1743 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
1744 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1747 DEBUG_STATEMENT (nfailure_points_popped++); \
1748 } /* POP_FAILURE_POINT */
1750 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
1751 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
1752 characters can start a string that matches the pattern. This fastmap
1753 is used by re_search to skip quickly over impossible starting points.
1755 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
1756 area as BUFP->fastmap.
1758 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
1761 Returns 0 if we succeed, -2 if an internal error. */
1763 int re_compile_fastmap(bufp)
1764 struct re_pattern_buffer *bufp;
1767 fail_stack_type fail_stack;
1769 /* We don't push any register information onto the failure stack. */
1770 unsigned num_regs = 0;
1772 register char *fastmap = bufp->fastmap;
1773 unsigned char *pattern = bufp->buffer;
1774 const unsigned char *p = pattern;
1775 register unsigned char *pend = pattern + bufp->used;
1777 /* Assume that each path through the pattern can be null until
1778 proven otherwise. We set this false at the bottom of switch
1779 statement, to which we get only if a particular path doesn't
1780 match the empty string. */
1781 boolean path_can_be_null = true;
1783 /* We aren't doing a `succeed_n' to begin with. */
1784 boolean succeed_n_p = false;
1786 assert(fastmap != NULL && p != NULL);
1789 bzero(fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
1790 bufp->fastmap_accurate = 1; /* It will be when we're done. */
1791 bufp->can_be_null = 0;
1793 while (p != pend || !FAIL_STACK_EMPTY()) {
1795 bufp->can_be_null |= path_can_be_null;
1797 /* Reset for next path. */
1798 path_can_be_null = true;
1800 p = fail_stack.stack[--fail_stack.avail];
1803 /* We should never be about to go beyond the end of the pattern. */
1806 switch ((re_opcode_t) * p++) {
1808 /* I guess the idea here is to simply not bother with a fastmap
1809 if a backreference is used, since it's too hard to figure out
1810 the fastmap for the corresponding group. Setting
1811 `can_be_null' stops `re_search_2' from using the fastmap, so
1812 that is all we do. */
1814 bufp->can_be_null = 1;
1818 /* Following are the cases which match a character. These end
1827 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
1828 if (p[j / BYTEWIDTH] &
1829 (1 << (j % BYTEWIDTH)))
1835 /* Chars beyond end of map must be allowed. */
1836 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
1839 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
1842 (1 << (j % BYTEWIDTH))))
1848 for (j = 0; j < (1 << BYTEWIDTH); j++)
1849 if (SYNTAX(j) == Sword)
1855 for (j = 0; j < (1 << BYTEWIDTH); j++)
1856 if (SYNTAX(j) != Sword)
1862 /* `.' matches anything ... */
1863 for (j = 0; j < (1 << BYTEWIDTH); j++)
1866 /* ... except perhaps newline. */
1867 if (!(bufp->syntax & RE_DOT_NEWLINE))
1870 /* Return if we have already set `can_be_null'; if we have,
1871 then the fastmap is irrelevant. Something's wrong here. */
1872 else if (bufp->can_be_null)
1875 /* Otherwise, have to check alternative paths. */
1887 case push_dummy_failure:
1892 case pop_failure_jump:
1893 case maybe_pop_jump:
1896 case dummy_failure_jump:
1897 EXTRACT_NUMBER_AND_INCR(j, p);
1902 /* Jump backward implies we just went through the body of a
1903 loop and matched nothing. Opcode jumped to should be
1904 `on_failure_jump' or `succeed_n'. Just treat it like an
1905 ordinary jump. For a * loop, it has pushed its failure
1906 point already; if so, discard that as redundant. */
1907 if ((re_opcode_t) * p != on_failure_jump
1908 && (re_opcode_t) * p != succeed_n)
1912 EXTRACT_NUMBER_AND_INCR(j, p);
1915 /* If what's on the stack is where we are now, pop it. */
1916 if (!FAIL_STACK_EMPTY()
1917 && fail_stack.stack[fail_stack.avail - 1] == p)
1923 case on_failure_jump:
1924 case on_failure_keep_string_jump:
1925 handle_on_failure_jump:
1926 EXTRACT_NUMBER_AND_INCR(j, p);
1928 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
1929 end of the pattern. We don't want to push such a point,
1930 since when we restore it above, entering the switch will
1931 increment `p' past the end of the pattern. We don't need
1932 to push such a point since we obviously won't find any more
1933 fastmap entries beyond `pend'. Such a pattern can match
1934 the null string, though. */
1936 if (!PUSH_PATTERN_OP(p + j, fail_stack))
1939 bufp->can_be_null = 1;
1942 EXTRACT_NUMBER_AND_INCR(k, p); /* Skip the n. */
1943 succeed_n_p = false;
1950 /* Get to the number of times to succeed. */
1953 /* Increment p past the n for when k != 0. */
1954 EXTRACT_NUMBER_AND_INCR(k, p);
1957 succeed_n_p = true; /* Spaghetti code alert. */
1958 goto handle_on_failure_jump;
1975 abort(); /* We have listed all the cases. */
1978 /* Getting here means we have found the possible starting
1979 characters for one path of the pattern -- and that the empty
1980 string does not match. We need not follow this path further.
1981 Instead, look at the next alternative (remembered on the
1982 stack), or quit if no more. The test at the top of the loop
1983 does these things. */
1984 path_can_be_null = false;
1988 /* Set `can_be_null' for the last path (also the first path, if the
1989 pattern is empty). */
1990 bufp->can_be_null |= path_can_be_null;
1992 } /* re_compile_fastmap */
1994 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
1995 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
1996 this memory for recording register information. STARTS and ENDS
1997 must be allocated using the malloc library routine, and must each
1998 be at least NUM_REGS * sizeof (regoff_t) bytes long.
2000 If NUM_REGS == 0, then subsequent matches should allocate their own
2003 Unless this function is called, the first search or match using
2004 PATTERN_BUFFER will allocate its own register data, without
2005 freeing the old data. */
2007 void re_set_registers(bufp, regs, num_regs, starts, ends)
2008 struct re_pattern_buffer *bufp;
2009 struct re_registers *regs;
2011 regoff_t *starts, *ends;
2014 bufp->regs_allocated = REGS_REALLOCATE;
2015 regs->num_regs = num_regs;
2016 regs->start = starts;
2019 bufp->regs_allocated = REGS_UNALLOCATED;
2021 regs->start = regs->end = 0;
2025 /* Searching routines. */
2027 /* Like re_search_2, below, but only one string is specified, and
2028 doesn't let you say where to stop matching. */
2030 int re_search(bufp, string, size, startpos, range, regs)
2031 struct re_pattern_buffer *bufp;
2033 int size, startpos, range;
2034 struct re_registers *regs;
2036 return re_search_2(bufp, NULL, 0, string, size, startpos, range,
2041 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2042 virtual concatenation of STRING1 and STRING2, starting first at index
2043 STARTPOS, then at STARTPOS + 1, and so on.
2045 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2047 RANGE is how far to scan while trying to match. RANGE = 0 means try
2048 only at STARTPOS; in general, the last start tried is STARTPOS +
2051 In REGS, return the indices of the virtual concatenation of STRING1
2052 and STRING2 that matched the entire BUFP->buffer and its contained
2055 Do not consider matching one past the index STOP in the virtual
2056 concatenation of STRING1 and STRING2.
2058 We return either the position in the strings at which the match was
2059 found, -1 if no match, or -2 if error (such as failure
2063 re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs,
2065 struct re_pattern_buffer *bufp;
2066 const char *string1, *string2;
2070 struct re_registers *regs;
2074 register char *fastmap = bufp->fastmap;
2075 register char *translate = bufp->translate;
2076 int total_size = size1 + size2;
2077 int endpos = startpos + range;
2079 /* Check for out-of-range STARTPOS. */
2080 if (startpos < 0 || startpos > total_size)
2083 /* Fix up RANGE if it might eventually take us outside
2084 the virtual concatenation of STRING1 and STRING2. */
2086 range = -1 - startpos;
2087 else if (endpos > total_size)
2088 range = total_size - startpos;
2090 /* If the search isn't to be a backwards one, don't waste time in a
2091 search for a pattern that must be anchored. */
2092 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
2100 /* Update the fastmap now if not correct already. */
2101 if (fastmap && !bufp->fastmap_accurate)
2102 if (re_compile_fastmap(bufp) == -2)
2105 /* Loop through the string, looking for a place to start matching. */
2107 /* If a fastmap is supplied, skip quickly over characters that
2108 cannot be the start of a match. If the pattern can match the
2109 null string, however, we don't need to skip characters; we want
2110 the first null string. */
2111 if (fastmap && startpos < total_size && !bufp->can_be_null) {
2112 if (range > 0) { /* Searching forwards. */
2113 register const char *d;
2114 register int lim = 0;
2117 if (startpos < size1
2118 && startpos + range >= size1)
2119 lim = range - (size1 - startpos);
2122 size1 ? string2 - size1 : string1) +
2125 /* Written out as an if-else to avoid testing `translate'
2129 && !fastmap[(unsigned char)
2130 translate[(unsigned char) *d++]])
2134 && !fastmap[(unsigned char)
2138 startpos += irange - range;
2139 } else { /* Searching backwards. */
2141 register char c = (size1 == 0
2143 size1 ? string2[startpos
2145 : string1[startpos]);
2147 if (!fastmap[(unsigned char) TRANSLATE(c)])
2152 /* If can't match the null string, and that's all we have left, fail. */
2153 if (range >= 0 && startpos == total_size && fastmap
2154 && !bufp->can_be_null)
2157 val = re_match_2(bufp, string1, size1, string2, size2,
2158 startpos, regs, stop);
2168 else if (range > 0) {
2179 /* Structure for per-register (a.k.a. per-group) information.
2180 This must not be longer than one word, because we push this value
2181 onto the failure stack. Other register information, such as the
2182 starting and ending positions (which are addresses), and the list of
2183 inner groups (which is a bits list) are maintained in separate
2186 We are making a (strictly speaking) nonportable assumption here: that
2187 the compiler will pack our bit fields into something that fits into
2188 the type of `word', i.e., is something that fits into one item on the
2191 /* Declarations and macros for re_match_2. */
2194 fail_stack_elt_t word;
2196 /* This field is one if this group can match the empty string,
2197 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
2198 #define MATCH_NULL_UNSET_VALUE 3
2199 unsigned match_null_string_p:2;
2200 unsigned is_active:1;
2201 unsigned matched_something:1;
2202 unsigned ever_matched_something:1;
2204 } register_info_type;
2206 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
2207 #define IS_ACTIVE(R) ((R).bits.is_active)
2208 #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
2209 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
2211 static boolean group_match_null_string_p (unsigned char **p,
2213 register_info_type *
2216 static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end,
2217 register_info_type * reg_info);
2219 static boolean common_op_match_null_string_p (unsigned char **p,
2221 register_info_type * reg_info);
2223 static int bcmp_translate (const char *s1, const char *s2,
2224 int len, char *translate);
2226 /* Call this when have matched a real character; it sets `matched' flags
2227 for the subexpressions which we are currently inside. Also records
2228 that those subexprs have matched. */
2229 #define SET_REGS_MATCHED() \
2233 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
2235 MATCHED_SOMETHING (reg_info[r]) \
2236 = EVER_MATCHED_SOMETHING (reg_info[r]) \
2243 /* This converts PTR, a pointer into one of the search strings `string1'
2244 and `string2' into an offset from the beginning of that string. */
2245 #define POINTER_TO_OFFSET(ptr) \
2246 (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
2248 /* Registers are set to a sentinel when they haven't yet matched. */
2249 #define REG_UNSET_VALUE ((char *) -1)
2250 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
2253 /* Macros for dealing with the split strings in re_match_2. */
2255 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
2257 /* Call before fetching a character with *d. This switches over to
2258 string2 if necessary. */
2259 #define PREFETCH() \
2262 /* End of string2 => fail. */ \
2263 if (dend == end_match_2) \
2265 /* End of string1 => advance to string2. */ \
2267 dend = end_match_2; \
2271 /* Test if at very beginning or at very end of the virtual concatenation
2272 of `string1' and `string2'. If only one string, it's `string2'. */
2273 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
2274 #define AT_STRINGS_END(d) ((d) == end2)
2277 /* Test if D points to a character which is word-constituent. We have
2278 two special cases to check for: if past the end of string1, look at
2279 the first character in string2; and if before the beginning of
2280 string2, look at the last character in string1. */
2281 #define WORDCHAR_P(d) \
2282 (SYNTAX ((d) == end1 ? *string2 \
2283 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
2286 /* Test if the character before D and the one at D differ with respect
2287 to being word-constituent. */
2288 #define AT_WORD_BOUNDARY(d) \
2289 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
2290 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
2293 /* Free everything we malloc. */
2294 #define FREE_VARIABLES() alloca (0)
2296 /* These values must meet several constraints. They must not be valid
2297 register values; since we have a limit of 255 registers (because
2298 we use only one byte in the pattern for the register number), we can
2299 use numbers larger than 255. They must differ by 1, because of
2300 NUM_FAILURE_ITEMS above. And the value for the lowest register must
2301 be larger than the value for the highest register, so we do not try
2302 to actually save any registers when none are active. */
2303 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
2304 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
2306 /* Matching routines. */
2308 /* re_match is like re_match_2 except it takes only a single string. */
2310 int re_match(bufp, string, size, pos, regs)
2311 struct re_pattern_buffer *bufp;
2314 struct re_registers *regs;
2316 return re_match_2(bufp, NULL, 0, string, size, pos, regs, size);
2319 /* re_match_2 matches the compiled pattern in BUFP against the
2320 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
2321 and SIZE2, respectively). We start matching at POS, and stop
2324 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
2325 store offsets for the substring each group matched in REGS. See the
2326 documentation for exactly how many groups we fill.
2328 We return -1 if no match, -2 if an internal error (such as the
2329 failure stack overflowing). Otherwise, we return the length of the
2330 matched substring. */
2332 int re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop)
2333 struct re_pattern_buffer *bufp;
2334 const char *string1, *string2;
2337 struct re_registers *regs;
2340 /* General temporaries. */
2344 /* Just past the end of the corresponding string. */
2345 const char *end1, *end2;
2347 /* Pointers into string1 and string2, just past the last characters in
2348 each to consider matching. */
2349 const char *end_match_1, *end_match_2;
2351 /* Where we are in the data, and the end of the current string. */
2352 const char *d, *dend;
2354 /* Where we are in the pattern, and the end of the pattern. */
2355 unsigned char *p = bufp->buffer;
2356 register unsigned char *pend = p + bufp->used;
2358 /* We use this to map every character in the string. */
2359 char *translate = bufp->translate;
2361 /* Failure point stack. Each place that can handle a failure further
2362 down the line pushes a failure point on this stack. It consists of
2363 restart, regend, and reg_info for all registers corresponding to
2364 the subexpressions we're currently inside, plus the number of such
2365 registers, and, finally, two char *'s. The first char * is where
2366 to resume scanning the pattern; the second one is where to resume
2367 scanning the strings. If the latter is zero, the failure point is
2368 a ``dummy''; if a failure happens and the failure point is a dummy,
2369 it gets discarded and the next next one is tried. */
2370 fail_stack_type fail_stack;
2372 /* We fill all the registers internally, independent of what we
2373 return, for use in backreferences. The number here includes
2374 an element for register zero. */
2375 size_t num_regs = bufp->re_nsub + 1;
2377 /* The currently active registers. */
2378 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
2379 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
2381 /* Information on the contents of registers. These are pointers into
2382 the input strings; they record just what was matched (on this
2383 attempt) by a subexpression part of the pattern, that is, the
2384 regnum-th regstart pointer points to where in the pattern we began
2385 matching and the regnum-th regend points to right after where we
2386 stopped matching the regnum-th subexpression. (The zeroth register
2387 keeps track of what the whole pattern matches.) */
2388 const char **regstart = 0, **regend = 0;
2390 /* If a group that's operated upon by a repetition operator fails to
2391 match anything, then the register for its start will need to be
2392 restored because it will have been set to wherever in the string we
2393 are when we last see its open-group operator. Similarly for a
2395 const char **old_regstart = 0, **old_regend = 0;
2397 /* The is_active field of reg_info helps us keep track of which (possibly
2398 nested) subexpressions we are currently in. The matched_something
2399 field of reg_info[reg_num] helps us tell whether or not we have
2400 matched any of the pattern so far this time through the reg_num-th
2401 subexpression. These two fields get reset each time through any
2402 loop their register is in. */
2403 register_info_type *reg_info = 0;
2405 /* The following record the register info as found in the above
2406 variables when we find a match better than any we've seen before.
2407 This happens as we backtrack through the failure points, which in
2408 turn happens only if we have not yet matched the entire string. */
2409 unsigned best_regs_set = false;
2410 const char **best_regstart = 0, **best_regend = 0;
2412 /* Logically, this is `best_regend[0]'. But we don't want to have to
2413 allocate space for that if we're not allocating space for anything
2414 else (see below). Also, we never need info about register 0 for
2415 any of the other register vectors, and it seems rather a kludge to
2416 treat `best_regend' differently than the rest. So we keep track of
2417 the end of the best match so far in a separate variable. We
2418 initialize this to NULL so that when we backtrack the first time
2419 and need to test it, it's not garbage. */
2420 const char *match_end = NULL;
2422 /* Used when we pop values we don't care about. */
2423 const char **reg_dummy = 0;
2424 register_info_type *reg_info_dummy = 0;
2426 DEBUG_PRINT1("\n\nEntering re_match_2.\n");
2430 /* Do not bother to initialize all the register variables if there are
2431 no groups in the pattern, as it takes a fair amount of time. If
2432 there are groups, we include space for register 0 (the whole
2433 pattern), even though we never use it, since it simplifies the
2434 array indexing. We should fix this. */
2435 if (bufp->re_nsub) {
2436 regstart = REGEX_TALLOC(num_regs, const char *);
2437 regend = REGEX_TALLOC(num_regs, const char *);
2438 old_regstart = REGEX_TALLOC(num_regs, const char *);
2439 old_regend = REGEX_TALLOC(num_regs, const char *);
2440 best_regstart = REGEX_TALLOC(num_regs, const char *);
2441 best_regend = REGEX_TALLOC(num_regs, const char *);
2442 reg_info = REGEX_TALLOC(num_regs, register_info_type);
2443 reg_dummy = REGEX_TALLOC(num_regs, const char *);
2445 REGEX_TALLOC(num_regs, register_info_type);
2448 (regstart && regend && old_regstart && old_regend
2449 && reg_info && best_regstart && best_regend
2450 && reg_dummy && reg_info_dummy)) {
2456 /* The starting position is bogus. */
2457 if (pos < 0 || pos > size1 + size2) {
2462 /* Initialize subexpression text positions to -1 to mark ones that no
2463 start_memory/stop_memory has been seen for. Also initialize the
2464 register information struct. */
2465 for (mcnt = 1; mcnt < num_regs; mcnt++) {
2466 regstart[mcnt] = regend[mcnt]
2467 = old_regstart[mcnt] = old_regend[mcnt] =
2470 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
2471 MATCH_NULL_UNSET_VALUE;
2472 IS_ACTIVE(reg_info[mcnt]) = 0;
2473 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
2474 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
2477 /* We move `string1' into `string2' if the latter's empty -- but not if
2478 `string1' is null. */
2479 if (size2 == 0 && string1 != NULL) {
2485 end1 = string1 + size1;
2486 end2 = string2 + size2;
2488 /* Compute where to stop matching, within the two strings. */
2489 if (stop <= size1) {
2490 end_match_1 = string1 + stop;
2491 end_match_2 = string2;
2494 end_match_2 = string2 + stop - size1;
2497 /* `p' scans through the pattern as `d' scans through the data.
2498 `dend' is the end of the input string that `d' points within. `d'
2499 is advanced into the following input string whenever necessary, but
2500 this happens before fetching; therefore, at the beginning of the
2501 loop, `d' can be pointing at the end of a string, but it cannot
2503 if (size1 > 0 && pos <= size1) {
2507 d = string2 + pos - size1;
2511 DEBUG_PRINT1("The compiled pattern is: ");
2512 DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
2513 DEBUG_PRINT1("The string to match is: `");
2514 DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
2515 DEBUG_PRINT1("'\n");
2517 /* This loops over pattern commands. It exits by returning from the
2518 function if the match is complete, or it drops through if the match
2519 fails at this starting point in the input data. */
2521 DEBUG_PRINT2("\n0x%x: ", p);
2523 if (p == pend) { /* End of pattern means we might have succeeded. */
2524 DEBUG_PRINT1("end of pattern ... ");
2526 /* If we haven't matched the entire string, and we want the
2527 longest match, try backtracking. */
2528 if (d != end_match_2) {
2529 DEBUG_PRINT1("backtracking.\n");
2531 if (!FAIL_STACK_EMPTY()) { /* More failure points to try. */
2532 boolean same_str_p =
2533 (FIRST_STRING_P(match_end)
2534 == MATCHING_IN_FIRST_STRING);
2536 /* If exceeds best match so far, save it. */
2542 !MATCHING_IN_FIRST_STRING))
2544 best_regs_set = true;
2548 ("\nSAVING match as best so far.\n");
2563 /* If no failure points, don't restore garbage. */
2564 else if (best_regs_set) {
2566 /* Restore best match. It may happen that `dend ==
2567 end_match_1' while the restored d is in string2.
2568 For example, the pattern `x.*y.*z' against the
2569 strings `x-' and `y-z-', if the two strings are
2570 not consecutive in memory. */
2572 ("Restoring best registers.\n");
2575 dend = ((d >= string1 && d <= end1)
2579 for (mcnt = 1; mcnt < num_regs;
2582 best_regstart[mcnt];
2588 /* d != end_match_2 */
2589 DEBUG_PRINT1("Accepting match.\n");
2591 /* If caller wants register contents data back, do it. */
2592 if (regs && !bufp->no_sub) {
2593 /* Have the register data arrays been allocated? */
2594 if (bufp->regs_allocated == REGS_UNALLOCATED) { /* No. So allocate them with malloc. We need one
2595 extra element beyond `num_regs' for the `-1' marker
2598 MAX(RE_NREGS, num_regs + 1);
2600 TALLOC(regs->num_regs,
2603 TALLOC(regs->num_regs,
2605 if (regs->start == NULL
2606 || regs->end == NULL)
2608 bufp->regs_allocated =
2610 } else if (bufp->regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already
2611 allocated, reallocate them. If we need fewer, just
2613 if (regs->num_regs < num_regs + 1) {
2616 RETALLOC(regs->start,
2622 if (regs->start == NULL
2623 || regs->end == NULL)
2627 /* These braces fend off a "empty body in an else-statement"
2628 warning under GCC when assert expands to nothing. */
2629 assert(bufp->regs_allocated ==
2633 /* Convert the pointer data in `regstart' and `regend' to
2634 indices. Register zero has to be set differently,
2635 since we haven't kept track of any info for it. */
2636 if (regs->num_regs > 0) {
2637 regs->start[0] = pos;
2639 (MATCHING_IN_FIRST_STRING ? d -
2640 string1 : d - string2 +
2644 /* Go through the first `min (num_regs, regs->num_regs)'
2645 registers, since that is all we initialized. */
2647 mcnt < MIN(num_regs, regs->num_regs);
2649 if (REG_UNSET(regstart[mcnt])
2650 || REG_UNSET(regend[mcnt]))
2652 regs->end[mcnt] = -1;
2663 /* If the regs structure we return has more elements than
2664 were in the pattern, set the extra elements to -1. If
2665 we (re)allocated the registers, this is the case,
2666 because we always allocate enough to have at least one
2668 for (mcnt = num_regs;
2669 mcnt < regs->num_regs; mcnt++)
2671 regs->end[mcnt] = -1;
2673 /* regs && !bufp->no_sub */
2676 ("%u failure points pushed, %u popped (%u remain).\n",
2677 nfailure_points_pushed,
2678 nfailure_points_popped,
2679 nfailure_points_pushed -
2680 nfailure_points_popped);
2681 DEBUG_PRINT2("%u registers pushed.\n",
2684 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
2685 ? string1 : string2 - size1);
2687 DEBUG_PRINT2("Returning %d from re_match_2.\n",
2693 /* Otherwise match next pattern command. */
2694 switch ((re_opcode_t) * p++) {
2695 /* Ignore these. Used to ignore the n of succeed_n's which
2696 currently have n == 0. */
2698 DEBUG_PRINT1("EXECUTING no_op.\n");
2702 /* Match the next n pattern characters exactly. The following
2703 byte in the pattern defines n, and the n bytes after that
2704 are the characters to match. */
2707 DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
2709 /* This is written out as an if-else so we don't waste time
2710 testing `translate' inside the loop. */
2714 if (translate[(unsigned char) *d++]
2722 if (*d++ != (char) *p++)
2731 /* Match any character except possibly a newline or a null. */
2733 DEBUG_PRINT1("EXECUTING anychar.\n");
2737 if ((!(bufp->syntax & RE_DOT_NEWLINE)
2738 && TRANSLATE(*d) == '\n')
2739 || (bufp->syntax & RE_DOT_NOT_NULL
2740 && TRANSLATE(*d) == '\000'))
2744 DEBUG_PRINT2(" Matched `%d'.\n", *d);
2752 register unsigned char c;
2754 (re_opcode_t) * (p - 1) == charset_not;
2756 DEBUG_PRINT2("EXECUTING charset%s.\n",