Ruby 2.7.7p221 (2022-11-24 revision 168ec2b1e5ad0e4688e963d9de019557c78feed9)
regcomp.c
Go to the documentation of this file.
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
34
37{
39}
40
41extern int
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141extern int
143{
144 if (size <= 0) {
145 size = 0;
146 buf->p = NULL;
147 }
148 else {
149 buf->p = (UChar* )xmalloc(size);
150 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151 }
152
153 buf->alloc = (unsigned int )size;
154 buf->used = 0;
155 return 0;
156}
157
158
159#ifdef USE_SUBEXP_CALL
160
161static int
162unset_addr_list_init(UnsetAddrList* uslist, int size)
163{
164 UnsetAddr* p;
165
166 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
168 uslist->num = 0;
169 uslist->alloc = size;
170 uslist->us = p;
171 return 0;
172}
173
174static void
175unset_addr_list_end(UnsetAddrList* uslist)
176{
177 if (IS_NOT_NULL(uslist->us))
178 xfree(uslist->us);
179}
180
181static int
182unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183{
184 UnsetAddr* p;
185 int size;
186
187 if (uslist->num >= uslist->alloc) {
188 size = uslist->alloc * 2;
189 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
191 uslist->alloc = size;
192 uslist->us = p;
193 }
194
195 uslist->us[uslist->num].offset = offset;
196 uslist->us[uslist->num].target = node;
197 uslist->num++;
198 return 0;
199}
200#endif /* USE_SUBEXP_CALL */
201
202
203static int
204add_opcode(regex_t* reg, int opcode)
205{
206 BBUF_ADD1(reg, opcode);
207 return 0;
208}
209
210#ifdef USE_COMBINATION_EXPLOSION_CHECK
211static int
212add_state_check_num(regex_t* reg, int num)
213{
215
217 return 0;
218}
219#endif
220
221static int
222add_rel_addr(regex_t* reg, int addr)
223{
224 RelAddrType ra = (RelAddrType )addr;
225
226 BBUF_ADD(reg, &ra, SIZE_RELADDR);
227 return 0;
228}
229
230static int
231add_abs_addr(regex_t* reg, int addr)
232{
233 AbsAddrType ra = (AbsAddrType )addr;
234
235 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236 return 0;
237}
238
239static int
240add_length(regex_t* reg, OnigDistance len)
241{
243
244 BBUF_ADD(reg, &l, SIZE_LENGTH);
245 return 0;
246}
247
248static int
249add_mem_num(regex_t* reg, int num)
250{
251 MemNumType n = (MemNumType )num;
252
253 BBUF_ADD(reg, &n, SIZE_MEMNUM);
254 return 0;
255}
256
257#if 0
258static int
259add_pointer(regex_t* reg, void* addr)
260{
261 PointerType ptr = (PointerType )addr;
262
263 BBUF_ADD(reg, &ptr, SIZE_POINTER);
264 return 0;
265}
266#endif
267
268static int
269add_option(regex_t* reg, OnigOptionType option)
270{
271 BBUF_ADD(reg, &option, SIZE_OPTION);
272 return 0;
273}
274
275static int
276add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277{
278 int r;
279
280 r = add_opcode(reg, opcode);
281 if (r) return r;
282 r = add_rel_addr(reg, addr);
283 return r;
284}
285
286static int
287add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288{
289 BBUF_ADD(reg, bytes, len);
290 return 0;
291}
292
293static int
294add_bitset(regex_t* reg, BitSetRef bs)
295{
296 BBUF_ADD(reg, bs, SIZE_BITSET);
297 return 0;
298}
299
300static int
301add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302{
303 int r;
304
305 r = add_opcode(reg, opcode);
306 if (r) return r;
307 r = add_option(reg, option);
308 return r;
309}
310
311static int compile_length_tree(Node* node, regex_t* reg);
312static int compile_tree(Node* node, regex_t* reg);
313
314
315#define IS_NEED_STR_LEN_OP_EXACT(op) \
316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
318
319static int
320select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321{
322 int op;
323 OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325 if (ignore_case) {
326 switch (str_len) {
327 case 1: op = OP_EXACT1_IC; break;
328 default: op = OP_EXACTN_IC; break;
329 }
330 }
331 else {
332 switch (mb_len) {
333 case 1:
334 switch (str_len) {
335 case 1: op = OP_EXACT1; break;
336 case 2: op = OP_EXACT2; break;
337 case 3: op = OP_EXACT3; break;
338 case 4: op = OP_EXACT4; break;
339 case 5: op = OP_EXACT5; break;
340 default: op = OP_EXACTN; break;
341 }
342 break;
343
344 case 2:
345 switch (str_len) {
346 case 1: op = OP_EXACTMB2N1; break;
347 case 2: op = OP_EXACTMB2N2; break;
348 case 3: op = OP_EXACTMB2N3; break;
349 default: op = OP_EXACTMB2N; break;
350 }
351 break;
352
353 case 3:
354 op = OP_EXACTMB3N;
355 break;
356
357 default:
358 op = OP_EXACTMBN;
359 break;
360 }
361 }
362 return op;
363}
364
365static int
366compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367{
368 int r;
369 int saved_num_null_check = reg->num_null_check;
370
371 if (empty_info != 0) {
372 r = add_opcode(reg, OP_NULL_CHECK_START);
373 if (r) return r;
374 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375 if (r) return r;
376 reg->num_null_check++;
377 }
378
379 r = compile_tree(node, reg);
380 if (r) return r;
381
382 if (empty_info != 0) {
383 if (empty_info == NQ_TARGET_IS_EMPTY)
384 r = add_opcode(reg, OP_NULL_CHECK_END);
385 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390 if (r) return r;
391 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392 }
393 return r;
394}
395
396#ifdef USE_SUBEXP_CALL
397static int
398compile_call(CallNode* node, regex_t* reg)
399{
400 int r;
401
402 r = add_opcode(reg, OP_CALL);
403 if (r) return r;
404 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405 node->target);
406 if (r) return r;
407 r = add_abs_addr(reg, 0 /*dummy addr.*/);
408 return r;
409}
410#endif
411
412static int
413compile_tree_n_times(Node* node, int n, regex_t* reg)
414{
415 int i, r;
416
417 for (i = 0; i < n; i++) {
418 r = compile_tree(node, reg);
419 if (r) return r;
420 }
421 return 0;
422}
423
424static int
425add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426 regex_t* reg ARG_UNUSED, int ignore_case)
427{
428 int len;
429 int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
432
433 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
435 len += SIZE_LENGTH;
436
437 len += (int )byte_len;
438 return len;
439}
440
441static int
442add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443 regex_t* reg, int ignore_case)
444{
445 int op = select_str_opcode(mb_len, byte_len, ignore_case);
446 add_opcode(reg, op);
447
448 if (op == OP_EXACTMBN)
449 add_length(reg, mb_len);
450
451 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452 if (op == OP_EXACTN_IC)
453 add_length(reg, byte_len);
454 else
455 add_length(reg, byte_len / mb_len);
456 }
457
458 add_bytes(reg, s, byte_len);
459 return 0;
460}
461
462
463static int
464compile_length_string_node(Node* node, regex_t* reg)
465{
466 int rlen, r, len, prev_len, blen, ambig;
467 OnigEncoding enc = reg->enc;
468 UChar *p, *prev;
469 StrNode* sn;
470
471 sn = NSTR(node);
472 if (sn->end <= sn->s)
473 return 0;
474
475 ambig = NSTRING_IS_AMBIG(node);
476
477 p = prev = sn->s;
478 prev_len = enclen(enc, p, sn->end);
479 p += prev_len;
480 blen = prev_len;
481 rlen = 0;
482
483 for (; p < sn->end; ) {
484 len = enclen(enc, p, sn->end);
485 if (len == prev_len || ambig) {
486 blen += len;
487 }
488 else {
489 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490 rlen += r;
491 prev = p;
492 blen = len;
493 prev_len = len;
494 }
495 p += len;
496 }
497 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498 rlen += r;
499 return rlen;
500}
501
502static int
503compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504{
505 if (sn->end <= sn->s)
506 return 0;
507
508 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509}
510
511static int
512compile_string_node(Node* node, regex_t* reg)
513{
514 int r, len, prev_len, blen, ambig;
515 OnigEncoding enc = reg->enc;
516 UChar *p, *prev, *end;
517 StrNode* sn;
518
519 sn = NSTR(node);
520 if (sn->end <= sn->s)
521 return 0;
522
523 end = sn->end;
524 ambig = NSTRING_IS_AMBIG(node);
525
526 p = prev = sn->s;
527 prev_len = enclen(enc, p, end);
528 p += prev_len;
529 blen = prev_len;
530
531 for (; p < end; ) {
532 len = enclen(enc, p, end);
533 if (len == prev_len || ambig) {
534 blen += len;
535 }
536 else {
537 r = add_compile_string(prev, prev_len, blen, reg, ambig);
538 if (r) return r;
539
540 prev = p;
541 blen = len;
542 prev_len = len;
543 }
544
545 p += len;
546 }
547 return add_compile_string(prev, prev_len, blen, reg, ambig);
548}
549
550static int
551compile_string_raw_node(StrNode* sn, regex_t* reg)
552{
553 if (sn->end <= sn->s)
554 return 0;
555
556 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
557}
558
559static int
560add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
561{
562#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
563 add_length(reg, mbuf->used);
564 return add_bytes(reg, mbuf->p, mbuf->used);
565#else
566 int r, pad_size;
568
569 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
570 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
571 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
572
573 r = add_bytes(reg, mbuf->p, mbuf->used);
574
575 /* padding for return value from compile_length_cclass_node() to be fix. */
576 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
577 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
578 return r;
579#endif
580}
581
582static int
583compile_length_cclass_node(CClassNode* cc, regex_t* reg)
584{
585 int len;
586
587 if (IS_NULL(cc->mbuf)) {
589 }
590 else {
591 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
593 }
594 else {
596 }
597#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
598 len += SIZE_LENGTH + cc->mbuf->used;
599#else
600 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
601#endif
602 }
603
604 return len;
605}
606
607static int
608compile_cclass_node(CClassNode* cc, regex_t* reg)
609{
610 int r;
611
612 if (IS_NULL(cc->mbuf)) {
613 if (IS_NCCLASS_NOT(cc))
614 add_opcode(reg, OP_CCLASS_NOT);
615 else
616 add_opcode(reg, OP_CCLASS);
617
618 r = add_bitset(reg, cc->bs);
619 }
620 else {
621 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
622 if (IS_NCCLASS_NOT(cc))
623 add_opcode(reg, OP_CCLASS_MB_NOT);
624 else
625 add_opcode(reg, OP_CCLASS_MB);
626
627 r = add_multi_byte_cclass(cc->mbuf, reg);
628 }
629 else {
630 if (IS_NCCLASS_NOT(cc))
631 add_opcode(reg, OP_CCLASS_MIX_NOT);
632 else
633 add_opcode(reg, OP_CCLASS_MIX);
634
635 r = add_bitset(reg, cc->bs);
636 if (r) return r;
637 r = add_multi_byte_cclass(cc->mbuf, reg);
638 }
639 }
640
641 return r;
642}
643
644static int
645entry_repeat_range(regex_t* reg, int id, int lower, int upper)
646{
647#define REPEAT_RANGE_ALLOC 4
648
650
651 if (reg->repeat_range_alloc == 0) {
654 reg->repeat_range = p;
656 }
657 else if (reg->repeat_range_alloc <= id) {
658 int n;
661 sizeof(OnigRepeatRange) * n);
663 reg->repeat_range = p;
664 reg->repeat_range_alloc = n;
665 }
666 else {
667 p = reg->repeat_range;
668 }
669
670 p[id].lower = lower;
671 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
672 return 0;
673}
674
675static int
676compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
677 regex_t* reg)
678{
679 int r;
680 int num_repeat = reg->num_repeat;
681
682 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
683 if (r) return r;
684 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
685 reg->num_repeat++;
686 if (r) return r;
687 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
688 if (r) return r;
689
690 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
691 if (r) return r;
692
693 r = compile_tree_empty_check(qn->target, reg, empty_info);
694 if (r) return r;
695
696 if (
697#ifdef USE_SUBEXP_CALL
698 reg->num_call > 0 ||
699#endif
701 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
702 }
703 else {
704 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
705 }
706 if (r) return r;
707 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
708 return r;
709}
710
711static int
712is_anychar_star_quantifier(QtfrNode* qn)
713{
714 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
715 NTYPE(qn->target) == NT_CANY)
716 return 1;
717 else
718 return 0;
719}
720
721#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
722#define CKN_ON (ckn > 0)
723
724#ifdef USE_COMBINATION_EXPLOSION_CHECK
725
726static int
727compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
728{
729 int len, mod_tlen, cklen;
730 int ckn;
731 int infinite = IS_REPEAT_INFINITE(qn->upper);
732 int empty_info = qn->target_empty_info;
733 int tlen = compile_length_tree(qn->target, reg);
734
735 if (tlen < 0) return tlen;
736
737 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
738
739 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
740
741 /* anychar repeat */
742 if (NTYPE(qn->target) == NT_CANY) {
743 if (qn->greedy && infinite) {
745 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
746 else
747 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
748 }
749 }
750
751 if (empty_info != 0)
753 else
754 mod_tlen = tlen;
755
756 if (infinite && qn->lower <= 1) {
757 if (qn->greedy) {
758 if (qn->lower == 1)
760 else
761 len = 0;
762
763 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
764 }
765 else {
766 if (qn->lower == 0)
768 else
769 len = 0;
770
771 len += mod_tlen + SIZE_OP_PUSH + cklen;
772 }
773 }
774 else if (qn->upper == 0) {
775 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
776 len = SIZE_OP_JUMP + tlen;
777 else
778 len = 0;
779 }
780 else if (qn->upper == 1 && qn->greedy) {
781 if (qn->lower == 0) {
782 if (CKN_ON) {
783 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
784 }
785 else {
786 len = SIZE_OP_PUSH + tlen;
787 }
788 }
789 else {
790 len = tlen;
791 }
792 }
793 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
794 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
795 }
796 else {
798 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
799 if (CKN_ON)
800 len += SIZE_OP_STATE_CHECK;
801 }
802
803 return len;
804}
805
806static int
807compile_quantifier_node(QtfrNode* qn, regex_t* reg)
808{
809 int r, mod_tlen;
810 int ckn;
811 int infinite = IS_REPEAT_INFINITE(qn->upper);
812 int empty_info = qn->target_empty_info;
813 int tlen = compile_length_tree(qn->target, reg);
814
815 if (tlen < 0) return tlen;
816
817 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
818
819 if (is_anychar_star_quantifier(qn)) {
820 r = compile_tree_n_times(qn->target, qn->lower, reg);
821 if (r) return r;
822 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
823 if (IS_MULTILINE(reg->options))
824 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
825 else
826 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
827 if (r) return r;
828 if (CKN_ON) {
829 r = add_state_check_num(reg, ckn);
830 if (r) return r;
831 }
832
833 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
834 }
835 else {
836 if (IS_MULTILINE(reg->options)) {
837 r = add_opcode(reg, (CKN_ON ?
840 }
841 else {
842 r = add_opcode(reg, (CKN_ON ?
844 : OP_ANYCHAR_STAR));
845 }
846 if (r) return r;
847 if (CKN_ON)
848 r = add_state_check_num(reg, ckn);
849
850 return r;
851 }
852 }
853
854 if (empty_info != 0)
856 else
857 mod_tlen = tlen;
858
859 if (infinite && qn->lower <= 1) {
860 if (qn->greedy) {
861 if (qn->lower == 1) {
862 r = add_opcode_rel_addr(reg, OP_JUMP,
863 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
864 if (r) return r;
865 }
866
867 if (CKN_ON) {
868 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
869 if (r) return r;
870 r = add_state_check_num(reg, ckn);
871 if (r) return r;
872 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
873 }
874 else {
875 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
876 }
877 if (r) return r;
878 r = compile_tree_empty_check(qn->target, reg, empty_info);
879 if (r) return r;
880 r = add_opcode_rel_addr(reg, OP_JUMP,
881 -(mod_tlen + (int )SIZE_OP_JUMP
882 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
883 }
884 else {
885 if (qn->lower == 0) {
886 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
887 if (r) return r;
888 }
889 r = compile_tree_empty_check(qn->target, reg, empty_info);
890 if (r) return r;
891 if (CKN_ON) {
892 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
893 if (r) return r;
894 r = add_state_check_num(reg, ckn);
895 if (r) return r;
896 r = add_rel_addr(reg,
897 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
898 }
899 else
900 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
901 }
902 }
903 else if (qn->upper == 0) {
904 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
905 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
906 if (r) return r;
907 r = compile_tree(qn->target, reg);
908 }
909 else
910 r = 0;
911 }
912 else if (qn->upper == 1 && qn->greedy) {
913 if (qn->lower == 0) {
914 if (CKN_ON) {
915 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
916 if (r) return r;
917 r = add_state_check_num(reg, ckn);
918 if (r) return r;
919 r = add_rel_addr(reg, tlen);
920 }
921 else {
922 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
923 }
924 if (r) return r;
925 }
926
927 r = compile_tree(qn->target, reg);
928 }
929 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
930 if (CKN_ON) {
931 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
932 if (r) return r;
933 r = add_state_check_num(reg, ckn);
934 if (r) return r;
935 r = add_rel_addr(reg, SIZE_OP_JUMP);
936 }
937 else {
938 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
939 }
940
941 if (r) return r;
942 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
943 if (r) return r;
944 r = compile_tree(qn->target, reg);
945 }
946 else {
947 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
948 if (CKN_ON) {
949 if (r) return r;
950 r = add_opcode(reg, OP_STATE_CHECK);
951 if (r) return r;
952 r = add_state_check_num(reg, ckn);
953 }
954 }
955 return r;
956}
957
958#else /* USE_COMBINATION_EXPLOSION_CHECK */
959
960static int
961compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
962{
963 int len, mod_tlen;
964 int infinite = IS_REPEAT_INFINITE(qn->upper);
965 int empty_info = qn->target_empty_info;
966 int tlen = compile_length_tree(qn->target, reg);
967
968 if (tlen < 0) return tlen;
969
970 /* anychar repeat */
971 if (NTYPE(qn->target) == NT_CANY) {
972 if (qn->greedy && infinite) {
974 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
975 else
976 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
977 }
978 }
979
980 if (empty_info != 0)
982 else
983 mod_tlen = tlen;
984
985 if (infinite &&
986 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
987 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
989 }
990 else {
991 len = tlen * qn->lower;
992 }
993
994 if (qn->greedy) {
995#ifdef USE_OP_PUSH_OR_JUMP_EXACT
996 if (IS_NOT_NULL(qn->head_exact))
998 else
999#endif
1002 else
1003 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1004 }
1005 else
1006 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1007 }
1008 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1009 len = SIZE_OP_JUMP + tlen;
1010 }
1011 else if (!infinite && qn->greedy &&
1012 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1014 len = tlen * qn->lower;
1015 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1016 }
1017 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1018 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1019 }
1020 else {
1022 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1023 }
1024
1025 return len;
1026}
1027
1028static int
1029compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1030{
1031 int i, r, mod_tlen;
1032 int infinite = IS_REPEAT_INFINITE(qn->upper);
1033 int empty_info = qn->target_empty_info;
1034 int tlen = compile_length_tree(qn->target, reg);
1035
1036 if (tlen < 0) return tlen;
1037
1038 if (is_anychar_star_quantifier(qn)) {
1039 r = compile_tree_n_times(qn->target, qn->lower, reg);
1040 if (r) return r;
1041 if (IS_NOT_NULL(qn->next_head_exact)) {
1042 if (IS_MULTILINE(reg->options))
1043 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1044 else
1045 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1046 if (r) return r;
1047 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1048 }
1049 else {
1050 if (IS_MULTILINE(reg->options))
1051 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1052 else
1053 return add_opcode(reg, OP_ANYCHAR_STAR);
1054 }
1055 }
1056
1057 if (empty_info != 0)
1059 else
1060 mod_tlen = tlen;
1061
1062 if (infinite &&
1063 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1064 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1065 if (qn->greedy) {
1066#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1067 if (IS_NOT_NULL(qn->head_exact))
1068 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1069 else
1070#endif
1072 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1073 else
1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1075 }
1076 else {
1077 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1078 }
1079 if (r) return r;
1080 }
1081 else {
1082 r = compile_tree_n_times(qn->target, qn->lower, reg);
1083 if (r) return r;
1084 }
1085
1086 if (qn->greedy) {
1087#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088 if (IS_NOT_NULL(qn->head_exact)) {
1089 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1090 mod_tlen + SIZE_OP_JUMP);
1091 if (r) return r;
1092 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1093 r = compile_tree_empty_check(qn->target, reg, empty_info);
1094 if (r) return r;
1095 r = add_opcode_rel_addr(reg, OP_JUMP,
1096 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1097 }
1098 else
1099#endif
1100 if (IS_NOT_NULL(qn->next_head_exact)) {
1101 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1102 mod_tlen + SIZE_OP_JUMP);
1103 if (r) return r;
1104 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1105 r = compile_tree_empty_check(qn->target, reg, empty_info);
1106 if (r) return r;
1107 r = add_opcode_rel_addr(reg, OP_JUMP,
1108 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1109 }
1110 else {
1111 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1112 if (r) return r;
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1117 }
1118 }
1119 else {
1120 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1121 if (r) return r;
1122 r = compile_tree_empty_check(qn->target, reg, empty_info);
1123 if (r) return r;
1124 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1125 }
1126 }
1127 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1128 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1129 if (r) return r;
1130 r = compile_tree(qn->target, reg);
1131 }
1132 else if (!infinite && qn->greedy &&
1133 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1135 int n = qn->upper - qn->lower;
1136
1137 r = compile_tree_n_times(qn->target, qn->lower, reg);
1138 if (r) return r;
1139
1140 for (i = 0; i < n; i++) {
1141 r = add_opcode_rel_addr(reg, OP_PUSH,
1142 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1143 if (r) return r;
1144 r = compile_tree(qn->target, reg);
1145 if (r) return r;
1146 }
1147 }
1148 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1149 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1150 if (r) return r;
1151 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1152 if (r) return r;
1153 r = compile_tree(qn->target, reg);
1154 }
1155 else {
1156 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1157 }
1158 return r;
1159}
1160#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1161
1162static int
1163compile_length_option_node(EncloseNode* node, regex_t* reg)
1164{
1165 int tlen;
1166 OnigOptionType prev = reg->options;
1167
1168 reg->options = node->option;
1169 tlen = compile_length_tree(node->target, reg);
1170 reg->options = prev;
1171
1172 if (tlen < 0) return tlen;
1173
1174 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1176 + tlen + SIZE_OP_SET_OPTION;
1177 }
1178 else
1179 return tlen;
1180}
1181
1182static int
1183compile_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int r;
1186 OnigOptionType prev = reg->options;
1187
1188 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1189 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1190 if (r) return r;
1191 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1192 if (r) return r;
1193 r = add_opcode(reg, OP_FAIL);
1194 if (r) return r;
1195 }
1196
1197 reg->options = node->option;
1198 r = compile_tree(node->target, reg);
1199 reg->options = prev;
1200
1201 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1202 if (r) return r;
1203 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1204 }
1205 return r;
1206}
1207
1208static int
1209compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1210{
1211 int len;
1212 int tlen;
1213
1214 if (node->type == ENCLOSE_OPTION)
1215 return compile_length_option_node(node, reg);
1216
1217 if (node->target) {
1218 tlen = compile_length_tree(node->target, reg);
1219 if (tlen < 0) return tlen;
1220 }
1221 else
1222 tlen = 0;
1223
1224 switch (node->type) {
1225 case ENCLOSE_MEMORY:
1226#ifdef USE_SUBEXP_CALL
1227 if (IS_ENCLOSE_CALLED(node)) {
1230 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1231 len += (IS_ENCLOSE_RECURSION(node)
1233 else
1234 len += (IS_ENCLOSE_RECURSION(node)
1236 }
1237 else if (IS_ENCLOSE_RECURSION(node)) {
1239 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1241 }
1242 else
1243#endif
1244 {
1245 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1247 else
1249
1250 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1252 }
1253 break;
1254
1257 QtfrNode* qn = NQTFR(node->target);
1258 tlen = compile_length_tree(qn->target, reg);
1259 if (tlen < 0) return tlen;
1260
1261 len = tlen * qn->lower
1263 }
1264 else {
1266 }
1267 break;
1268
1269 case ENCLOSE_CONDITION:
1271 if (NTYPE(node->target) == NT_ALT) {
1272 Node* x = node->target;
1273
1274 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275 if (tlen < 0) return tlen;
1276 len += tlen + SIZE_OP_JUMP;
1277 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278 x = NCDR(x);
1279 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280 if (tlen < 0) return tlen;
1281 len += tlen;
1283 }
1284 else {
1285 return ONIGERR_PARSER_BUG;
1286 }
1287 break;
1288
1289 case ENCLOSE_ABSENT:
1291 break;
1292
1293 default:
1294 return ONIGERR_TYPE_BUG;
1295 break;
1296 }
1297
1298 return len;
1299}
1300
1301static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1302
/*
 * Emit the code for an enclose node into reg.
 *
 * ENCLOSE_OPTION is handed to compile_option_node(); every other enclose
 * type is handled by the switch below.  Returns the status of the last emit
 * helper (the code treats any non-zero value as failure and returns it
 * immediately), ONIGERR_PARSER_BUG for a malformed condition node, or
 * ONIGERR_TYPE_BUG for an unknown enclose type.
 *
 * NOTE(review): this listing skips line numbers 1317, 1322, 1325, 1328,
 * 1347, 1350, 1377-1378 and 1420.  Statements (and at least one case label)
 * appear to be missing at those points -- restore them from the upstream
 * source before relying on this text.
 */
1303 static int
1304 compile_enclose_node(EncloseNode* node, regex_t* reg)
1305 {
1306 int r, len;
1307
1308 if (node->type == ENCLOSE_OPTION)
1309 return compile_option_node(node, reg);
1310
1311 switch (node->type) {
1312 case ENCLOSE_MEMORY:
1313#ifdef USE_SUBEXP_CALL
/* A called group is emitted as: CALL <abs addr>, JUMP over the body, body, RETURN. */
1314 if (IS_ENCLOSE_CALLED(node)) {
1315 r = add_opcode(reg, OP_CALL);
1316 if (r) return r;
/* NOTE(review): listing line 1317 is missing here. */
1318 node->state |= NST_ADDR_FIXED;
1319 r = add_abs_addr(reg, (int )node->call_addr);
1320 if (r) return r;
1321 len = compile_length_tree(node->target, reg);
/* NOTE(review): lines 1322, 1325 and 1328 are missing; the two "len +=" expressions below are unterminated in this listing. */
1323 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324 len += (IS_ENCLOSE_RECURSION(node)
1326 else
1327 len += (IS_ENCLOSE_RECURSION(node)
1329
1330 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1331 if (r) return r;
1332 }
1333#endif
/* Group start: the PUSH variant is chosen when the group's bit is set in reg->bt_mem_start. */
1334 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1335 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1336 else
1337 r = add_opcode(reg, OP_MEMORY_START);
1338 if (r) return r;
1339 r = add_mem_num(reg, node->regnum);
1340 if (r) return r;
1341 r = compile_tree(node->target, reg);
1342 if (r) return r;
1343#ifdef USE_SUBEXP_CALL
1344 if (IS_ENCLOSE_CALLED(node)) {
/* NOTE(review): lines 1347 and 1350 are missing; the add_opcode() argument lists below are unterminated. */
1345 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1346 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1348 else
1349 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1351
1352 if (r) return r;
1353 r = add_mem_num(reg, node->regnum);
1354 if (r) return r;
1355 r = add_opcode(reg, OP_RETURN);
1356 }
1357 else if (IS_ENCLOSE_RECURSION(node)) {
1358 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1359 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1360 else
1361 r = add_opcode(reg, OP_MEMORY_END_REC);
1362 if (r) return r;
1363 r = add_mem_num(reg, node->regnum);
1364 }
1365 else
1366#endif
1367 {
1368 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1369 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1370 else
1371 r = add_opcode(reg, OP_MEMORY_END);
1372 if (r) return r;
1373 r = add_mem_num(reg, node->regnum);
1374 }
1375 break;
1376
/* NOTE(review): lines 1377-1378 are missing; presumably a case label and the "if" that pairs with the "else" at 1395 -- confirm against upstream. */
/* First branch: emit the target qn->lower times, then a PUSH / body / POP / JUMP-back loop. */
1379 QtfrNode* qn = NQTFR(node->target);
1380 r = compile_tree_n_times(qn->target, qn->lower, reg);
1381 if (r) return r;
1382
1383 len = compile_length_tree(qn->target, reg);
1384 if (len < 0) return len;
1385
1386 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1387 if (r) return r;
1388 r = compile_tree(qn->target, reg);
1389 if (r) return r;
1390 r = add_opcode(reg, OP_POP);
1391 if (r) return r;
/* Negative offset: jump back over JUMP, POP, the body and PUSH to repeat the loop. */
1392 r = add_opcode_rel_addr(reg, OP_JUMP,
1393 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1394 }
1395 else {
/* Otherwise the target is bracketed by PUSH_STOP_BT / POP_STOP_BT. */
1396 r = add_opcode(reg, OP_PUSH_STOP_BT);
1397 if (r) return r;
1398 r = compile_tree(node->target, reg);
1399 if (r) return r;
1400 r = add_opcode(reg, OP_POP_STOP_BT);
1401 }
1402 break;
1403
1404 case ENCLOSE_CONDITION:
/* Layout: CONDITION <regnum> <rel addr>, yes-branch, JUMP over no-branch, no-branch. */
1405 r = add_opcode(reg, OP_CONDITION);
1406 if (r) return r;
1407 r = add_mem_num(reg, node->regnum);
1408 if (r) return r;
1409
/* The target must be an NT_ALT whose first two entries are the yes- and no-branches. */
1410 if (NTYPE(node->target) == NT_ALT) {
1411 Node* x = node->target;
1412 int len2;
1413
1414 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1415 if (len < 0) return len;
1416 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1417 x = NCDR(x);
1418 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1419 if (len2 < 0) return len2;
/* NOTE(review): listing line 1420 is missing here. */
1421
1422 x = node->target;
1423 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1424 if (r) return r;
1425 r = compile_tree(NCAR(x), reg); /* yes-node */
1426 if (r) return r;
1427 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1428 if (r) return r;
1429 x = NCDR(x);
1430 r = compile_tree(NCAR(x), reg); /* no-node */
1431 }
1432 else {
1433 return ONIGERR_PARSER_BUG;
1434 }
1435 break;
1436
1437 case ENCLOSE_ABSENT:
/* Layout: PUSH_ABSENT_POS, ABSENT <rel addr past ABSENT_END>, target, ABSENT_END. */
1438 len = compile_length_tree(node->target, reg);
1439 if (len < 0) return len;
1440
1441 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1442 if (r) return r;
1443 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1444 if (r) return r;
1445 r = compile_tree(node->target, reg);
1446 if (r) return r;
1447 r = add_opcode(reg, OP_ABSENT_END);
1448 break;
1449
1450 default:
1451 return ONIGERR_TYPE_BUG;
1452 break;
1453 }
1454
1455 return r;
1456 }
1457
/*
 * Compute the length of the code that compile_anchor_node() emits for node.
 *
 * tlen is the compiled length of the anchor's target subtree (0 when there
 * is no target); a negative tlen is an error code and is returned as-is.
 * Anchor types not listed in the switch take SIZE_OPCODE.
 *
 * NOTE(review): this listing skips line numbers 1471, 1473-1474 and
 * 1479-1480.  The "len = ..." assignments for ANCHOR_PREC_READ and two
 * further cases (with their case labels) appear to be missing -- as printed,
 * len would be returned unset for ANCHOR_PREC_READ.  Restore from upstream.
 */
1458 static int
1459 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1460 {
1461 int len;
1462 int tlen = 0;
1463
1464 if (node->target) {
1465 tlen = compile_length_tree(node->target, reg);
1466 if (tlen < 0) return tlen;
1467 }
1468
1469 switch (node->type) {
1470 case ANCHOR_PREC_READ:
1472 break;
1475 break;
1476 case ANCHOR_LOOK_BEHIND:
1477 len = SIZE_OP_LOOK_BEHIND + tlen;
1478 break;
1481 break;
1482
1483 default:
1484 len = SIZE_OPCODE;
1485 break;
1486 }
1487
1488 return len;
1489 }
1490
/*
 * Emit the code for an anchor node into reg.
 *
 * Simple anchors emit a single opcode.  Word-boundary style anchors pick the
 * ASCII variant of the opcode when node->ascii_range is set.  Anchors with a
 * target wrap the compiled target in the matching push/pop (or push/fail)
 * opcodes.  Returns the status of the last emit helper (non-zero is treated
 * as failure), a negative length error from compile_length_tree(), or
 * ONIGERR_TYPE_BUG for an unknown anchor type.
 *
 * NOTE(review): this listing skips line numbers 1508, 1532, 1549, 1559,
 * 1564 and 1568: three case labels, the tail of one add_opcode_rel_addr()
 * call, and the statements inside both "char_len < 0" branches appear to be
 * missing.  Restore from upstream before compiling.
 */
1491 static int
1492 compile_anchor_node(AnchorNode* node, regex_t* reg)
1493 {
1494 int r, len;
1495
1496 switch (node->type) {
1497 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1498 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1499 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1500 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1501 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1502 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1503
1504 case ANCHOR_WORD_BOUND:
1505 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1506 else r = add_opcode(reg, OP_WORD_BOUND);
1507 break;
/* NOTE(review): listing line 1508 (a case label) is missing here. */
1509 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1510 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1511 break;
1512#ifdef USE_WORD_BEGIN_END
1513 case ANCHOR_WORD_BEGIN:
1514 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1515 else r = add_opcode(reg, OP_WORD_BEGIN);
1516 break;
1517 case ANCHOR_WORD_END:
1518 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1519 else r = add_opcode(reg, OP_WORD_END);
1520 break;
1521#endif
1522 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1523
1524 case ANCHOR_PREC_READ:
/* Layout: PUSH_POS, target, POP_POS. */
1525 r = add_opcode(reg, OP_PUSH_POS);
1526 if (r) return r;
1527 r = compile_tree(node->target, reg);
1528 if (r) return r;
1529 r = add_opcode(reg, OP_POP_POS);
1530 break;
1531
/* NOTE(review): listing line 1532 (a case label) is missing here. */
/* Layout: PUSH_POS_NOT <rel addr past FAIL_POS>, target, FAIL_POS. */
1533 len = compile_length_tree(node->target, reg);
1534 if (len < 0) return len;
1535 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1536 if (r) return r;
1537 r = compile_tree(node->target, reg);
1538 if (r) return r;
1539 r = add_opcode(reg, OP_FAIL_POS);
1540 break;
1541
1542 case ANCHOR_LOOK_BEHIND:
1543 {
/* Layout: LOOK_BEHIND <n>, target.  n is node->char_len, or computed from the target when char_len < 0. */
1544 int n;
1545 r = add_opcode(reg, OP_LOOK_BEHIND);
1546 if (r) return r;
1547 if (node->char_len < 0) {
1548 r = get_char_length_tree(node->target, reg, &n);
/* NOTE(review): listing line 1549 is missing; r is not checked in the text as printed. */
1550 }
1551 else
1552 n = node->char_len;
1553 r = add_length(reg, n);
1554 if (r) return r;
1555 r = compile_tree(node->target, reg);
1556 }
1557 break;
1558
/* NOTE(review): listing line 1559 (a case label) is missing here. */
1560 {
1561 int n;
1562 len = compile_length_tree(node->target, reg);
/* NOTE(review): listing line 1564 is missing; the call below is unterminated in this listing. */
1563 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1565 if (r) return r;
1566 if (node->char_len < 0) {
1567 r = get_char_length_tree(node->target, reg, &n);
/* NOTE(review): listing line 1568 is missing; r is not checked in the text as printed. */
1569 }
1570 else
1571 n = node->char_len;
1572 r = add_length(reg, n);
1573 if (r) return r;
1574 r = compile_tree(node->target, reg);
1575 if (r) return r;
1576 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1577 }
1578 break;
1579
1580 default:
1581 return ONIGERR_TYPE_BUG;
1582 break;
1583 }
1584
1585 return r;
1586 }
1587
/*
 * Compute the length of the code that compile_tree() emits for node.
 *
 * Returns the length, or a negative error code: a negative value from any
 * recursive call is returned immediately, and an unknown node type yields
 * ONIGERR_TYPE_BUG.
 *
 * NOTE(review): this listing skips line numbers 1642-1643, 1649 and 1652.
 * All three assignments to r in the NT_BREF case are missing or truncated --
 * restore from upstream before compiling.
 */
1588 static int
1589 compile_length_tree(Node* node, regex_t* reg)
1590 {
1591 int len, type, r;
1592
1593 type = NTYPE(node);
1594 switch (type) {
1595 case NT_LIST:
/* A list is the sum of its elements. */
1596 len = 0;
1597 do {
1598 r = compile_length_tree(NCAR(node), reg);
1599 if (r < 0) return r;
1600 len += r;
1601 } while (IS_NOT_NULL(node = NCDR(node)));
1602 r = len;
1603 break;
1604
1605 case NT_ALT:
1606 {
/* Sum of the n alternatives plus one PUSH and one JUMP for each of the first n - 1. */
1607 int n = 0;
1608 len = 0;
1609 do {
1610 r = compile_length_tree(NCAR(node), reg);
1611 if (r < 0) return r;
1612 len += r;
1613 n++;
1614 } while (IS_NOT_NULL(node = NCDR(node)));
1615 r = len;
1616 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1617 }
1618 break;
1619
1620 case NT_STR:
1621 if (NSTRING_IS_RAW(node))
1622 r = compile_length_string_raw_node(NSTR(node), reg);
1623 else
1624 r = compile_length_string_node(node, reg);
1625 break;
1626
1627 case NT_CCLASS:
1628 r = compile_length_cclass_node(NCCLASS(node), reg);
1629 break;
1630
1631 case NT_CTYPE:
1632 case NT_CANY:
1633 r = SIZE_OPCODE;
1634 break;
1635
1636 case NT_BREF:
1637 {
1638 BRefNode* br = NBREF(node);
1639
1640#ifdef USE_BACKREF_WITH_LEVEL
1641 if (IS_BACKREF_NEST_LEVEL(br)) {
/* NOTE(review): listing lines 1642-1643 are missing here. */
1644 }
1645 else
1646#endif
1647 if (br->back_num == 1) {
/* NOTE(review): listing line 1649 is missing; the expression below is unterminated. */
1648 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1650 }
1651 else {
/* NOTE(review): listing line 1652 is missing here. */
1653 }
1654 }
1655 break;
1656
1657#ifdef USE_SUBEXP_CALL
1658 case NT_CALL:
1659 r = SIZE_OP_CALL;
1660 break;
1661#endif
1662
1663 case NT_QTFR:
1664 r = compile_length_quantifier_node(NQTFR(node), reg);
1665 break;
1666
1667 case NT_ENCLOSE:
1668 r = compile_length_enclose_node(NENCLOSE(node), reg);
1669 break;
1670
1671 case NT_ANCHOR:
1672 r = compile_length_anchor_node(NANCHOR(node), reg);
1673 break;
1674
1675 default:
1676 return ONIGERR_TYPE_BUG;
1677 break;
1678 }
1679
1680 return r;
1681 }
1682
1683static int
1684compile_tree(Node* node, regex_t* reg)
1685{
1686 int n, type, len, pos, r = 0;
1687
1688 type = NTYPE(node);
1689 switch (type) {
1690 case NT_LIST:
1691 do {
1692 r = compile_tree(NCAR(node), reg);
1693 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1694 break;
1695
1696 case NT_ALT:
1697 {
1698 Node* x = node;
1699 len = 0;
1700 do {
1701 len += compile_length_tree(NCAR(x), reg);
1702 if (NCDR(x) != NULL) {
1704 }
1705 } while (IS_NOT_NULL(x = NCDR(x)));
1706 pos = reg->used + len; /* goal position */
1707
1708 do {
1709 len = compile_length_tree(NCAR(node), reg);
1710 if (IS_NOT_NULL(NCDR(node))) {
1711 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1712 if (r) break;
1713 }
1714 r = compile_tree(NCAR(node), reg);
1715 if (r) break;
1716 if (IS_NOT_NULL(NCDR(node))) {
1717 len = pos - (reg->used + SIZE_OP_JUMP);
1718 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1719 if (r) break;
1720 }
1721 } while (IS_NOT_NULL(node = NCDR(node)));
1722 }
1723 break;
1724
1725 case NT_STR:
1726 if (NSTRING_IS_RAW(node))
1727 r = compile_string_raw_node(NSTR(node), reg);
1728 else
1729 r = compile_string_node(node, reg);
1730 break;
1731
1732 case NT_CCLASS:
1733 r = compile_cclass_node(NCCLASS(node), reg);
1734 break;
1735
1736 case NT_CTYPE:
1737 {
1738 int op;
1739
1740 switch (NCTYPE(node)->ctype) {
1741 case ONIGENC_CTYPE_WORD:
1742 if (NCTYPE(node)->ascii_range != 0) {
1743 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1744 else op = OP_ASCII_WORD;
1745 }
1746 else {
1747 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1748 else op = OP_WORD;
1749 }
1750 break;
1751 default:
1752 return ONIGERR_TYPE_BUG;
1753 break;
1754 }
1755 r = add_opcode(reg, op);
1756 }
1757 break;
1758
1759 case NT_CANY:
1760 if (IS_MULTILINE(reg->options))
1761 r = add_opcode(reg, OP_ANYCHAR_ML);
1762 else
1763 r = add_opcode(reg, OP_ANYCHAR);
1764 break;
1765
1766 case NT_BREF:
1767 {
1768 BRefNode* br = NBREF(node);
1769
1770#ifdef USE_BACKREF_WITH_LEVEL
1771 if (IS_BACKREF_NEST_LEVEL(br)) {
1772 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1773 if (r) return r;
1774 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1775 if (r) return r;
1776 r = add_length(reg, br->nest_level);
1777 if (r) return r;
1778
1779 goto add_bacref_mems;
1780 }
1781 else
1782#endif
1783 if (br->back_num == 1) {
1784 n = br->back_static[0];
1785 if (IS_IGNORECASE(reg->options)) {
1786 r = add_opcode(reg, OP_BACKREFN_IC);
1787 if (r) return r;
1788 r = add_mem_num(reg, n);
1789 }
1790 else {
1791 switch (n) {
1792 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1793 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1794 default:
1795 r = add_opcode(reg, OP_BACKREFN);
1796 if (r) return r;
1797 r = add_mem_num(reg, n);
1798 break;
1799 }
1800 }
1801 }
1802 else {
1803 int i;
1804 int* p;
1805
1806 if (IS_IGNORECASE(reg->options)) {
1807 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1808 }
1809 else {
1810 r = add_opcode(reg, OP_BACKREF_MULTI);
1811 }
1812 if (r) return r;
1813
1814#ifdef USE_BACKREF_WITH_LEVEL
1815 add_bacref_mems:
1816#endif
1817 r = add_length(reg, br->back_num);
1818 if (r) return r;
1819 p = BACKREFS_P(br);
1820 for (i = br->back_num - 1; i >= 0; i--) {
1821 r = add_mem_num(reg, p[i]);
1822 if (r) return r;
1823 }
1824 }
1825 }
1826 break;
1827
1828#ifdef USE_SUBEXP_CALL
1829 case NT_CALL:
1830 r = compile_call(NCALL(node), reg);
1831 break;
1832#endif
1833
1834 case NT_QTFR:
1835 r = compile_quantifier_node(NQTFR(node), reg);
1836 break;
1837
1838 case NT_ENCLOSE:
1839 r = compile_enclose_node(NENCLOSE(node), reg);
1840 break;
1841
1842 case NT_ANCHOR:
1843 r = compile_anchor_node(NANCHOR(node), reg);
1844 break;
1845
1846 default:
1847#ifdef ONIG_DEBUG
1848 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1849#endif
1850 break;
1851 }
1852
1853 return r;
1854}
1855
1856#ifdef USE_NAMED_GROUP
1857
1858static int
1859noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1860{
1861 int r = 0;
1862 Node* node = *plink;
1863
1864 switch (NTYPE(node)) {
1865 case NT_LIST:
1866 case NT_ALT:
1867 do {
1868 r = noname_disable_map(&(NCAR(node)), map, counter);
1869 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1870 break;
1871
1872 case NT_QTFR:
1873 {
1874 Node** ptarget = &(NQTFR(node)->target);
1875 Node* old = *ptarget;
1876 r = noname_disable_map(ptarget, map, counter);
1877 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1878 onig_reduce_nested_quantifier(node, *ptarget);
1879 }
1880 }
1881 break;
1882
1883 case NT_ENCLOSE:
1884 {
1885 EncloseNode* en = NENCLOSE(node);
1886 if (en->type == ENCLOSE_MEMORY) {
1887 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1888 (*counter)++;
1889 map[en->regnum].new_val = *counter;
1890 en->regnum = *counter;
1891 }
1892 else if (en->regnum != 0) {
1893 *plink = en->target;
1894 en->target = NULL_NODE;
1895 onig_node_free(node);
1896 r = noname_disable_map(plink, map, counter);
1897 break;
1898 }
1899 }
1900 r = noname_disable_map(&(en->target), map, counter);
1901 }
1902 break;
1903
1904 case NT_ANCHOR:
1905 if (NANCHOR(node)->target)
1906 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1907 break;
1908
1909 default:
1910 break;
1911 }
1912
1913 return r;
1914}
1915
/*
 * Rewrite the group numbers stored in a back-reference node using map.
 *
 * Each entry backs[i] is replaced by map[backs[i]].new_val; entries whose
 * new value is not positive are dropped, the survivors are compacted to the
 * front of the array, and bn->back_num is set to the surviving count.
 * Returns 0 on success, or ONIGERR_INVALID_BACKREF when an entry exceeds
 * num_mem.
 *
 * NOTE(review): listing line 1924 is missing -- the statement controlled by
 * the IS_BACKREF_NAME_REF test below is absent, so as printed the "if" would
 * govern the old_num assignment.  Presumably it returned an error for
 * numbered (non-name) references; restore from upstream.
 */
1916 static int
1917 renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1918 {
1919 int i, pos, n, old_num;
1920 int *backs;
1921 BRefNode* bn = NBREF(node);
1922
1923 if (! IS_BACKREF_NAME_REF(bn))
1925
1926 old_num = bn->back_num;
/* The numbers live in back_static unless a back_dynamic array is set. */
1927 if (IS_NULL(bn->back_dynamic))
1928 backs = bn->back_static;
1929 else
1930 backs = bn->back_dynamic;
1931
/* i reads every old entry; pos is the write index for the kept ones. */
1932 for (i = 0, pos = 0; i < old_num; i++) {
1933 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1934 n = map[backs[i]].new_val;
1935 if (n > 0) {
1936 backs[pos] = n;
1937 pos++;
1938 }
1939 }
1940
1941 bn->back_num = pos;
1942 return 0;
1943 }
1944
1945static int
1946renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1947{
1948 int r = 0;
1949
1950 switch (NTYPE(node)) {
1951 case NT_LIST:
1952 case NT_ALT:
1953 do {
1954 r = renumber_by_map(NCAR(node), map, num_mem);
1955 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1956 break;
1957 case NT_QTFR:
1958 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1959 break;
1960 case NT_ENCLOSE:
1961 {
1962 EncloseNode* en = NENCLOSE(node);
1963 if (en->type == ENCLOSE_CONDITION) {
1964 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1965 en->regnum = map[en->regnum].new_val;
1966 }
1967 r = renumber_by_map(en->target, map, num_mem);
1968 }
1969 break;
1970
1971 case NT_BREF:
1972 r = renumber_node_backref(node, map, num_mem);
1973 break;
1974
1975 case NT_ANCHOR:
1976 if (NANCHOR(node)->target)
1977 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
1978 break;
1979
1980 default:
1981 break;
1982 }
1983
1984 return r;
1985}
1986
/*
 * Walk the tree below node looking at back-reference nodes that are not
 * name references.
 *
 * Lists, alternations, quantifiers, enclose nodes and anchors with a target
 * are searched recursively; the walk over a list/alternation stops at the
 * first non-zero result.  Returns r, which stays 0 unless a recursive call
 * (or the missing NT_BREF statement) sets it.
 *
 * NOTE(review): listing line 2008 is missing -- the statement controlled by
 * the IS_BACKREF_NAME_REF test in the NT_BREF case is absent, so as printed
 * the "if" governs only the "break".  Presumably it returned an error for a
 * numbered reference; restore from upstream.
 */
1987 static int
1988 numbered_ref_check(Node* node)
1989 {
1990 int r = 0;
1991
1992 switch (NTYPE(node)) {
1993 case NT_LIST:
1994 case NT_ALT:
1995 do {
1996 r = numbered_ref_check(NCAR(node));
1997 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1998 break;
1999 case NT_QTFR:
2000 r = numbered_ref_check(NQTFR(node)->target);
2001 break;
2002 case NT_ENCLOSE:
2003 r = numbered_ref_check(NENCLOSE(node)->target);
2004 break;
2005
2006 case NT_BREF:
2007 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2009 break;
2010
2011 case NT_ANCHOR:
2012 if (NANCHOR(node)->target)
2013 r = numbered_ref_check(NANCHOR(node)->target);
2014 break;
2015
2016 default:
2017 break;
2018 }
2019
2020 return r;
2021 }
2022
/*
 * Remove unnamed capture groups from the tree at *root and renumber the
 * remaining (named) groups.
 *
 * Steps, in order: build a remap table with env->num_mem + 1 entries and
 * zero new_val for entries 1..num_mem; run noname_disable_map() to drop
 * unnamed groups and fill the table; run renumber_by_map() to fix up
 * references; walk the table once more; rebuild env->capture_history under
 * the new numbers; set num_mem in both env and reg to env->num_named; and
 * finally renumber the name table.  Returns the first non-zero status from
 * a step, otherwise the result of onig_renumber_name_table().
 *
 * NOTE(review): listing lines 2031 and 2044 are missing.  2031 presumably
 * checked the xalloca() result, and 2044 is the statement that uses pos in
 * the loop over the map (as printed the loop only increments pos).  Restore
 * from upstream.
 */
2023 static int
2024 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2025 {
2026 int r, i, pos, counter;
2027 BitStatusType loc;
2028 GroupNumRemap* map;
2029
/* Indexed by old group number; index 0 is allocated but not initialized by the loop below. */
2030 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2032 for (i = 1; i <= env->num_mem; i++) {
2033 map[i].new_val = 0;
2034 }
2035 counter = 0;
2036 r = noname_disable_map(root, map, &counter);
2037 if (r != 0) return r;
2038
2039 r = renumber_by_map(*root, map, env->num_mem);
2040 if (r != 0) return r;
2041
/* pos advances once for every group that survived (new_val > 0). */
2042 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2043 if (map[i].new_val > 0) {
2045 pos++;
2046 }
2047 }
2048
/* Rebuild the capture-history bits from a saved copy, under the new numbers. */
2049 loc = env->capture_history;
2050 BIT_STATUS_CLEAR(env->capture_history);
2051 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2052 if (BIT_STATUS_AT(loc, i)) {
2053 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2054 }
2055 }
2056
2057 env->num_mem = env->num_named;
2058 reg->num_mem = env->num_named;
2059
2060 return onig_renumber_name_table(reg, map);
2061 }
2062#endif /* USE_NAMED_GROUP */
2063
2064#ifdef USE_SUBEXP_CALL
2065static int
2066unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2067{
2068 int i, offset;
2069 EncloseNode* en;
2070 AbsAddrType addr;
2071
2072 for (i = 0; i < uslist->num; i++) {
2073 en = NENCLOSE(uslist->us[i].target);
2074 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2075 addr = en->call_addr;
2076 offset = uslist->us[i].offset;
2077
2078 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2079 }
2080 return 0;
2081}
2082#endif
2083
2084#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
/*
 * Classify the subtree below node for the quantifier empty-check logic.
 *
 * For lists and alternations the result is the largest value reported by
 * any element (the walk stops if an element reports a negative value).  A
 * recursive call node yields NQ_TARGET_IS_EMPTY_REC; a non-recursive call
 * and a quantifier with a non-zero upper bound report their target's value.
 * Leaf node types leave the result at 0.
 *
 * NOTE(review): listing lines 2126 and 2130 are missing -- the statement for
 * ENCLOSE_MEMORY and, presumably, one more case label before
 * ENCLOSE_CONDITION.  Restore from upstream.
 */
2085 static int
2086 quantifiers_memory_node_info(Node* node)
2087 {
2088 int r = 0;
2089
2090 switch (NTYPE(node)) {
2091 case NT_LIST:
2092 case NT_ALT:
2093 {
2094 int v;
2095 do {
2096 v = quantifiers_memory_node_info(NCAR(node));
2097 if (v > r) r = v;
2098 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2099 }
2100 break;
2101
2102# ifdef USE_SUBEXP_CALL
2103 case NT_CALL:
2104 if (IS_CALL_RECURSION(NCALL(node))) {
2105 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2106 }
2107 else
2108 r = quantifiers_memory_node_info(NCALL(node)->target);
2109 break;
2110# endif
2111
2112 case NT_QTFR:
2113 {
2114 QtfrNode* qn = NQTFR(node);
/* A quantifier with upper bound 0 contributes nothing. */
2115 if (qn->upper != 0) {
2116 r = quantifiers_memory_node_info(qn->target);
2117 }
2118 }
2119 break;
2120
2121 case NT_ENCLOSE:
2122 {
2123 EncloseNode* en = NENCLOSE(node);
2124 switch (en->type) {
2125 case ENCLOSE_MEMORY:
/* NOTE(review): listing line 2126 is missing here. */
2127 break;
2128
2129 case ENCLOSE_OPTION:
2131 case ENCLOSE_CONDITION:
2132 case ENCLOSE_ABSENT:
2133 r = quantifiers_memory_node_info(en->target);
2134 break;
2135 default:
2136 break;
2137 }
2138 }
2139 break;
2140
2141 case NT_BREF:
2142 case NT_STR:
2143 case NT_CTYPE:
2144 case NT_CCLASS:
2145 case NT_CANY:
2146 case NT_ANCHOR:
2147 default:
2148 break;
2149 }
2150
2151 return r;
2152 }
2153#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2154
/*
 * Compute the minimum match length of the subtree below node into *min.
 *
 * *min is reset to 0 on entry, so node types that fall through to "break"
 * (anchors, ENCLOSE_ABSENT, recursive back-references, unknown types) report
 * 0.  Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference
 * names a group above env->num_mem, or the first non-zero status from a
 * recursive call.
 *
 * NOTE(review): listing lines 2256, 2258, 2261 and 2268 are missing --
 * statements around the recursive call in the ENCLOSE_MEMORY case and,
 * presumably, one case label before ENCLOSE_CONDITION.  Restore from
 * upstream.
 */
2155 static int
2156 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2157 {
2158 OnigDistance tmin;
2159 int r = 0;
2160
2161 *min = 0;
2162 switch (NTYPE(node)) {
2163 case NT_BREF:
2164 {
/* Minimum over all referenced groups: start from the first, then take the smaller. */
2165 int i;
2166 int* backs;
2167 Node** nodes = SCANENV_MEM_NODES(env);
2168 BRefNode* br = NBREF(node);
2169 if (br->state & NST_RECURSION) break;
2170
2171 backs = BACKREFS_P(br);
2172 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2173 r = get_min_match_length(nodes[backs[0]], min, env);
2174 if (r != 0) break;
2175 for (i = 1; i < br->back_num; i++) {
2176 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2177 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2178 if (r != 0) break;
2179 if (*min > tmin) *min = tmin;
2180 }
2181 }
2182 break;
2183
2184#ifdef USE_SUBEXP_CALL
2185 case NT_CALL:
/* A recursive call uses the target's cached min_len if one is fixed; otherwise *min stays 0. */
2186 if (IS_CALL_RECURSION(NCALL(node))) {
2187 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2188 if (IS_ENCLOSE_MIN_FIXED(en))
2189 *min = en->min_len;
2190 }
2191 else
2192 r = get_min_match_length(NCALL(node)->target, min, env);
2193 break;
2194#endif
2195
2196 case NT_LIST:
/* A list's minimum is the sum of its elements' minimums. */
2197 do {
2198 r = get_min_match_length(NCAR(node), &tmin, env);
2199 if (r == 0) *min += tmin;
2200 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2201 break;
2202
2203 case NT_ALT:
2204 {
/* An alternation's minimum is the smallest among its alternatives. */
2205 Node *x, *y;
2206 y = node;
2207 do {
2208 x = NCAR(y);
2209 r = get_min_match_length(x, &tmin, env);
2210 if (r != 0) break;
2211 if (y == node) *min = tmin;
2212 else if (*min > tmin) *min = tmin;
2213 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2214 }
2215 break;
2216
2217 case NT_STR:
2218 {
2219 StrNode* sn = NSTR(node);
2220 *min = sn->end - sn->s;
2221 }
2222 break;
2223
2224 case NT_CTYPE:
2225 *min = 1;
2226 break;
2227
2228 case NT_CCLASS:
2229 case NT_CANY:
2230 *min = 1;
2231 break;
2232
2233 case NT_QTFR:
2234 {
2235 QtfrNode* qn = NQTFR(node);
2236
/* With a lower bound of 0 the quantifier may match nothing, so *min stays 0. */
2237 if (qn->lower > 0) {
2238 r = get_min_match_length(qn->target, min, env);
2239 if (r == 0)
2240 *min = distance_multiply(*min, qn->lower);
2241 }
2242 }
2243 break;
2244
2245 case NT_ENCLOSE:
2246 {
2247 EncloseNode* en = NENCLOSE(node);
2248 switch (en->type) {
2249 case ENCLOSE_MEMORY:
2250 if (IS_ENCLOSE_MIN_FIXED(en))
2251 *min = en->min_len;
2252 else {
2253 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2254 *min = 0; /* recursive */
2255 else {
/* NOTE(review): listing lines 2256, 2258 and 2261 are missing around the statements below. */
2257 r = get_min_match_length(en->target, min, env);
2259 if (r == 0) {
2260 en->min_len = *min;
2262 }
2263 }
2264 }
2265 break;
2266
2267 case ENCLOSE_OPTION:
2269 case ENCLOSE_CONDITION:
2270 r = get_min_match_length(en->target, min, env);
2271 break;
2272
2273 case ENCLOSE_ABSENT:
2274 break;
2275 }
2276 }
2277 break;
2278
2279 case NT_ANCHOR:
2280 default:
2281 break;
2282 }
2283
2284 return r;
2285 }
2286
/*
 * Compute the maximum match length of the subtree below node into *max.
 *
 * *max is reset to 0 on entry, so node types that fall through to "break"
 * (anchors, ENCLOSE_ABSENT, unknown types) report 0.  Returns 0 on success,
 * ONIGERR_INVALID_BACKREF when a back-reference names a group above
 * env->num_mem, or the first non-zero status from a recursive call.
 *
 * NOTE(review): listing lines 2333, 2351, 2365, 2380, 2382, 2384, 2387 and
 * 2394 are missing.  Each gap sits where a recursive or unbounded construct
 * is handled (recursive back-reference, recursive call, infinite repeat,
 * marked enclose node), so the assignments to *max for those cases are
 * absent from this listing.  Restore from upstream before compiling.
 */
2287 static int
2288 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2289 {
2290 OnigDistance tmax;
2291 int r = 0;
2292
2293 *max = 0;
2294 switch (NTYPE(node)) {
2295 case NT_LIST:
/* A list's maximum is the sum of its elements' maximums (via distance_add). */
2296 do {
2297 r = get_max_match_length(NCAR(node), &tmax, env);
2298 if (r == 0)
2299 *max = distance_add(*max, tmax);
2300 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2301 break;
2302
2303 case NT_ALT:
/* An alternation's maximum is the largest among its alternatives. */
2304 do {
2305 r = get_max_match_length(NCAR(node), &tmax, env);
2306 if (r == 0 && *max < tmax) *max = tmax;
2307 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2308 break;
2309
2310 case NT_STR:
2311 {
2312 StrNode* sn = NSTR(node);
2313 *max = sn->end - sn->s;
2314 }
2315 break;
2316
2317 case NT_CTYPE:
2318 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2319 break;
2320
2321 case NT_CCLASS:
2322 case NT_CANY:
2323 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2324 break;
2325
2326 case NT_BREF:
2327 {
2328 int i;
2329 int* backs;
2330 Node** nodes = SCANENV_MEM_NODES(env);
2331 BRefNode* br = NBREF(node);
2332 if (br->state & NST_RECURSION) {
/* NOTE(review): listing line 2333 is missing here. */
2334 break;
2335 }
2336 backs = BACKREFS_P(br);
/* Largest maximum over all referenced groups. */
2337 for (i = 0; i < br->back_num; i++) {
2338 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2339 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2340 if (r != 0) break;
2341 if (*max < tmax) *max = tmax;
2342 }
2343 }
2344 break;
2345
2346#ifdef USE_SUBEXP_CALL
2347 case NT_CALL:
2348 if (! IS_CALL_RECURSION(NCALL(node)))
2349 r = get_max_match_length(NCALL(node)->target, max, env);
2350 else
/* NOTE(review): listing line 2351 (the else branch's statement) is missing. */
2352 break;
2353#endif
2354
2355 case NT_QTFR:
2356 {
2357 QtfrNode* qn = NQTFR(node);
2358
2359 if (qn->upper != 0) {
2360 r = get_max_match_length(qn->target, max, env);
2361 if (r == 0 && *max != 0) {
2362 if (! IS_REPEAT_INFINITE(qn->upper))
2363 *max = distance_multiply(*max, qn->upper);
2364 else
/* NOTE(review): listing line 2365 (the infinite-repeat statement) is missing. */
2366 }
2367 }
2368 }
2369 break;
2370
2371 case NT_ENCLOSE:
2372 {
2373 EncloseNode* en = NENCLOSE(node);
2374 switch (en->type) {
2375 case ENCLOSE_MEMORY:
2376 if (IS_ENCLOSE_MAX_FIXED(en))
2377 *max = en->max_len;
2378 else {
/* NOTE(review): listing lines 2380, 2382, 2384 and 2387 are missing in this branch. */
2379 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2381 else {
2383 r = get_max_match_length(en->target, max, env);
2385 if (r == 0) {
2386 en->max_len = *max;
2388 }
2389 }
2390 }
2391 break;
2392
2393 case ENCLOSE_OPTION:
2395 case ENCLOSE_CONDITION:
2396 r = get_max_match_length(en->target, max, env);
2397 break;
2398
2399 case ENCLOSE_ABSENT:
2400 break;
2401 }
2402 }
2403 break;
2404
2405 case NT_ANCHOR:
2406 default:
2407 break;
2408 }
2409
2410 return r;
2411 }
2412
2413#define GET_CHAR_LEN_VARLEN -1
2414#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2415
2416/* fixed size pattern node only */
/*
 * Compute the length in characters of the subtree below node into *len, for
 * patterns whose length is fixed.
 *
 * level is incremented on entry, so it is 1 for the node passed in by the
 * top-level caller; the NT_ALT case distinguishes level == 1 from deeper
 * levels when its alternatives differ in length.  *len is reset to 0 on
 * entry.  Returns 0 on success or the non-zero status set by a failing case.
 *
 * NOTE(review): listing lines 2450, 2452, 2480, 2489, 2514, 2520 and 2535
 * are missing.  These are the statements that assign r for variable-length
 * alternations, for quantifiers with lower != upper, for recursive calls and
 * for unknown node types, plus two lines in the enclose case.  Presumably
 * they use the GET_CHAR_LEN_VARLEN / GET_CHAR_LEN_TOP_ALT_VARLEN values
 * defined just above this function -- confirm against upstream.
 */
2417 static int
2418 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2419 {
2420 int tlen;
2421 int r = 0;
2422
2423 level++;
2424 *len = 0;
2425 switch (NTYPE(node)) {
2426 case NT_LIST:
2427 do {
2428 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2429 if (r == 0)
2430 *len = (int )distance_add(*len, tlen);
2431 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2432 break;
2433
2434 case NT_ALT:
2435 {
/* All alternatives must have the same length as the first; varlen records a mismatch. */
2436 int tlen2;
2437 int varlen = 0;
2438
2439 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2440 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2441 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2442 if (r == 0) {
2443 if (tlen != tlen2)
2444 varlen = 1;
2445 }
2446 }
2447 if (r == 0) {
2448 if (varlen != 0) {
/* NOTE(review): listing lines 2450 and 2452 (both branch statements) are missing. */
2449 if (level == 1)
2451 else
2453 }
2454 else
2455 *len = tlen;
2456 }
2457 }
2458 break;
2459
2460 case NT_STR:
2461 {
/* Count characters by stepping through the string one encoded character at a time. */
2462 StrNode* sn = NSTR(node);
2463 UChar *s = sn->s;
2464 while (s < sn->end) {
2465 s += enclen(reg->enc, s, sn->end);
2466 (*len)++;
2467 }
2468 }
2469 break;
2470
2471 case NT_QTFR:
2472 {
2473 QtfrNode* qn = NQTFR(node);
/* Only an exact repeat count (lower == upper) has a fixed length. */
2474 if (qn->lower == qn->upper) {
2475 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2476 if (r == 0)
2477 *len = (int )distance_multiply(tlen, qn->lower);
2478 }
2479 else
/* NOTE(review): listing line 2480 (the else branch's statement) is missing. */
2481 }
2482 break;
2483
2484#ifdef USE_SUBEXP_CALL
2485 case NT_CALL:
2486 if (! IS_CALL_RECURSION(NCALL(node)))
2487 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2488 else
/* NOTE(review): listing line 2489 (the else branch's statement) is missing. */
2490 break;
2491#endif
2492
2493 case NT_CTYPE:
2494 *len = 1;
2495 break;
2496
2497 case NT_CCLASS:
2498 case NT_CANY:
2499 *len = 1;
2500 break;
2501
2502 case NT_ENCLOSE:
2503 {
2504 EncloseNode* en = NENCLOSE(node);
2505 switch (en->type) {
2506 case ENCLOSE_MEMORY:
2507#ifdef USE_SUBEXP_CALL
/* Reuse the cached char_len when it is marked fixed; otherwise compute and store it. */
2508 if (IS_ENCLOSE_CLEN_FIXED(en))
2509 *len = en->char_len;
2510 else {
2511 r = get_char_length_tree1(en->target, reg, len, level);
2512 if (r == 0) {
2513 en->char_len = *len;
/* NOTE(review): listing line 2514 is missing here. */
2515 }
2516 }
2517 break;
2518#endif
/* Without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to the plain recursion below. */
2519 case ENCLOSE_OPTION:
2521 case ENCLOSE_CONDITION:
2522 r = get_char_length_tree1(en->target, reg, len, level);
2523 break;
2524 case ENCLOSE_ABSENT:
2525 default:
2526 break;
2527 }
2528 }
2529 break;
2530
2531 case NT_ANCHOR:
2532 break;
2533
2534 default:
/* NOTE(review): listing line 2535 (the default statement) is missing. */
2536 break;
2537 }
2538
2539 return r;
2540 }
2541
2542static int
2543get_char_length_tree(Node* node, regex_t* reg, int* len)
2544{
2545 return get_char_length_tree1(node, reg, len, 0);
2546}
2547
2548/* x is not included y ==> 1 : 0 */
2549static int
2550is_not_included(Node* x, Node* y, regex_t* reg)
2551{
2552 int i;
2554 OnigCodePoint code;
2555 UChar *p;
2556 int ytype;
2557
2558 retry:
2559 ytype = NTYPE(y);
2560 switch (NTYPE(x)) {
2561 case NT_CTYPE:
2562 {
2563 switch (ytype) {
2564 case NT_CTYPE:
2565 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2566 NCTYPE(y)->not != NCTYPE(x)->not &&
2567 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2568 return 1;
2569 else
2570 return 0;
2571 break;
2572
2573 case NT_CCLASS:
2574 swap:
2575 {
2576 Node* tmp;
2577 tmp = x; x = y; y = tmp;
2578 goto retry;
2579 }
2580 break;
2581
2582 case NT_STR:
2583 goto swap;
2584 break;
2585
2586 default:
2587 break;
2588 }
2589 }
2590 break;
2591
2592 case NT_CCLASS:
2593 {
2594 CClassNode* xc = NCCLASS(x);
2595 switch (ytype) {
2596 case NT_CTYPE:
2597 switch (NCTYPE(y)->ctype) {
2598 case ONIGENC_CTYPE_WORD:
2599 if (NCTYPE(y)->not == 0) {
2600 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2601 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2602 if (BITSET_AT(xc->bs, i)) {
2603 if (NCTYPE(y)->ascii_range) {
2604 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2605 }
2606 else {
2607 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2608 }
2609 }
2610 }
2611 return 1;
2612 }
2613 return 0;
2614 }
2615 else {
2616 if (IS_NOT_NULL(xc->mbuf)) return 0;
2617 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2618 int is_word;
2619 if (NCTYPE(y)->ascii_range)
2620 is_word = IS_CODE_SB_WORD(reg->enc, i);
2621 else
2622 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2623 if (! is_word) {
2624 if (!IS_NCCLASS_NOT(xc)) {
2625 if (BITSET_AT(xc->bs, i))
2626 return 0;
2627 }
2628 else {
2629 if (! BITSET_AT(xc->bs, i))
2630 return 0;
2631 }
2632 }
2633 }
2634 return 1;
2635 }
2636 break;
2637
2638 default:
2639 break;
2640 }
2641 break;
2642
2643 case NT_CCLASS:
2644 {
2645 int v;
2646 CClassNode* yc = NCCLASS(y);
2647
2648 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2649 v = BITSET_AT(xc->bs, i);
2650 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2651 (v == 0 && IS_NCCLASS_NOT(xc))) {
2652 v = BITSET_AT(yc->bs, i);
2653 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2654 (v == 0 && IS_NCCLASS_NOT(yc)))
2655 return 0;
2656 }
2657 }
2658 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2659 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2660 return 1;
2661 return 0;
2662 }
2663 break;
2664
2665 case NT_STR:
2666 goto swap;
2667 break;
2668
2669 default:
2670 break;
2671 }
2672 }
2673 break;
2674
2675 case NT_STR:
2676 {
2677 StrNode* xs = NSTR(x);
2678 if (NSTRING_LEN(x) == 0)
2679 break;
2680
2681 switch (ytype) {
2682 case NT_CTYPE:
2683 switch (NCTYPE(y)->ctype) {
2684 case ONIGENC_CTYPE_WORD:
2685 if (NCTYPE(y)->ascii_range) {
2686 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2687 return NCTYPE(y)->not;
2688 else
2689 return !(NCTYPE(y)->not);
2690 }
2691 else {
2692 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2693 return NCTYPE(y)->not;
2694 else
2695 return !(NCTYPE(y)->not);
2696 }
2697 break;
2698 default:
2699 break;
2700 }
2701 break;
2702
2703 case NT_CCLASS:
2704 {
2705 CClassNode* cc = NCCLASS(y);
2706
2707 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2708 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2709 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2710 }
2711 break;
2712
2713 case NT_STR:
2714 {
2715 UChar *q;
2716 StrNode* ys = NSTR(y);
2717 len = NSTRING_LEN(x);
2718 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2719 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2720 /* tiny version */
2721 return 0;
2722 }
2723 else {
2724 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2725 if (*p != *q) return 1;
2726 }
2727 }
2728 }
2729 break;
2730
2731 default:
2732 break;
2733 }
2734 }
2735 break;
2736
2737 default:
2738 break;
2739 }
2740
2741 return 0;
2742}
2743
/*
 * Find the node that sits at the head of the subtree below node.
 *
 * Returns a pointer to a node inside the tree (a ctype, character class or
 * non-empty string node), or NULL_NODE when no such node is found.  With
 * exact != 0, ctype and character class nodes are not returned, and a
 * non-raw string is not returned while IS_IGNORECASE(reg->options) holds.
 * reg->options is temporarily replaced while descending into an
 * ENCLOSE_OPTION node and restored afterwards.
 *
 * NOTE(review): listing line 2814 is missing; presumably one more case label
 * between ENCLOSE_MEMORY and ENCLOSE_CONDITION -- confirm against upstream.
 */
2744 static Node*
2745 get_head_value_node(Node* node, int exact, regex_t* reg)
2746 {
2747 Node* n = NULL_NODE;
2748
2749 switch (NTYPE(node)) {
2750 case NT_BREF:
2751 case NT_ALT:
2752 case NT_CANY:
2753#ifdef USE_SUBEXP_CALL
2754 case NT_CALL:
2755#endif
2756 break;
2757
2758 case NT_CTYPE:
2759 case NT_CCLASS:
2760 if (exact == 0) {
2761 n = node;
2762 }
2763 break;
2764
2765 case NT_LIST:
/* Only the first element of a list is examined. */
2766 n = get_head_value_node(NCAR(node), exact, reg);
2767 break;
2768
2769 case NT_STR:
2770 {
2771 StrNode* sn = NSTR(node);
2772
/* An empty string has no head value. */
2773 if (sn->end <= sn->s)
2774 break;
2775
/* Deliberately empty branch: leave n as NULL_NODE for an exact request on a non-raw string under ignore-case. */
2776 if (exact != 0 &&
2777 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2778 }
2779 else {
2780 n = node;
2781 }
2782 }
2783 break;
2784
2785 case NT_QTFR:
2786 {
2787 QtfrNode* qn = NQTFR(node);
/* With a lower bound of 0 the target may be skipped, so it has no definite head. */
2788 if (qn->lower > 0) {
2789#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2790 if (IS_NOT_NULL(qn->head_exact))
2791 n = qn->head_exact;
2792 else
2793#endif
2794 n = get_head_value_node(qn->target, exact, reg);
2795 }
2796 }
2797 break;
2798
2799 case NT_ENCLOSE:
2800 {
2801 EncloseNode* en = NENCLOSE(node);
2802 switch (en->type) {
2803 case ENCLOSE_OPTION:
2804 {
/* Evaluate the target under the enclosed options, then restore the caller's. */
2805 OnigOptionType options = reg->options;
2806
2807 reg->options = NENCLOSE(node)->option;
2808 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2809 reg->options = options;
2810 }
2811 break;
2812
2813 case ENCLOSE_MEMORY:
2815 case ENCLOSE_CONDITION:
2816 n = get_head_value_node(en->target, exact, reg);
2817 break;
2818
2819 case ENCLOSE_ABSENT:
2820 break;
2821 }
2822 }
2823 break;
2824
2825 case NT_ANCHOR:
/* Only ANCHOR_PREC_READ is followed into its target. */
2826 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2827 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2828 break;
2829
2830 default:
2831 break;
2832 }
2833
2834 return n;
2835 }
2836
2837static int
2838check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2839{
2840 int type, r = 0;
2841
2842 type = NTYPE(node);
2843 if ((NTYPE2BIT(type) & type_mask) == 0)
2844 return 1;
2845
2846 switch (type) {
2847 case NT_LIST:
2848 case NT_ALT:
2849 do {
2850 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2851 anchor_mask);
2852 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2853 break;
2854
2855 case NT_QTFR:
2856 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2857 anchor_mask);
2858 break;
2859
2860 case NT_ENCLOSE:
2861 {
2862 EncloseNode* en = NENCLOSE(node);
2863 if ((en->type & enclose_mask) == 0)
2864 return 1;
2865
2866 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2867 }
2868 break;
2869
2870 case NT_ANCHOR:
2871 type = NANCHOR(node)->type;
2872 if ((type & anchor_mask) == 0)
2873 return 1;
2874
2875 if (NANCHOR(node)->target)
2876 r = check_type_tree(NANCHOR(node)->target,
2877 type_mask, enclose_mask, anchor_mask);
2878 break;
2879
2880 default:
2881 break;
2882 }
2883 return r;
2884}
2885
2886#ifdef USE_SUBEXP_CALL
2887
/* Result codes of subexp_inf_recursive_check(): EXIST is returned when a
   group marked MARK1 is reached with head == 0, INFINITE when it is reached
   with head != 0. */
2888# define RECURSION_EXIST 1
2889# define RECURSION_INFINITE 2
2890
/*
 * Walk `node` looking for a group that is currently marked MARK1.
 *
 * `head` is nonzero while nothing with a nonzero minimum match length has
 * been passed yet in the enclosing list.  Returns 0, RECURSION_EXIST,
 * RECURSION_INFINITE, or a negative/nonzero code propagated from a recursive
 * call or from get_min_match_length().
 *
 * NOTE(review): this listing omits some source lines of the function (the
 * embedded numbering skips 2943, 2945, 2962 and 2964); the statements on
 * those lines are not documented here.
 */
2891static int
2892subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2893{
2894 int type;
2895 int r = 0;
2896
2897 type = NTYPE(node);
2898 switch (type) {
2899 case NT_LIST:
2900 {
2901 Node *x;
2902 OnigDistance min;
2903 int ret;
2904
2905 x = node;
2906 do {
2907 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2908 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2909 r |= ret;
/* Once an element has a nonzero minimum length, later elements are no
   longer at the head of the list. */
2910 if (head) {
2911 ret = get_min_match_length(NCAR(x), &min, env);
2912 if (ret != 0) return ret;
2913 if (min != 0) head = 0;
2914 }
2915 } while (IS_NOT_NULL(x = NCDR(x)));
2916 }
2917 break;
2918
2919 case NT_ALT:
2920 {
2921 int ret;
/* r stays RECURSION_EXIST only if every branch reports it (AND-ed below). */
2922 r = RECURSION_EXIST;
2923 do {
2924 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2925 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2926 r &= ret;
2927 } while (IS_NOT_NULL(node = NCDR(node)));
2928 }
2929 break;
2930
2931 case NT_QTFR:
2932 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
/* A quantifier whose lower bound is 0 downgrades RECURSION_EXIST to 0. */
2933 if (r == RECURSION_EXIST) {
2934 if (NQTFR(node)->lower == 0) r = 0;
2935 }
2936 break;
2937
2938 case NT_ANCHOR:
2939 {
2940 AnchorNode* an = NANCHOR(node);
2941 switch (an->type) {
2942 case ANCHOR_PREC_READ:
2944 case ANCHOR_LOOK_BEHIND:
2946 r = subexp_inf_recursive_check(an->target, env, head);
2947 break;
2948 }
2949 }
2950 break;
2951
2952 case NT_CALL:
2953 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2954 break;
2955
2956 case NT_ENCLOSE:
2957 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2958 return 0;
2959 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2960 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2961 else {
2963 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2965 }
2966 break;
2967
2968 default:
2969 break;
2970 }
2971
2972 return r;
2973}
2974
/*
 * Traverse the tree and, for each group flagged as recursive
 * (IS_ENCLOSE_RECURSION), run subexp_inf_recursive_check() on its target
 * with head = 1.  A positive result there is reported as
 * ONIGERR_NEVER_ENDING_RECURSION.  Otherwise returns the first nonzero
 * result of a recursive traversal, or 0.
 *
 * NOTE(review): this listing omits some source lines of the function (the
 * embedded numbering skips 2999, 3001, 3013 and 3016).
 */
2975static int
2976subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2977{
2978 int type;
2979 int r = 0;
2980
2981 type = NTYPE(node);
2982 switch (type) {
2983 case NT_LIST:
2984 case NT_ALT:
2985 do {
2986 r = subexp_inf_recursive_check_trav(NCAR(node), env);
2987 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2988 break;
2989
2990 case NT_QTFR:
2991 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2992 break;
2993
2994 case NT_ANCHOR:
2995 {
2996 AnchorNode* an = NANCHOR(node);
2997 switch (an->type) {
2998 case ANCHOR_PREC_READ:
3000 case ANCHOR_LOOK_BEHIND:
3002 r = subexp_inf_recursive_check_trav(an->target, env);
3003 break;
3004 }
3005 }
3006 break;
3007
3008 case NT_ENCLOSE:
3009 {
3010 EncloseNode* en = NENCLOSE(node);
3011
3012 if (IS_ENCLOSE_RECURSION(en)) {
3014 r = subexp_inf_recursive_check(en->target, env, 1);
/* Both RECURSION_EXIST and RECURSION_INFINITE are positive and rejected. */
3015 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3017 }
3018 r = subexp_inf_recursive_check_trav(en->target, env);
3019 }
3020
3021 break;
3022
3023 default:
3024 break;
3025 }
3026
3027 return r;
3028}
3029
/*
 * Report whether a group currently marked MARK1 is reachable from `node`.
 * Returns nonzero when it is.  As a side effect, every NT_CALL node whose
 * target yields a nonzero result is flagged with SET_CALL_RECURSION.
 *
 * NOTE(review): this listing omits some source lines of the function (the
 * embedded numbering skips 3052, 3054, 3072 and 3074).
 */
3030static int
3031subexp_recursive_check(Node* node)
3032{
3033 int r = 0;
3034
3035 switch (NTYPE(node)) {
3036 case NT_LIST:
3037 case NT_ALT:
/* All elements are visited (no early exit); results are OR-ed together. */
3038 do {
3039 r |= subexp_recursive_check(NCAR(node));
3040 } while (IS_NOT_NULL(node = NCDR(node)));
3041 break;
3042
3043 case NT_QTFR:
3044 r = subexp_recursive_check(NQTFR(node)->target);
3045 break;
3046
3047 case NT_ANCHOR:
3048 {
3049 AnchorNode* an = NANCHOR(node);
3050 switch (an->type) {
3051 case ANCHOR_PREC_READ:
3053 case ANCHOR_LOOK_BEHIND:
3055 r = subexp_recursive_check(an->target);
3056 break;
3057 }
3058 }
3059 break;
3060
3061 case NT_CALL:
3062 r = subexp_recursive_check(NCALL(node)->target);
3063 if (r != 0) SET_CALL_RECURSION(node);
3064 break;
3065
3066 case NT_ENCLOSE:
3067 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3068 return 0;
3069 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3070 return 1; /* recursion */
3071 else {
3073 r = subexp_recursive_check(NENCLOSE(node)->target);
3075 }
3076 break;
3077
3078 default:
3079 break;
3080 }
3081
3082 return r;
3083}
3084
3085
/*
 * Traverse the tree and flag recursive groups.
 *
 * For each group that is called (IS_ENCLOSE_CALLED) and not yet flagged as
 * recursive, subexp_recursive_check() is run on its target and the group
 * gets NST_RECURSION when that check is nonzero.  A quantifier with
 * upper == 0 whose target contains a called group gets is_referred = 1.
 *
 * Returns FOUND_CALLED_NODE when a called group lies in the subtree, a
 * negative value propagated from a list/alternative element, or 0.
 *
 * NOTE(review): this listing omits some source lines of the function (the
 * embedded numbering skips 3121, 3123, 3136 and 3139).
 */
3086static int
3087subexp_recursive_check_trav(Node* node, ScanEnv* env)
3088{
3089# define FOUND_CALLED_NODE 1
3090
3091 int type;
3092 int r = 0;
3093
3094 type = NTYPE(node);
3095 switch (type) {
3096 case NT_LIST:
3097 case NT_ALT:
3098 {
3099 int ret;
3100 do {
3101 ret = subexp_recursive_check_trav(NCAR(node), env);
3102 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3103 else if (ret < 0) return ret;
3104 } while (IS_NOT_NULL(node = NCDR(node)));
3105 }
3106 break;
3107
3108 case NT_QTFR:
3109 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3110 if (NQTFR(node)->upper == 0) {
3111 if (r == FOUND_CALLED_NODE)
3112 NQTFR(node)->is_referred = 1;
3113 }
3114 break;
3115
3116 case NT_ANCHOR:
3117 {
3118 AnchorNode* an = NANCHOR(node);
3119 switch (an->type) {
3120 case ANCHOR_PREC_READ:
3122 case ANCHOR_LOOK_BEHIND:
3124 r = subexp_recursive_check_trav(an->target, env);
3125 break;
3126 }
3127 }
3128 break;
3129
3130 case NT_ENCLOSE:
3131 {
3132 EncloseNode* en = NENCLOSE(node);
3133
3134 if (! IS_ENCLOSE_RECURSION(en)) {
3135 if (IS_ENCLOSE_CALLED(en)) {
3137 r = subexp_recursive_check(en->target);
3138 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3140 }
3141 }
/* The value of r from the check above is overwritten by the traversal. */
3142 r = subexp_recursive_check_trav(en->target, env);
3143 if (IS_ENCLOSE_CALLED(en))
3144 r |= FOUND_CALLED_NODE;
3145 }
3146 break;
3147
3148 default:
3149 break;
3150 }
3151
3152 return r;
3153}
3154
/*
 * Resolve subexpression-call nodes in the tree.
 *
 * For each NT_CALL node the target group is looked up in
 * SCANENV_MEM_NODES(env) by cn->group_num, the group's bit is turned on in
 * env->bt_mem_start, and env->unset_addr_list is copied into the call node.
 * A call given by name is first mapped to a group number with
 * onig_name_to_group_numbers().  List, alternative, quantifier, enclosure
 * and look-around nodes are descended into.
 *
 * Returns 0, or the first nonzero value produced while descending.
 *
 * NOTE(review): this listing omits many source lines of the function (the
 * bodies of the error branches around 3192-3194, 3198-3200, 3208-3212,
 * 3228-3230 and 3233-3236, among others); those paths are not documented
 * here.
 */
3155static int
3156setup_subexp_call(Node* node, ScanEnv* env)
3157{
3158 int type;
3159 int r = 0;
3160
3161 type = NTYPE(node);
3162 switch (type) {
3163 case NT_LIST:
3164 do {
3165 r = setup_subexp_call(NCAR(node), env);
3166 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3167 break;
3168
3169 case NT_ALT:
3170 do {
3171 r = setup_subexp_call(NCAR(node), env);
3172 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3173 break;
3174
3175 case NT_QTFR:
3176 r = setup_subexp_call(NQTFR(node)->target, env);
3177 break;
3178 case NT_ENCLOSE:
3179 r = setup_subexp_call(NENCLOSE(node)->target, env);
3180 break;
3181
3182 case NT_CALL:
3183 {
3184 CallNode* cn = NCALL(node);
3185 Node** nodes = SCANENV_MEM_NODES(env);
3186
3187 if (cn->group_num != 0) {
3188 int gnum = cn->group_num;
3189
3190# ifdef USE_NAMED_GROUP
3191 if (env->num_named > 0 &&
3195 }
3196# endif
3197 if (gnum > env->num_mem) {
3201 }
3202
3203# ifdef USE_NAMED_GROUP
3204 set_call_attr:
3205# endif
/* Shared tail: also reached by goto from the name-based branches below. */
3206 cn->target = nodes[cn->group_num];
3207 if (IS_NULL(cn->target)) {
3211 }
3213 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3214 cn->unset_addr_list = env->unset_addr_list;
3215 }
3216# ifdef USE_NAMED_GROUP
3217# ifdef USE_PERL_SUBEXP_CALL
/* Here group_num is 0 and the name is empty (name == name_end). */
3218 else if (cn->name == cn->name_end) {
3219 goto set_call_attr;
3220 }
3221# endif
3222 else {
3223 int *refs;
3224
3225 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3226 &refs);
3227 if (n <= 0) {
3231 }
3232 else if (n > 1 &&
3237 }
3238 else {
3239 cn->group_num = refs[0];
3240 goto set_call_attr;
3241 }
3242 }
3243# endif
3244 }
3245 break;
3246
3247 case NT_ANCHOR:
3248 {
3249 AnchorNode* an = NANCHOR(node);
3250
3251 switch (an->type) {
3252 case ANCHOR_PREC_READ:
3254 case ANCHOR_LOOK_BEHIND:
3256 r = setup_subexp_call(an->target, env);
3257 break;
3258 }
3259 }
3260 break;
3261
3262 default:
3263 break;
3264 }
3265
3266 return r;
3267}
3268#endif
3269
3270/* divide different length alternatives in look-behind.
3271 (?<=A|B) ==> (?<=A)|(?<=B)
3272 (?<!A|B) ==> (?<!A)(?<!B)
3273*/
3274static int
3275divide_look_behind_alternatives(Node* node)
3276{
3277 Node *head, *np, *insert_node;
3278 AnchorNode* an = NANCHOR(node);
3279 int anc_type = an->type;
3280
3281 head = an->target;
3282 np = NCAR(head);
3283 swap_node(node, head);
3284 NCAR(node) = head;
3285 NANCHOR(head)->target = np;
3286
3287 np = node;
3288 while ((np = NCDR(np)) != NULL_NODE) {
3289 insert_node = onig_node_new_anchor(anc_type);
3290 CHECK_NULL_RETURN_MEMERR(insert_node);
3291 NANCHOR(insert_node)->target = NCAR(np);
3292 NCAR(np) = insert_node;
3293 }
3294
3295 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3296 np = node;
3297 do {
3298 SET_NTYPE(np, NT_LIST); /* alt -> list */
3299 } while ((np = NCDR(np)) != NULL_NODE);
3300 }
3301 return 0;
3302}
3303
/*
 * Determine the character length of a look-behind target with
 * get_char_length_tree().  On success (r == 0) the length is stored in
 * an->char_len.  The GET_CHAR_LEN_TOP_ALT_VARLEN branch can call
 * divide_look_behind_alternatives().  Returns r.
 *
 * NOTE(review): this listing omits source lines 3314, 3316 and 3319, so the
 * GET_CHAR_LEN_VARLEN branch and the condition guarding the call to
 * divide_look_behind_alternatives() are not visible here.
 */
3304static int
3305setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3306{
3307 int r, len;
3308 AnchorNode* an = NANCHOR(node);
3309
3310 r = get_char_length_tree(an->target, reg, &len);
3311 if (r == 0)
3312 an->char_len = len;
3313 else if (r == GET_CHAR_LEN_VARLEN)
3315 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3317 r = divide_look_behind_alternatives(node);
3318 else
3320 }
3321
3322 return r;
3323}
3324
/*
 * Use the node that follows `node` in a list (`next_node`) to tune `node`.
 *
 * Applies only when `node` is, or wraps via ENCLOSE_MEMORY groups, a greedy
 * quantifier with an infinite upper bound:
 *   - under USE_QTFR_PEEK_NEXT, records the head value node of `next_node`
 *     in qn->next_head_exact when its first byte is not '\0';
 *   - when qn->lower <= 1, the target is a simple node type, and
 *     is_not_included() holds for the head values of the target and of
 *     `next_node`, the quantifier is swapped into an enclosure `en`.
 *
 * Always returns 0.
 *
 * NOTE(review): this listing omits source lines 3351-3353, where `en` is
 * presumably declared and created; confirm against the full source.
 */
3325static int
3326next_setup(Node* node, Node* next_node, regex_t* reg)
3327{
3328 int type;
3329
3330 retry:
3331 type = NTYPE(node);
3332 if (type == NT_QTFR) {
3333 QtfrNode* qn = NQTFR(node);
3334 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3335#ifdef USE_QTFR_PEEK_NEXT
3336 Node* n = get_head_value_node(next_node, 1, reg);
3337 /* '\0': for UTF-16BE etc... */
3338 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3339 qn->next_head_exact = n;
3340 }
3341#endif
3342 /* automatic possessification a*b ==> (?>a*)b */
3343 if (qn->lower <= 1) {
3344 int ttype = NTYPE(qn->target);
3345 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3346 Node *x, *y;
3347 x = get_head_value_node(qn->target, 0, reg);
3348 if (IS_NOT_NULL(x)) {
3349 y = get_head_value_node(next_node, 0, reg);
3350 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3354 swap_node(node, en);
3355 NENCLOSE(node)->target = en;
3356 }
3357 }
3358 }
3359 }
3360 }
3361 }
3362 else if (type == NT_ENCLOSE) {
3363 EncloseNode* en = NENCLOSE(node);
/* Look through capture groups and retry on what they contain. */
3364 if (en->type == ENCLOSE_MEMORY) {
3365 node = en->target;
3366 goto retry;
3367 }
3368 }
3369 return 0;
3370}
3371
3372
/*
 * Replace the bytes of string node `node` with their case-folded form.
 *
 * Each character is folded with ONIGENC_MBC_CASE_FOLD() into `buf` and
 * appended to a temporary buffer, which starts at twice the string's byte
 * length and is doubled with xrealloc() whenever it fills up.  The result is
 * installed with onig_node_str_set() and the temporary buffer is freed.
 *
 * Returns the result of onig_node_str_set(), or ONIGERR_MEMORY if growing
 * the buffer fails.
 *
 * NOTE(review): this listing omits source lines 3376 and 3385 (presumably
 * the declarations of p, end and buf, and a check of the xmalloc() result
 * -- confirm against the full source).
 */
3373static int
3374update_string_node_case_fold(regex_t* reg, Node *node)
3375{
3377 UChar *sbuf, *ebuf, *sp;
3378 int r, i, len;
3379 OnigDistance sbuf_size;
3380 StrNode* sn = NSTR(node);
3381
3382 end = sn->end;
3383 sbuf_size = (end - sn->s) * 2;
3384 sbuf = (UChar* )xmalloc(sbuf_size);
3386 ebuf = sbuf + sbuf_size;
3387
3388 sp = sbuf;
3389 p = sn->s;
3390 while (p < end) {
3391 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3392 for (i = 0; i < len; i++) {
3393 if (sp >= ebuf) {
/* Buffer full: double it.  This inner `p` shadows the read cursor only
   within this block. */
3394 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3395 if (IS_NULL(p)) {
3396 xfree(sbuf);
3397 return ONIGERR_MEMORY;
3398 }
3399 sbuf = p;
/* The buffer was full, so the write position is the old size. */
3400 sp = sbuf + sbuf_size;
3401 sbuf_size *= 2;
3402 ebuf = sbuf + sbuf_size;
3403 }
3404
3405 *sp++ = buf[i];
3406 }
3407 }
3408
3409 r = onig_node_str_set(node, sbuf, sp);
3410
3411 xfree(sbuf);
3412 return r;
3413}
3414
/*
 * Create a new string node for the bytes [s, end), case-fold its contents
 * with update_string_node_case_fold(), mark it with NSTRING_SET_AMBIG and
 * store it in *rnode.
 *
 * Returns 0 on success.  On failure returns ONIGERR_MEMORY or the error from
 * the case-fold step; the new node is freed and *rnode is left untouched.
 *
 * NOTE(review): this listing omits source line 3432, between the
 * NSTRING_SET_AMBIG call and the store to *rnode.
 */
3415static int
3416expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3417 regex_t* reg)
3418{
3419 int r;
3420 Node *node;
3421
3422 node = onig_node_new_str(s, end);
3423 if (IS_NULL(node)) return ONIGERR_MEMORY;
3424
3425 r = update_string_node_case_fold(reg, node);
3426 if (r != 0) {
3427 onig_node_free(node);
3428 return r;
3429 }
3430
3431 NSTRING_SET_AMBIG(node);
3433 *rnode = node;
3434 return 0;
3435}
3436
3437static int
3438is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3439 int slen)
3440{
3441 int i;
3442
3443 for (i = 0; i < item_num; i++) {
3444 if (items[i].byte_len != slen) {
3445 return 1;
3446 }
3447 if (items[i].code_len != 1) {
3448 return 1;
3449 }
3450 }
3451 return 0;
3452}
3453
/*
 * Build an alternation for one character of an ignore-case string.
 *
 * The first alternative is the literal bytes [p, p + slen).  One more
 * alternative is added per case-fold item, built by encoding each
 * items[i].code[j] with ONIGENC_CODE_TO_MBC().  An item whose byte_len
 * differs from slen is attached to a separate chain (var_anode); when bytes
 * remain after it (q < end) it is paired in a list with the case-folded
 * remainder from expand_case_fold_make_rem_string().
 *
 * The head of the resulting tree is stored in *rnode.
 *
 * Returns varlen on success: 1 if any item's byte_len differs from slen,
 * otherwise 0.  Every failure path returns ONIGERR_MEMORY.
 *
 * NOTE(review): this listing omits source lines 3461, 3481 and 3510
 * (presumably the declaration of buf and the allocations of anode/an --
 * confirm against the full source).
 */
3454static int
3455expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3456 UChar *p, int slen, UChar *end,
3457 regex_t* reg, Node **rnode)
3458{
3459 int r, i, j, len, varlen;
3460 Node *anode, *var_anode, *snode, *xnode, *an;
3462
3463 *rnode = var_anode = NULL_NODE;
3464
3465 varlen = 0;
3466 for (i = 0; i < item_num; i++) {
3467 if (items[i].byte_len != slen) {
3468 varlen = 1;
3469 break;
3470 }
3471 }
3472
3473 if (varlen != 0) {
3474 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3475 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3476
3477 xnode = onig_node_new_list(NULL, NULL);
3478 if (IS_NULL(xnode)) goto mem_err;
3479 NCAR(var_anode) = xnode;
3480
3482 if (IS_NULL(anode)) goto mem_err;
3483 NCAR(xnode) = anode;
3484 }
3485 else {
3486 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3487 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3488 }
3489
3490 snode = onig_node_new_str(p, p + slen);
3491 if (IS_NULL(snode)) goto mem_err;
3492
3493 NCAR(anode) = snode;
3494
3495 for (i = 0; i < item_num; i++) {
3496 snode = onig_node_new_str(NULL, NULL);
3497 if (IS_NULL(snode)) goto mem_err;
3498
3499 for (j = 0; j < items[i].code_len; j++) {
3500 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
/* NOTE(review): the error code saved in r here is discarded; the mem_err2
   exit below returns ONIGERR_MEMORY regardless. */
3501 if (len < 0) {
3502 r = len;
3503 goto mem_err2;
3504 }
3505
3506 r = onig_node_str_cat(snode, buf, buf + len);
3507 if (r != 0) goto mem_err2;
3508 }
3509
3511 if (IS_NULL(an)) {
3512 goto mem_err2;
3513 }
3514
3515 if (items[i].byte_len != slen) {
3516 Node *rem;
3517 UChar *q = p + items[i].byte_len;
3518
3519 if (q < end) {
3520 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3521 if (r != 0) {
3522 onig_node_free(an);
3523 goto mem_err2;
3524 }
3525
3526 xnode = onig_node_list_add(NULL_NODE, snode);
3527 if (IS_NULL(xnode)) {
3528 onig_node_free(an);
3529 onig_node_free(rem);
3530 goto mem_err2;
3531 }
/* This path jumps to mem_err rather than mem_err2, so snode is not freed
   separately after xnode. */
3532 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3533 onig_node_free(an);
3534 onig_node_free(xnode);
3535 onig_node_free(rem);
3536 goto mem_err;
3537 }
3538
3539 NCAR(an) = xnode;
3540 }
3541 else {
3542 NCAR(an) = snode;
3543 }
3544
3545 NCDR(var_anode) = an;
3546 var_anode = an;
3547 }
3548 else {
3549 NCAR(an) = snode;
3550 NCDR(anode) = an;
3551 anode = an;
3552 }
3553 }
3554
3555 return varlen;
3556
3557 mem_err2:
3558 onig_node_free(snode);
3559
3560 mem_err:
3561 onig_node_free(*rnode);
3562
3563 return ONIGERR_MEMORY;
3564}
3565
/*
 * Rewrite string node `node` for case-insensitive matching.
 *
 * Returns 0 at once when the node is already marked ambiguous
 * (NSTRING_IS_AMBIG) or is empty.  Otherwise the string is scanned one
 * character at a time:
 *   - characters with no case-fold items, or whose items all keep the same
 *     byte length and a single code point, are collected into a run string
 *     node that is later case-folded and marked NSTRING_SET_AMBIG;
 *   - other characters are expanded with expand_case_fold_string_alt(),
 *     unless the running product alt_num exceeds
 *     THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION, in which case scanning stops
 *     and the rest is handled by expand_case_fold_make_rem_string().
 * The pieces are linked into a list under top_root, which is finally
 * swapped into `node`.
 *
 * Returns 0 on success, ONIGERR_MEMORY or another error code on failure.
 *
 * NOTE(review): this listing omits source lines 3575 and 3589 (presumably
 * the declaration of `items` and the start of the call that sets `n` --
 * confirm against the full source).
 */
3566static int
3567expand_case_fold_string(Node* node, regex_t* reg)
3568{
3569#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3570
3571 int r, n, len, alt_num;
3572 int varlen = 0;
3573 UChar *start, *end, *p;
3574 Node *top_root, *root, *snode, *prev_node;
3576 StrNode* sn = NSTR(node);
3577
3578 if (NSTRING_IS_AMBIG(node)) return 0;
3579
3580 start = sn->s;
3581 end = sn->end;
3582 if (start >= end) return 0;
3583
3584 r = 0;
3585 top_root = root = prev_node = snode = NULL_NODE;
3586 alt_num = 1;
3587 p = start;
3588 while (p < end) {
3590 p, end, items);
3591 if (n < 0) {
3592 r = n;
3593 goto err;
3594 }
3595
3596 len = enclen(reg->enc, p, end);
3597
3598 varlen = is_case_fold_variable_len(n, items, len);
3599 if (n == 0 || varlen == 0) {
/* Same-shape character: append it to the current run string, opening a new
   run (and a list root, if a previous piece exists) when needed. */
3600 if (IS_NULL(snode)) {
3601 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3602 onig_node_free(top_root);
3603 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3604 if (IS_NULL(root)) {
3605 onig_node_free(prev_node);
3606 goto mem_err;
3607 }
3608 }
3609
3610 prev_node = snode = onig_node_new_str(NULL, NULL);
3611 if (IS_NULL(snode)) goto mem_err;
3612 if (IS_NOT_NULL(root)) {
3613 if (IS_NULL(onig_node_list_add(root, snode))) {
3614 onig_node_free(snode);
3615 goto mem_err;
3616 }
3617 }
3618 }
3619
3620 r = onig_node_str_cat(snode, p, p + len);
3621 if (r != 0) goto err;
3622 }
3623 else {
3624 alt_num *= (n + 1);
3625 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3626
/* NOTE(review): a nonzero r from this call is not acted on; r is overwritten
   by the expand_case_fold_string_alt() call below. */
3627 if (IS_NOT_NULL(snode)) {
3628 r = update_string_node_case_fold(reg, snode);
3629 if (r == 0) {
3630 NSTRING_SET_AMBIG(snode);
3631 }
3632 }
3633 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3634 onig_node_free(top_root);
3635 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3636 if (IS_NULL(root)) {
3637 onig_node_free(prev_node);
3638 goto mem_err;
3639 }
3640 }
3641
3642 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3643 if (r < 0) goto mem_err;
/* r == 1 means the expansion contains variable-length alternatives. */
3644 if (r == 1) {
3645 if (IS_NULL(root)) {
3646 top_root = prev_node;
3647 }
3648 else {
3649 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3650 onig_node_free(prev_node);
3651 goto mem_err;
3652 }
3653 }
3654
3655 root = NCAR(prev_node);
3656 }
3657 else { /* r == 0 */
3658 if (IS_NOT_NULL(root)) {
3659 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3660 onig_node_free(prev_node);
3661 goto mem_err;
3662 }
3663 }
3664 }
3665
3666 snode = NULL_NODE;
3667 }
3668
3669 p += len;
3670 }
/* NOTE(review): if this call fails and p >= end, the nonzero r is not
   returned; the function falls through to "ending" and returns 0. */
3671 if (IS_NOT_NULL(snode)) {
3672 r = update_string_node_case_fold(reg, snode);
3673 if (r == 0) {
3674 NSTRING_SET_AMBIG(snode);
3675 }
3676 }
3677
/* Scanning stopped early (threshold exceeded): fold the remainder as one
   string node. */
3678 if (p < end) {
3679 Node *srem;
3680
3681 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3682 if (r != 0) goto mem_err;
3683
3684 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3685 onig_node_free(top_root);
3686 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3687 if (IS_NULL(root)) {
3688 onig_node_free(srem);
3689 onig_node_free(prev_node);
3690 goto mem_err;
3691 }
3692 }
3693
3694 if (IS_NULL(root)) {
3695 prev_node = srem;
3696 }
3697 else {
3698 if (IS_NULL(onig_node_list_add(root, srem))) {
3699 onig_node_free(srem);
3700 goto mem_err;
3701 }
3702 }
3703 }
3704
3705 /* ending */
3706 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3707 swap_node(node, top_root);
3708 onig_node_free(top_root);
3709 return 0;
3710
3711 mem_err:
3712 r = ONIGERR_MEMORY;
3713
3714 err:
3715 onig_node_free(top_root);
3716 return r;
3717}
3718
3719
3720#ifdef USE_COMBINATION_EXPLOSION_CHECK
3721
/* A quantifier whose (upper - lower) reaches this value counts as "big". */
3722# define CEC_THRES_NUM_BIG_REPEAT 512
/* Value used for var_num when the quantifier's upper bound is infinite. */
3723# define CEC_INFINITE_NUM 0x7fffffff
3724
/* State bits passed to and returned by setup_comb_exp_check(). */
3725# define CEC_IN_INFINITE_REPEAT (1<<0)
3726# define CEC_IN_FINITE_REPEAT (1<<1)
3727# define CEC_CONT_BIG_REPEAT (1<<2)
3728
/*
 * Assign combination-explosion check numbers to quantifiers.
 *
 * `state` is a set of CEC_* bits describing the enclosing quantifiers.
 * For a quantifier:
 *   - inside a finite repeat (CEC_IN_FINITE_REPEAT) comb_exp_check_num is
 *     set to -1;
 *   - otherwise, a not-yet-numbered quantifier receives the next number from
 *     env->num_comb_exp_check when it varies inside an infinite repeat, or
 *     is a big repeat following another big repeat (CEC_CONT_BIG_REPEAT).
 * Capture groups update env->curr_max_regnum, and a recursive call sets
 * env->has_recursion.
 *
 * Returns the resulting state bits; a negative value from a recursive call
 * stops list/alternative iteration.
 *
 * NOTE(review): this listing omits source line 3777 (the start of the
 * condition that compares the inner quantifier q with qn).
 */
3729static int
3730setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3731{
3732 int type;
3733 int r = state;
3734
3735 type = NTYPE(node);
3736 switch (type) {
3737 case NT_LIST:
3738 {
3739 Node* prev = NULL_NODE;
/* The state returned for one element is fed into the next element. */
3740 do {
3741 r = setup_comb_exp_check(NCAR(node), r, env);
3742 prev = NCAR(node);
3743 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3744 }
3745 break;
3746
3747 case NT_ALT:
3748 {
3749 int ret;
/* Each branch starts from the incoming state; results are OR-ed. */
3750 do {
3751 ret = setup_comb_exp_check(NCAR(node), state, env);
3752 r |= ret;
3753 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3754 }
3755 break;
3756
3757 case NT_QTFR:
3758 {
3759 int child_state = state;
3760 int add_state = 0;
3761 QtfrNode* qn = NQTFR(node);
3762 Node* target = qn->target;
3763 int var_num;
3764
3765 if (! IS_REPEAT_INFINITE(qn->upper)) {
3766 if (qn->upper > 1) {
3767 /* {0,1}, {1,1} are allowed */
3768 child_state |= CEC_IN_FINITE_REPEAT;
3769
3770 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3771 if (env->backrefed_mem == 0) {
3772 if (NTYPE(qn->target) == NT_ENCLOSE) {
3773 EncloseNode* en = NENCLOSE(qn->target);
3774 if (en->type == ENCLOSE_MEMORY) {
3775 if (NTYPE(en->target) == NT_QTFR) {
3776 QtfrNode* q = NQTFR(en->target);
3778 && q->greedy == qn->greedy) {
3779 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3780 if (qn->upper == 1)
3781 child_state = state;
3782 }
3783 }
3784 }
3785 }
3786 }
3787 }
3788 }
3789
3790 if (state & CEC_IN_FINITE_REPEAT) {
3791 qn->comb_exp_check_num = -1;
3792 }
3793 else {
3794 if (IS_REPEAT_INFINITE(qn->upper)) {
3795 var_num = CEC_INFINITE_NUM;
3796 child_state |= CEC_IN_INFINITE_REPEAT;
3797 }
3798 else {
3799 var_num = qn->upper - qn->lower;
3800 }
3801
3802 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3803 add_state |= CEC_CONT_BIG_REPEAT;
3804
3805 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3806 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3807 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
/* A check number is assigned only once per quantifier. */
3808 if (qn->comb_exp_check_num == 0) {
3809 env->num_comb_exp_check++;
3810 qn->comb_exp_check_num = env->num_comb_exp_check;
3811 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3812 env->comb_exp_max_regnum = env->curr_max_regnum;
3813 }
3814 }
3815 }
3816
3817 r = setup_comb_exp_check(target, child_state, env);
3818 r |= add_state;
3819 }
3820 break;
3821
3822 case NT_ENCLOSE:
3823 {
3824 EncloseNode* en = NENCLOSE(node);
3825
3826 switch (en->type) {
3827 case ENCLOSE_MEMORY:
3828 {
3829 if (env->curr_max_regnum < en->regnum)
3830 env->curr_max_regnum = en->regnum;
3831
3832 r = setup_comb_exp_check(en->target, state, env);
3833 }
3834 break;
3835
3836 default:
3837 r = setup_comb_exp_check(en->target, state, env);
3838 break;
3839 }
3840 }
3841 break;
3842
3843# ifdef USE_SUBEXP_CALL
3844 case NT_CALL:
3845 if (IS_CALL_RECURSION(NCALL(node)))
3846 env->has_recursion = 1;
3847 else
3848 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3849 break;
3850# endif
3851
3852 default:
3853 break;
3854 }
3855
3856 return r;
3857}
3858#endif
3859
/* Context bits carried in the `state` argument of setup_tree(). */
/* Set while inside a branch of an alternation. */
3860#define IN_ALT (1<<0)
/* Set while inside a target that setup_tree() visits with (state | IN_NOT). */
3861#define IN_NOT (1<<1)
/* Set while inside the target of a quantifier. */
3862#define IN_REPEAT (1<<2)
/* Set while inside a quantifier whose lower and upper bounds differ. */
3863#define IN_VAR_REPEAT (1<<3)
/* Set while inside a group for which IS_ENCLOSE_CALLED holds. */
3864#define IN_CALL (1<<4)
/* Set while inside a group for which IS_ENCLOSE_RECURSION holds. */
3865#define IN_RECCALL (1<<5)
3866
3867/* setup_tree does the following work.
3868 1. check empty loop. (set qn->target_empty_info)
3869 2. expand ignore-case in char class.
3870 3. set memory status bit flags. (reg->mem_stats)
3871 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3872 5. find invalid patterns in look-behind.
3873 6. expand repeated string.
3874 */
/*
 * Recursive set-up pass over the parse tree.
 *
 * `state` is a set of IN_* bits describing the context of `node`; `reg`
 * supplies the options (temporarily replaced while inside an option group)
 * and `env` collects per-group bit flags.
 *
 * Returns 0 on success or an ONIGERR_* code (ONIGERR_INVALID_BACKREF,
 * ONIGERR_MEMORY, ONIGERR_INVALID_LOOK_BEHIND_PATTERN, or whatever a helper
 * returned).
 *
 * NOTE(review): this listing omits a number of source lines of the function
 * (3937, 3956, 4076, 4086, 4096-4098, 4102, 4121, 4147, 4157 and 4160),
 * including several case labels; the statements on those lines are not
 * documented here.
 */
3875static int
3876setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3877{
3878 int type;
3879 int r = 0;
3880
3881restart:
3882 type = NTYPE(node);
3883 switch (type) {
3884 case NT_LIST:
3885 {
3886 Node* prev = NULL_NODE;
/* After each element is set up, the previous element is tuned against it
   with next_setup(). */
3887 do {
3888 r = setup_tree(NCAR(node), reg, state, env);
3889 if (IS_NOT_NULL(prev) && r == 0) {
3890 r = next_setup(prev, NCAR(node), reg);
3891 }
3892 prev = NCAR(node);
3893 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3894 }
3895 break;
3896
3897 case NT_ALT:
3898 do {
3899 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3900 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3901 break;
3902
3903 case NT_CCLASS:
3904 break;
3905
3906 case NT_STR:
3907 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3908 r = expand_case_fold_string(node, reg);
3909 }
3910 break;
3911
3912 case NT_CTYPE:
3913 case NT_CANY:
3914 break;
3915
3916#ifdef USE_SUBEXP_CALL
3917 case NT_CALL:
3918 break;
3919#endif
3920
3921 case NT_BREF:
3922 {
3923 int i;
3924 int* p;
3925 Node** nodes = SCANENV_MEM_NODES(env);
3926 BRefNode* br = NBREF(node);
3927 p = BACKREFS_P(br);
/* Validate each referenced group number and record it in the env bit sets. */
3928 for (i = 0; i < br->back_num; i++) {
3929 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3930 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3931 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3932#ifdef USE_BACKREF_WITH_LEVEL
3933 if (IS_BACKREF_NEST_LEVEL(br)) {
3934 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3935 }
3936#endif
3938 }
3939 }
3940 break;
3941
3942 case NT_QTFR:
3943 {
3944 OnigDistance d;
3945 QtfrNode* qn = NQTFR(node);
3946 Node* target = qn->target;
3947
3948 if ((state & IN_REPEAT) != 0) {
3949 qn->state |= NST_IN_REPEAT;
3950 }
3951
3952 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3953 r = get_min_match_length(target, &d, env);
3954 if (r) break;
/* d == 0: the target has a zero minimum match length. */
3955 if (d == 0) {
3957#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3958 r = quantifiers_memory_node_info(target);
3959 if (r < 0) break;
3960 if (r > 0) {
3961 qn->target_empty_info = r;
3962 }
3963#endif
3964#if 0
3965 r = get_max_match_length(target, &d, env);
3966 if (r == 0 && d == 0) {
3967 /* ()* ==> ()?, ()+ ==> () */
3968 qn->upper = 1;
3969 if (qn->lower > 1) qn->lower = 1;
3970 if (NTYPE(target) == NT_STR) {
3971 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3972 }
3973 }
3974#endif
3975 }
3976 }
3977
3978 state |= IN_REPEAT;
3979 if (qn->lower != qn->upper)
3980 state |= IN_VAR_REPEAT;
3981 r = setup_tree(target, reg, state, env);
3982 if (r) break;
3983
3984 /* expand string */
3985#define EXPAND_STRING_MAX_LENGTH 100
/* A repeated string target with lower > 1 is unrolled into one longer
   string, up to EXPAND_STRING_MAX_LENGTH bytes; any remaining repetitions
   stay in the quantifier, which is placed after the unrolled string. */
3986 if (NTYPE(target) == NT_STR) {
3987 if (qn->lower > 1) {
3988 int i, n = qn->lower;
3989 OnigDistance len = NSTRING_LEN(target);
3990 StrNode* sn = NSTR(target);
3991 Node* np;
3992
3993 np = onig_node_new_str(sn->s, sn->end);
3994 if (IS_NULL(np)) return ONIGERR_MEMORY;
3995 NSTR(np)->flag = sn->flag;
3996
3997 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3998 r = onig_node_str_cat(np, sn->s, sn->end);
3999 if (r) {
4000 onig_node_free(np);
4001 return r;
4002 }
4003 }
4004 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4005 Node *np1, *np2;
4006
4007 qn->lower -= i;
4008 if (! IS_REPEAT_INFINITE(qn->upper))
4009 qn->upper -= i;
4010
4011 np1 = onig_node_new_list(np, NULL);
4012 if (IS_NULL(np1)) {
4013 onig_node_free(np);
4014 return ONIGERR_MEMORY;
4015 }
4016 swap_node(np1, node);
4017 np2 = onig_node_list_add(node, np1);
4018 if (IS_NULL(np2)) {
4019 onig_node_free(np1);
4020 return ONIGERR_MEMORY;
4021 }
4022 }
4023 else {
4024 swap_node(np, node);
4025 onig_node_free(np);
4026 }
4027 break; /* break case NT_QTFR: */
4028 }
4029 }
4030
4031#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4032 if (qn->greedy && (qn->target_empty_info != 0)) {
4033 if (NTYPE(target) == NT_QTFR) {
4034 QtfrNode* tqn = NQTFR(target);
/* Move head_exact from the inner quantifier up to this one. */
4035 if (IS_NOT_NULL(tqn->head_exact)) {
4036 qn->head_exact = tqn->head_exact;
4037 tqn->head_exact = NULL;
4038 }
4039 }
4040 else {
4041 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4042 }
4043 }
4044#endif
4045 }
4046 break;
4047
4048 case NT_ENCLOSE:
4049 {
4050 EncloseNode* en = NENCLOSE(node);
4051
4052 switch (en->type) {
4053 case ENCLOSE_OPTION:
4054 {
/* Apply the group's options while its target is processed, then restore. */
4055 OnigOptionType options = reg->options;
4056 reg->options = NENCLOSE(node)->option;
4057 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4058 reg->options = options;
4059 }
4060 break;
4061
4062 case ENCLOSE_MEMORY:
4063 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4064 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4065 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4066 }
4067 if (IS_ENCLOSE_CALLED(en))
4068 state |= IN_CALL;
4069 if (IS_ENCLOSE_RECURSION(en))
4070 state |= IN_RECCALL;
4071 else if ((state & IN_RECCALL) != 0)
4072 SET_CALL_RECURSION(node);
4073 r = setup_tree(en->target, reg, state, env);
4074 break;
4075
4077 {
4078 Node* target = en->target;
4079 r = setup_tree(target, reg, state, env);
4080 if (NTYPE(target) == NT_QTFR) {
4081 QtfrNode* tqn = NQTFR(target);
4082 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4083 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4084 int qtype = NTYPE(tqn->target);
4085 if (IS_NODE_TYPE_SIMPLE(qtype))
4087 }
4088 }
4089 }
4090 break;
4091
4092 case ENCLOSE_CONDITION:
4093#ifdef USE_NAMED_GROUP
4094 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4095 env->num_named > 0 &&
4099 }
4100#endif
4101 if (NENCLOSE(node)->regnum > env->num_mem)
4103 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4104 break;
4105
4106 case ENCLOSE_ABSENT:
4107 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4108 break;
4109 }
4110 }
4111 break;
4112
4113 case NT_ANCHOR:
4114 {
4115 AnchorNode* an = NANCHOR(node);
4116
4117 switch (an->type) {
4118 case ANCHOR_PREC_READ:
4119 r = setup_tree(an->target, reg, state, env);
4120 break;
4122 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4123 break;
4124
4125/* allowed node types in look-behind */
4126#define ALLOWED_TYPE_IN_LB \
4127 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4128 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4129
4130#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4131#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4132
4133#define ALLOWED_ANCHOR_IN_LB \
4134( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4135 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4136 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4137 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4138#define ALLOWED_ANCHOR_IN_LB_NOT \
4139( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4140 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4141 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4142 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4143
4144 case ANCHOR_LOOK_BEHIND:
4145 {
4146 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4148 if (r < 0) return r;
4149 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
/* If the node is no longer an anchor at this point, start over on it. */
4150 if (NTYPE(node) != NT_ANCHOR) goto restart;
4151 r = setup_tree(an->target, reg, state, env);
4152 if (r != 0) return r;
4153 r = setup_look_behind(node, reg, env);
4154 }
4155 break;
4156
4158 {
4159 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4161 if (r < 0) return r;
4162 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4163 if (NTYPE(node) != NT_ANCHOR) goto restart;
4164 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4165 if (r != 0) return r;
4166 r = setup_look_behind(node, reg, env);
4167 }
4168 break;
4169 }
4170 }
4171 break;
4172
4173 default:
4174 break;
4175 }
4176
4177 return r;
4178}
4179
4180#ifndef USE_SUNDAY_QUICK_SEARCH
4181/* set skip map for Boyer-Moore search */
/*
 * Fill the skip table for the pattern bytes [s, end) (variant compiled when
 * USE_SUNDAY_QUICK_SEARCH is not defined).
 *
 * When the pattern length is below ONIG_CHAR_TABLE_SIZE the byte-sized table
 * `skip` is used; otherwise an int table is allocated into *int_skip if it
 * is still NULL.  Every entry starts at the pattern length; then, for each
 * character starting before the last byte, each of its bytes (and the
 * corresponding bytes of its case-fold variants in `buf`) is given the
 * distance (len - 1 - position).
 *
 * Returns 0 on success, 1 when a case-fold variant does not have the same
 * length as the source character, ONIGERR_MEMORY if allocation fails, or
 * ONIGERR_TYPE_BUG on the branch compiled when
 * OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE.
 *
 * NOTE(review): this listing omits source lines 4186, 4188, 4189, 4200 and
 * 4236 (presumably the declarations of i, len, p, buf and items, and the
 * start of the call that sets n when ignore_case is set -- confirm against
 * the full source).
 */
4182static int
4183set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4184 UChar skip[], int** int_skip, int ignore_case)
4185{
4187 int clen, flen, n, j, k;
4190 OnigEncoding enc = reg->enc;
4191
4192 len = end - s;
4193 if (len < ONIG_CHAR_TABLE_SIZE) {
4194 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4195
4196 n = 0;
4197 for (i = 0; i < len - 1; i += clen) {
4198 p = s + i;
4199 if (ignore_case)
4201 p, end, items);
4202 clen = enclen(enc, p, end);
/* Clamp a character length that would run past the end of the pattern. */
4203 if (p + clen > end)
4204 clen = (int )(end - p);
4205
4206 for (j = 0; j < n; j++) {
4207 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4208 return 1; /* different length isn't supported. */
4209 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4210 if (flen != clen)
4211 return 1; /* different length isn't supported. */
4212 }
4213 for (j = 0; j < clen; j++) {
4214 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4215 for (k = 0; k < n; k++) {
4216 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4217 }
4218 }
4219 }
4220 }
4221 else {
4222# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4223 /* This should not happen. */
4224 return ONIGERR_TYPE_BUG;
4225# else
4226 if (IS_NULL(*int_skip)) {
4227 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4228 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4229 }
4230 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4231
4232 n = 0;
4233 for (i = 0; i < len - 1; i += clen) {
4234 p = s + i;
4235 if (ignore_case)
4237 p, end, items);
4238 clen = enclen(enc, p, end);
4239 if (p + clen > end)
4240 clen = (int )(end - p);
4241
4242 for (j = 0; j < n; j++) {
4243 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4244 return 1; /* different length isn't supported. */
4245 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4246 if (flen != clen)
4247 return 1; /* different length isn't supported. */
4248 }
4249 for (j = 0; j < clen; j++) {
4250 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4251 for (k = 0; k < n; k++) {
4252 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4253 }
4254 }
4255 }
4256# endif
4257 }
4258 return 0;
4259}
4260
4261#else /* USE_SUNDAY_QUICK_SEARCH */
4262
4263/* set skip map for Sunday's quick search */
/*
 * Fill the skip table for the pattern bytes [s, end) (variant compiled when
 * USE_SUNDAY_QUICK_SEARCH is defined).
 *
 * When the pattern length is below ONIG_CHAR_TABLE_SIZE the byte-sized table
 * `skip` is used; otherwise an int table is allocated into *int_skip if it
 * is still NULL.  Every entry starts at (len + 1); then, for every character
 * of the pattern, each of its bytes (and the corresponding bytes of its
 * case-fold variants in `buf`) is given the distance (len - position).
 *
 * Returns 0 on success, 1 when a case-fold variant does not have the same
 * length as the source character, ONIGERR_MEMORY if allocation fails, or
 * ONIGERR_TYPE_BUG on the branch compiled when
 * OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE.
 *
 * NOTE(review): this listing omits source lines 4268, 4270, 4271, 4282 and
 * 4318 (presumably the declarations of i, len, p, buf and items, and the
 * start of the call that sets n when ignore_case is set -- confirm against
 * the full source).
 */
4264static int
4265set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4266 UChar skip[], int** int_skip, int ignore_case)
4267{
4269 int clen, flen, n, j, k;
4272 OnigEncoding enc = reg->enc;
4273
4274 len = end - s;
4275 if (len < ONIG_CHAR_TABLE_SIZE) {
4276 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4277
4278 n = 0;
4279 for (i = 0; i < len; i += clen) {
4280 p = s + i;
4281 if (ignore_case)
4283 p, end, items);
4284 clen = enclen(enc, p, end);
/* Clamp a character length that would run past the end of the pattern. */
4285 if (p + clen > end)
4286 clen = (int )(end - p);
4287
4288 for (j = 0; j < n; j++) {
4289 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4290 return 1; /* different length isn't supported. */
4291 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4292 if (flen != clen)
4293 return 1; /* different length isn't supported. */
4294 }
4295 for (j = 0; j < clen; j++) {
4296 skip[s[i + j]] = (UChar )(len - i - j);
4297 for (k = 0; k < n; k++) {
4298 skip[buf[k][j]] = (UChar )(len - i - j);
4299 }
4300 }
4301 }
4302 }
4303 else {
4304# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4305 /* This should not happen. */
4306 return ONIGERR_TYPE_BUG;
4307# else
4308 if (IS_NULL(*int_skip)) {
4309 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4310 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4311 }
4312 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4313
4314 n = 0;
4315 for (i = 0; i < len; i += clen) {
4316 p = s + i;
4317 if (ignore_case)
4319 p, end, items);
4320 clen = enclen(enc, p, end);
4321 if (p + clen > end)
4322 clen = (int )(end - p);
4323
4324 for (j = 0; j < n; j++) {
4325 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4326 return 1; /* different length isn't supported. */
4327 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4328 if (flen != clen)
4329 return 1; /* different length isn't supported. */
4330 }
4331 for (j = 0; j < clen; j++) {
4332 (*int_skip)[s[i + j]] = (int )(len - i - j);
4333 for (k = 0; k < n; k++) {
4334 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4335 }
4336 }
4337 }
4338# endif
4339 }
4340 return 0;
4341}
4342#endif /* USE_SUNDAY_QUICK_SEARCH */
4343
4344typedef struct {
4345 OnigDistance min; /* min byte length */
4346 OnigDistance max; /* max byte length */
4347} MinMaxLen;
4348
/*
 * Environment handed down while optimization info is collected.
 * NOTE(review): the member declarations are not visible in this listing.
 * Code in this file accesses the members mmd, enc, options, case_fold_flag
 * and scan_env -- confirm types and order against the complete file.
 */
4349typedef struct {
4355} OptEnv;
4356
/*
 * Anchor flags attached to a piece of optimization info.
 * NOTE(review): the member declarations are not visible in this listing.
 * Code in this file accesses the members left_anchor and right_anchor --
 * confirm against the complete file.
 */
4357typedef struct {
4360} OptAncInfo;
4361
/*
 * Exact-string candidate collected for the search optimizer.
 * NOTE(review): some member declarations are not visible in this listing.
 * Code in this file also accesses the members anc, reach_end and s --
 * confirm types and order against the complete file.
 */
4362typedef struct {
4363 MinMaxLen mmd; /* info position */
4365
4367 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4368 int len;
4370} OptExactInfo;
4371
/*
 * Byte-map candidate collected for the search optimizer.
 * NOTE(review): some member declarations are not visible in this listing.
 * Code in this file also accesses the members anc and map (indexed by
 * byte value) -- confirm types and order against the complete file.
 */
4372typedef struct {
4373 MinMaxLen mmd; /* info position */
4375
4376 int value; /* weighted value */
4378} OptMapInfo;
4379
/*
 * All optimization info gathered for one node of the parse tree.
 * NOTE(review): some member declarations are not visible in this listing.
 * Code in this file also accesses the members len (a MinMaxLen) and anc
 * (an OptAncInfo) -- confirm against the complete file.
 */
4380typedef struct {
4382
4384 OptExactInfo exb; /* boundary */
4385 OptExactInfo exm; /* middle */
4386 OptExactInfo expr; /* prec read (?=...) */
4387
4388 OptMapInfo map; /* boundary */
4389} NodeOptInfo;
4390
4391
4392static int
4393map_position_value(OnigEncoding enc, int i)
4394{
4395 static const short int ByteValTable[] = {
4396 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4397 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4398 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4399 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4400 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4401 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4402 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4403 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4404 };
4405
4406 if (i < numberof(ByteValTable)) {
4407 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4408 return 20;
4409 else
4410 return (int )ByteValTable[i];
4411 }
4412 else
4413 return 4; /* Take it easy. */
4414}
4415
4416static int
4417distance_value(MinMaxLen* mm)
4418{
4419 /* 1000 / (min-max-dist + 1) */
4420 static const short int dist_vals[] = {
4421 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4422 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4423 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4424 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4425 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4426 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4427 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4428 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4429 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4430 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4431 };
4432
4433 OnigDistance d;
4434
4435 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4436
4437 d = mm->max - mm->min;
4438 if (d < numberof(dist_vals))
4439 /* return dist_vals[d] * 16 / (mm->min + 12); */
4440 return (int )dist_vals[d];
4441 else
4442 return 1;
4443}
4444
4445static int
4446comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4447{
4448 if (v2 <= 0) return -1;
4449 if (v1 <= 0) return 1;
4450
4451 v1 *= distance_value(d1);
4452 v2 *= distance_value(d2);
4453
4454 if (v2 > v1) return 1;
4455 if (v2 < v1) return -1;
4456
4457 if (d2->min < d1->min) return 1;
4458 if (d2->min > d1->min) return -1;
4459 return 0;
4460}
4461
4462static int
4463is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4464{
4465 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4466}
4467
4468
4469static void
4470set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4471{
4472 mml->min = min;
4473 mml->max = max;
4474}
4475
4476static void
4477clear_mml(MinMaxLen* mml)
4478{
4479 mml->min = mml->max = 0;
4480}
4481
4482static void
4483copy_mml(MinMaxLen* to, MinMaxLen* from)
4484{
4485 to->min = from->min;
4486 to->max = from->max;
4487}
4488
4489static void
4490add_mml(MinMaxLen* to, MinMaxLen* from)
4491{
4492 to->min = distance_add(to->min, from->min);
4493 to->max = distance_add(to->max, from->max);
4494}
4495
#if 0
/* Shifts both bounds of *to by len, using distance_add(). (disabled) */
static void
add_len_mml(MinMaxLen* to, OnigDistance len)
{
  OnigDistance lo, hi;

  lo = distance_add(to->min, len);
  hi = distance_add(to->max, len);

  to->min = lo;
  to->max = hi;
}
#endif
4504
4505static void
4506alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4507{
4508 if (to->min > from->min) to->min = from->min;
4509 if (to->max < from->max) to->max = from->max;
4510}
4511
4512static void
4513copy_opt_env(OptEnv* to, OptEnv* from)
4514{
4515 *to = *from;
4516}
4517
4518static void
4519clear_opt_anc_info(OptAncInfo* anc)
4520{
4521 anc->left_anchor = 0;
4522 anc->right_anchor = 0;
4523}
4524
4525static void
4526copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4527{
4528 *to = *from;
4529}
4530
/*
 * Computes into *to the anchor info of the concatenation left + right.
 * The left anchors of `left` always apply; those of `right` are added only
 * when left_len is 0.  Symmetrically, the right anchors of `right` always
 * apply and those of `left` are added when right_len is 0.
 *
 * NOTE(review): the body of the final else-branch is not visible in this
 * listing -- confirm against the complete file.
 */
4531static void
4532concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4533 OnigDistance left_len, OnigDistance right_len)
4534{
4535 clear_opt_anc_info(to);
4536
4537 to->left_anchor = left->left_anchor;
4538 if (left_len == 0) {
4539 to->left_anchor |= right->left_anchor;
4540 }
4541
4542 to->right_anchor = right->right_anchor;
4543 if (right_len == 0) {
4544 to->right_anchor |= left->right_anchor;
4545 }
4546 else {
4548 }
4549}
4550
4551static int
4552is_left_anchor(int anc)
4553{
4554 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4555 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4556 anc == ANCHOR_PREC_READ_NOT)
4557 return 0;
4558
4559 return 1;
4560}
4561
4562static int
4563is_set_opt_anc_info(OptAncInfo* to, int anc)
4564{
4565 if ((to->left_anchor & anc) != 0) return 1;
4566
4567 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4568}
4569
4570static void
4571add_opt_anc_info(OptAncInfo* to, int anc)
4572{
4573 if (is_left_anchor(anc))
4574 to->left_anchor |= anc;
4575 else
4576 to->right_anchor |= anc;
4577}
4578
4579static void
4580remove_opt_anc_info(OptAncInfo* to, int anc)
4581{
4582 if (is_left_anchor(anc))
4583 to->left_anchor &= ~anc;
4584 else
4585 to->right_anchor &= ~anc;
4586}
4587
4588static void
4589alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4590{
4591 to->left_anchor &= add->left_anchor;
4592 to->right_anchor &= add->right_anchor;
4593}
4594
4595static int
4596is_full_opt_exact_info(OptExactInfo* ex)
4597{
4598 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4599}
4600
4601static void
4602clear_opt_exact_info(OptExactInfo* ex)
4603{
4604 clear_mml(&ex->mmd);
4605 clear_opt_anc_info(&ex->anc);
4606 ex->reach_end = 0;
4607 ex->ignore_case = -1; /* unset */
4608 ex->len = 0;
4609 ex->s[0] = '\0';
4610}
4611
4612static void
4613copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4614{
4615 *to = *from;
4616}
4617
4618static void
4619concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4620{
4621 int i, j, len;
4622 UChar *p, *end;
4623 OptAncInfo tanc;
4624
4625 if (to->ignore_case < 0)
4626 to->ignore_case = add->ignore_case;
4627 else if (to->ignore_case != add->ignore_case)
4628 return ; /* avoid */
4629
4630 p = add->s;
4631 end = p + add->len;
4632 for (i = to->len; p < end; ) {
4633 len = enclen(enc, p, end);
4634 if (i + len > OPT_EXACT_MAXLEN) break;
4635 for (j = 0; j < len && p < end; j++)
4636 to->s[i++] = *p++;
4637 }
4638
4639 to->len = i;
4640 to->reach_end = (p == end ? add->reach_end : 0);
4641
4642 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4643 if (! to->reach_end) tanc.right_anchor = 0;
4644 copy_opt_anc_info(&to->anc, &tanc);
4645}
4646
4647static void
4648concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4649 int raw ARG_UNUSED, OnigEncoding enc)
4650{
4651 int i, j, len;
4652 UChar *p;
4653
4654 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4655 len = enclen(enc, p, end);
4656 if (i + len > OPT_EXACT_MAXLEN) break;
4657 for (j = 0; j < len && p < end; j++)
4658 to->s[i++] = *p++;
4659 }
4660
4661 to->len = i;
4662}
4663
4664static void
4665alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4666{
4667 int i, j, len;
4668
4669 if (add->len == 0 || to->len == 0) {
4670 clear_opt_exact_info(to);
4671 return ;
4672 }
4673
4674 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4675 clear_opt_exact_info(to);
4676 return ;
4677 }
4678
4679 for (i = 0; i < to->len && i < add->len; ) {
4680 if (to->s[i] != add->s[i]) break;
4681 len = enclen(env->enc, to->s + i, to->s + to->len);
4682
4683 for (j = 1; j < len; j++) {
4684 if (to->s[i+j] != add->s[i+j]) break;
4685 }
4686 if (j < len) break;
4687 i += len;
4688 }
4689
4690 if (! add->reach_end || i < add->len || i < to->len) {
4691 to->reach_end = 0;
4692 }
4693 to->len = i;
4694 if (to->ignore_case < 0)
4695 to->ignore_case = add->ignore_case;
4696 else if (add->ignore_case >= 0)
4697 to->ignore_case |= add->ignore_case;
4698
4699 alt_merge_opt_anc_info(&to->anc, &add->anc);
4700 if (! to->reach_end) to->anc.right_anchor = 0;
4701}
4702
4703static void
4704select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4705{
4706 int v1, v2;
4707
4708 v1 = now->len;
4709 v2 = alt->len;
4710
4711 if (v2 == 0) {
4712 return ;
4713 }
4714 else if (v1 == 0) {
4715 copy_opt_exact_info(now, alt);
4716 return ;
4717 }
4718 else if (v1 <= 2 && v2 <= 2) {
4719 /* ByteValTable[x] is big value --> low price */
4720 v2 = map_position_value(enc, now->s[0]);
4721 v1 = map_position_value(enc, alt->s[0]);
4722
4723 if (now->len > 1) v1 += 5;
4724 if (alt->len > 1) v2 += 5;
4725 }
4726
4727 if (now->ignore_case <= 0) v1 *= 2;
4728 if (alt->ignore_case <= 0) v2 *= 2;
4729
4730 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4731 copy_opt_exact_info(now, alt);
4732}
4733
4734static void
4735clear_opt_map_info(OptMapInfo* map)
4736{
4737 static const OptMapInfo clean_info = {
4738 {0, 0}, {0, 0}, 0,
4739 {
4740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4745 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4746 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4747 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4748 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4749 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4750 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4751 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4752 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4753 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4754 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4755 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4756 }
4757 };
4758
4759 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4760}
4761
4762static void
4763copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4764{
4765 *to = *from;
4766}
4767
4768static void
4769add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4770{
4771 if (map->map[c] == 0) {
4772 map->map[c] = 1;
4773 map->value += map_position_value(enc, c);
4774 }
4775}
4776
/*
 * Marks in the map the first byte of the character at p, and the first
 * byte of each of its case-fold variants (multi-character folds are
 * disabled first).
 * Returns 0 on success, or the negative value reported by
 * ONIGENC_GET_CASE_FOLD_CODES_BY_STR.
 *
 * NOTE(review): the declarations of items and buf are not visible in this
 * listing -- confirm against the complete file.
 */
4777static int
4778add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4779 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4780{
4783 int i, n;
4784
4785 add_char_opt_map_info(map, p[0], enc);
4786
4787 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4788 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4789 if (n < 0) return n;
4790
/* only the leading byte of each encoded variant goes into the map */
4791 for (i = 0; i < n; i++) {
4792 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4793 add_char_opt_map_info(map, buf[0], enc);
4794 }
4795
4796 return 0;
4797}
4798
4799static void
4800select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4801{
4802 const int z = 1<<15; /* 32768: something big value */
4803
4804 int v1, v2;
4805
4806 if (alt->value == 0) return ;
4807 if (now->value == 0) {
4808 copy_opt_map_info(now, alt);
4809 return ;
4810 }
4811
4812 v1 = z / now->value;
4813 v2 = z / alt->value;
4814 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4815 copy_opt_map_info(now, alt);
4816}
4817
4818static int
4819comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4820{
4821#define COMP_EM_BASE 20
4822 int ve, vm;
4823
4824 if (m->value <= 0) return -1;
4825
4826 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4827 vm = COMP_EM_BASE * 5 * 2 / m->value;
4828 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4829}
4830
4831static void
4832alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4833{
4834 int i, val;
4835
4836 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4837 if (to->value == 0) return ;
4838 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4839 clear_opt_map_info(to);
4840 return ;
4841 }
4842
4843 alt_merge_mml(&to->mmd, &add->mmd);
4844
4845 val = 0;
4846 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4847 if (add->map[i])
4848 to->map[i] = 1;
4849
4850 if (to->map[i])
4851 val += map_position_value(enc, i);
4852 }
4853 to->value = val;
4854
4855 alt_merge_opt_anc_info(&to->anc, &add->anc);
4856}
4857
4858static void
4859set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4860{
4861 copy_mml(&(opt->exb.mmd), mmd);
4862 copy_mml(&(opt->expr.mmd), mmd);
4863 copy_mml(&(opt->map.mmd), mmd);
4864}
4865
4866static void
4867clear_node_opt_info(NodeOptInfo* opt)
4868{
4869 clear_mml(&opt->len);
4870 clear_opt_anc_info(&opt->anc);
4871 clear_opt_exact_info(&opt->exb);
4872 clear_opt_exact_info(&opt->exm);
4873 clear_opt_exact_info(&opt->expr);
4874 clear_opt_map_info(&opt->map);
4875}
4876
4877static void
4878copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4879{
4880 *to = *from;
4881}
4882
4883static void
4884concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4885{
4886 int exb_reach, exm_reach;
4887 OptAncInfo tanc;
4888
4889 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4890 copy_opt_anc_info(&to->anc, &tanc);
4891
4892 if (add->exb.len > 0 && to->len.max == 0) {
4893 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4894 to->len.max, add->len.max);
4895 copy_opt_anc_info(&add->exb.anc, &tanc);
4896 }
4897
4898 if (add->map.value > 0 && to->len.max == 0) {
4899 if (add->map.mmd.max == 0)
4900 add->map.anc.left_anchor |= to->anc.left_anchor;
4901 }
4902
4903 exb_reach = to->exb.reach_end;
4904 exm_reach = to->exm.reach_end;
4905
4906 if (add->len.max != 0)
4907 to->exb.reach_end = to->exm.reach_end = 0;
4908
4909 if (add->exb.len > 0) {
4910 if (exb_reach) {
4911 concat_opt_exact_info(&to->exb, &add->exb, enc);
4912 clear_opt_exact_info(&add->exb);
4913 }
4914 else if (exm_reach) {
4915 concat_opt_exact_info(&to->exm, &add->exb, enc);
4916 clear_opt_exact_info(&add->exb);
4917 }
4918 }
4919 select_opt_exact_info(enc, &to->exm, &add->exb);
4920 select_opt_exact_info(enc, &to->exm, &add->exm);
4921
4922 if (to->expr.len > 0) {
4923 if (add->len.max > 0) {
4924 if (to->expr.len > (int )add->len.max)
4925 to->expr.len = (int )add->len.max;
4926
4927 if (to->expr.mmd.max == 0)
4928 select_opt_exact_info(enc, &to->exb, &to->expr);
4929 else
4930 select_opt_exact_info(enc, &to->exm, &to->expr);
4931 }
4932 }
4933 else if (add->expr.len > 0) {
4934 copy_opt_exact_info(&to->expr, &add->expr);
4935 }
4936
4937 select_opt_map_info(&to->map, &add->map);
4938
4939 add_mml(&to->len, &add->len);
4940}
4941
4942static void
4943alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4944{
4945 alt_merge_opt_anc_info (&to->anc, &add->anc);
4946 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4947 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4948 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4949 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4950
4951 alt_merge_mml(&to->len, &add->len);
4952}
4953
4954
/* NOTE(review): presumably the limit compared with en->opt_count in the
   ENCLOSE_MEMORY case below; the comparing line is not visible in this
   listing. */
4955#define MAX_NODE_OPT_INFO_REF_COUNT 5
4956
/*
 * Collects optimization info for the tree rooted at `node` into *opt.
 *
 * *opt is cleared first and the current position range env->mmd is stored
 * into its bounded infos; the rest depends on the node type.
 * Returns 0 on success, a non-zero value passed up from a failing
 * sub-call, or ONIGERR_TYPE_BUG for an unknown node type.
 *
 * NOTE(review): a number of lines of this function are not visible in this
 * listing (see the notes in the individual cases).
 */
4957static int
4958optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4959{
4960 int type;
4961 int r = 0;
4962
4963 clear_node_opt_info(opt);
4964 set_bound_node_opt_info(opt, &env->mmd);
4965
4966 type = NTYPE(node);
4967 switch (type) {
/* list: optimize each element with a private copy of the environment
   whose position advances by the element's length, then concatenate */
4968 case NT_LIST:
4969 {
4970 OptEnv nenv;
4971 NodeOptInfo nopt;
4972 Node* nd = node;
4973
4974 copy_opt_env(&nenv, env);
4975 do {
4976 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4977 if (r == 0) {
4978 add_mml(&nenv.mmd, &nopt.len);
4979 concat_left_node_opt_info(env->enc, opt, &nopt);
4980 }
4981 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4982 }
4983 break;
4984
/* alternation: the first branch is copied, later branches are merged */
4985 case NT_ALT:
4986 {
4987 NodeOptInfo nopt;
4988 Node* nd = node;
4989
4990 do {
4991 r = optimize_node_left(NCAR(nd), &nopt, env);
4992 if (r == 0) {
4993 if (nd == node) copy_node_opt_info(opt, &nopt);
4994 else alt_merge_node_opt_info(opt, &nopt, env);
4995 }
4996 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4997 }
4998 break;
4999
/* string: becomes the boundary exact info; its first byte seeds the map */
5000 case NT_STR:
5001 {
5002 StrNode* sn = NSTR(node);
5003 OnigDistance slen = sn->end - sn->s;
5004 int is_raw = NSTRING_IS_RAW(node);
5005
5006 if (! NSTRING_IS_AMBIG(node)) {
5007 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5008 is_raw, env->enc);
5009 opt->exb.ignore_case = 0;
5010 if (slen > 0) {
5011 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5012 }
5013 set_mml(&opt->len, slen, slen);
5014 }
5015 else {
5016 OnigDistance max;
5017
5018 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
/* no exact info is taken here; only the maximum length is estimated */
5019 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5020 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
5021 }
5022 else {
5023 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5024 is_raw, env->enc);
5025 opt->exb.ignore_case = 1;
5026
5027 if (slen > 0) {
5028 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5029 env->enc, env->case_fold_flag);
5030 if (r != 0) break;
5031 }
5032
5033 max = slen;
5034 }
5035
5036 set_mml(&opt->len, slen, max);
5037 }
5038
/* the whole string fitted into the exact info */
5039 if ((OnigDistance )opt->exb.len == slen)
5040 opt->exb.reach_end = 1;
5041 }
5042 break;
5043
5044 case NT_CCLASS:
5045 {
5046 int i, z;
5047 CClassNode* cc = NCCLASS(node);
5048
5049 /* no need to check ignore case. (set in setup_tree()) */
5050
/* NOTE(review): the lines that declare and set min/max for this branch
   are not visible in this listing. */
5051 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5054
5055 set_mml(&opt->len, min, max);
5056 }
5057 else {
/* single-byte class: every member byte goes into the map */
5058 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5059 z = BITSET_AT(cc->bs, i);
5060 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5061 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5062 }
5063 }
5064 set_mml(&opt->len, 1, 1);
5065 }
5066 }
5067 break;
5068
5069 case NT_CTYPE:
5070 {
5071 int i, min, max;
5072 int maxcode;
5073
5074 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5075
/* a map is only built when max is 1, and only for the word type */
5076 if (max == 1) {
5077 min = 1;
5078
5079 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5080 switch (NCTYPE(node)->ctype) {
5081 case ONIGENC_CTYPE_WORD:
5082 if (NCTYPE(node)->not != 0) {
5083 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5084 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5085 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5086 }
5087 }
5088 }
5089 else {
5090 for (i = 0; i < maxcode; i++) {
5091 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5092 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5093 }
5094 }
5095 }
5096 break;
5097 }
5098 }
5099 else {
5100 min = ONIGENC_MBC_MINLEN(env->enc);
5101 }
5102 set_mml(&opt->len, min, max);
5103 }
5104 break;
5105
/* NOTE(review): the lines that declare and set min/max for NT_CANY are
   not visible in this listing. */
5106 case NT_CANY:
5107 {
5110 set_mml(&opt->len, min, max);
5111 }
5112 break;
5113
5114 case NT_ANCHOR:
/* NOTE(review): some case labels of this inner switch are not visible
   in this listing. */
5115 switch (NANCHOR(node)->type) {
5116 case ANCHOR_BEGIN_BUF:
5118 case ANCHOR_BEGIN_LINE:
5119 case ANCHOR_END_BUF:
5121 case ANCHOR_END_LINE:
5122 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5123 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5124 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5125 break;
5126
5127 case ANCHOR_PREC_READ:
5128 {
5129 NodeOptInfo nopt;
5130
/* the look-ahead target provides the prec-read exact info and the map */
5131 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5132 if (r == 0) {
5133 if (nopt.exb.len > 0)
5134 copy_opt_exact_info(&opt->expr, &nopt.exb);
5135 else if (nopt.exm.len > 0)
5136 copy_opt_exact_info(&opt->expr, &nopt.exm);
5137
5138 opt->expr.reach_end = 0;
5139
5140 if (nopt.map.value > 0)
5141 copy_opt_map_info(&opt->map, &nopt.map);
5142 }
5143 }
5144 break;
5145
5147 break;
5148 }
5149 break;
5150
/* back reference: length range is the min/max over all referenced groups */
5151 case NT_BREF:
5152 {
5153 int i;
5154 int* backs;
5155 OnigDistance min, max, tmin, tmax;
5156 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5157 BRefNode* br = NBREF(node);
5158
5159 if (br->state & NST_RECURSION) {
5160 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5161 break;
5162 }
5163 backs = BACKREFS_P(br);
5164 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5165 if (r != 0) break;
5166 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5167 if (r != 0) break;
5168 for (i = 1; i < br->back_num; i++) {
5169 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5170 if (r != 0) break;
5171 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5172 if (r != 0) break;
5173 if (min > tmin) min = tmin;
5174 if (max < tmax) max = tmax;
5175 }
5176 if (r == 0) set_mml(&opt->len, min, max);
5177 }
5178 break;
5179
5180#ifdef USE_SUBEXP_CALL
/* subexp call: optimize the target under the target's own options */
5181 case NT_CALL:
5182 if (IS_CALL_RECURSION(NCALL(node)))
5183 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5184 else {
5185 OnigOptionType save = env->options;
5186 env->options = NENCLOSE(NCALL(node)->target)->option;
5187 r = optimize_node_left(NCALL(node)->target, opt, env);
5188 env->options = save;
5189 }
5190 break;
5191#endif
5192
5193 case NT_QTFR:
5194 {
5195 int i;
5196 OnigDistance min, max;
5197 NodeOptInfo nopt;
5198 QtfrNode* qn = NQTFR(node);
5199
5200 r = optimize_node_left(qn->target, &nopt, env);
5201 if (r) break;
5202
5203 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5204 if (env->mmd.max == 0 &&
5205 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5206 if (IS_MULTILINE(env->options))
5207 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5208 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5209 else
5210 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5211 }
5212 }
5213 else {
5214 if (qn->lower > 0) {
5215 copy_node_opt_info(opt, &nopt);
5216 if (nopt.exb.len > 0) {
5217 if (nopt.exb.reach_end) {
/* repeat the target's string up to `lower` times or until it is full */
5218 for (i = 2; i <= qn->lower &&
5219 ! is_full_opt_exact_info(&opt->exb); i++) {
5220 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5221 }
5222 if (i < qn->lower) {
5223 opt->exb.reach_end = 0;
5224 }
5225 }
5226 }
5227
5228 if (qn->lower != qn->upper) {
5229 opt->exb.reach_end = 0;
5230 opt->exm.reach_end = 0;
5231 }
5232 if (qn->lower > 1)
5233 opt->exm.reach_end = 0;
5234 }
5235 }
5236
5237 min = distance_multiply(nopt.len.min, qn->lower);
5238 if (IS_REPEAT_INFINITE(qn->upper))
5239 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5240 else
5241 max = distance_multiply(nopt.len.max, qn->upper);
5242
5243 set_mml(&opt->len, min, max);
5244 }
5245 break;
5246
5247 case NT_ENCLOSE:
5248 {
5249 EncloseNode* en = NENCLOSE(node);
5250
5251 switch (en->type) {
5252 case ENCLOSE_OPTION:
5253 {
5254 OnigOptionType save = env->options;
5255
/* the group's options apply only while its target is optimized */
5256 env->options = en->option;
5257 r = optimize_node_left(en->target, opt, env);
5258 env->options = save;
5259 }
5260 break;
5261
5262 case ENCLOSE_MEMORY:
5263#ifdef USE_SUBEXP_CALL
/* NOTE(review): the condition that opens the block below and the initial
   value of max are not visible in this listing. */
5264 en->opt_count++;
5266 OnigDistance min, max;
5267
5268 min = 0;
5270 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5271 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5272 set_mml(&opt->len, min, max);
5273 }
5274 else
5275#endif
5276 {
5277 r = optimize_node_left(en->target, opt, env);
5278
/* the anychar-star anchor is dropped for a group that is back-referenced */
5279 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5280 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5281 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5282 }
5283 }
5284 break;
5285
5287 case ENCLOSE_CONDITION:
5288 r = optimize_node_left(en->target, opt, env);
5289 break;
5290
5291 case ENCLOSE_ABSENT:
5292 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5293 break;
5294 }
5295 }
5296 break;
5297
5298 default:
5299#ifdef ONIG_DEBUG
5300 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5301 NTYPE(node));
5302#endif
5303 r = ONIGERR_TYPE_BUG;
5304 break;
5305 }
5306
5307 return r;
5308}
5309
/*
 * Installs the exact-string candidate *e into reg.
 *
 * The string is copied into a newly allocated reg->exact, set_bm_skip()
 * is tried when the string is long enough, and dmin, dmax and (for a
 * finite dmin) threshold_len are recorded.  An empty candidate is ignored.
 * The return paths visible here yield 0.
 *
 * NOTE(review): several lines are not visible in this listing: whatever
 * follows the xmalloc call, the right-hand side of allow_reverse, and the
 * values assigned to reg->optimize in each branch.
 */
5310static int
5311set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5312{
5313 int r;
5314 int allow_reverse;
5315
5316 if (e->len == 0) return 0;
5317
5318 reg->exact = (UChar* )xmalloc(e->len);
5320 xmemcpy(reg->exact, e->s, e->len);
5321 reg->exact_end = reg->exact + e->len;
5322
5323 allow_reverse =
5325
/* case-insensitive candidate */
5326 if (e->ignore_case > 0) {
5327 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5328 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5329 reg->map, &(reg->int_map), 1);
5330 if (r == 0) {
5331 reg->optimize = (allow_reverse != 0
5333 }
5334 else {
5336 }
5337 }
5338 else {
5340 }
5341 }
/* case-sensitive (or unset) candidate */
5342 else {
5343 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5344 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5345 reg->map, &(reg->int_map), 0);
5346 if (r == 0) {
5347 reg->optimize = (allow_reverse != 0
5349 }
5350 else {
5352 }
5353 }
5354 else {
5356 }
5357 }
5358
5359 reg->dmin = e->mmd.min;
5360 reg->dmax = e->mmd.max;
5361
5362 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5363 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5364 }
5365
5366 return 0;
5367}
5368
/*
 * Installs the map candidate *m into reg: copies the byte map, records
 * dmin/dmax and, for a finite dmin, sets threshold_len to dmin + 1.
 *
 * NOTE(review): one line after the copy loop is not visible in this
 * listing -- confirm against the complete file.
 */
5369static void
5370set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5371{
5372 int i;
5373
5374 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5375 reg->map[i] = m->map[i];
5376
5378 reg->dmin = m->mmd.min;
5379 reg->dmax = m->mmd.max;
5380
5381 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5382 reg->threshold_len = (int )(reg->dmin + 1);
5383 }
5384}
5385
/*
 * NOTE(review): the body of this function is not visible in this listing.
 * Judging by its name and its callers it presumably derives
 * reg->sub_anchor from *anc -- confirm against the complete file.
 */
5386static void
5387set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5388{
5391}
5392
5393#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5394static void print_optimize_info(FILE* f, regex_t* reg);
5395#endif
5396
/*
 * Computes the search optimization for reg from the parse tree `node`.
 *
 * An OptEnv is built from reg and scan_env, optimize_node_left() gathers
 * the info, and then either the best exact string or the byte map is
 * installed, whichever comp_opt_exact_or_map_info() prefers.
 * Returns the non-zero result of optimize_node_left() on failure,
 * otherwise r as left by the chosen path.
 *
 * NOTE(review): parts of the reg->anchor computation and the body of the
 * final else-branch are not visible in this listing.
 */
5397static int
5398set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5399{
5400
5401 int r;
5402 NodeOptInfo opt;
5403 OptEnv env;
5404
5405 env.enc = reg->enc;
5406 env.options = reg->options;
5407 env.case_fold_flag = reg->case_fold_flag;
5408 env.scan_env = scan_env;
5409 clear_mml(&env.mmd);
5410
5411 r = optimize_node_left(node, &opt, &env);
5412 if (r) return r;
5413
5414 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5417
5420
5423
/* with an end anchor, the total length range of the pattern is recorded */
5424 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5425 reg->anchor_dmin = opt.len.min;
5426 reg->anchor_dmax = opt.len.max;
5427 }
5428
5429 if (opt.exb.len > 0 || opt.exm.len > 0) {
/* the better of the boundary and middle strings ends up in opt.exb */
5430 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5431 if (opt.map.value > 0 &&
5432 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5433 goto set_map;
5434 }
5435 else {
5436 r = set_optimize_exact_info(reg, &opt.exb);
5437 set_sub_anchor(reg, &opt.exb.anc);
5438 }
5439 }
5440 else if (opt.map.value > 0) {
5441 set_map:
5442 set_optimize_map_info(reg, &opt.map);
5443 set_sub_anchor(reg, &opt.map.anc);
5444 }
5445 else {
5447 if (opt.len.max == 0)
5449 }
5450
5451#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5452 print_optimize_info(stderr, reg);
5453#endif
5454 return r;
5455}
5456
/*
 * Resets the optimization fields of reg and releases reg->exact if set.
 *
 * NOTE(review): the first statement of the body is not visible in this
 * listing -- confirm against the complete file.
 */
5457static void
5458clear_optimize_info(regex_t* reg)
5459{
5461 reg->anchor = 0;
5462 reg->anchor_dmin = 0;
5463 reg->anchor_dmax = 0;
5464 reg->sub_anchor = 0;
5465 reg->exact_end = (UChar* )NULL;
5466 reg->threshold_len = 0;
5467 if (IS_NOT_NULL(reg->exact)) {
5468 xfree(reg->exact);
5469 reg->exact = (UChar* )NULL;
5470 }
5471}
5472
5473#ifdef ONIG_DEBUG
5474
5475static void print_enc_string(FILE* fp, OnigEncoding enc,
5476 const UChar *s, const UChar *end)
5477{
5478 fprintf(fp, "\nPATTERN: /");
5479
5480 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5481 const UChar *p;
5482 OnigCodePoint code;
5483
5484 p = s;
5485 while (p < end) {
5486 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5487 if (code >= 0x80) {
5488 fprintf(fp, " 0x%04x ", (int )code);
5489 }
5490 else {
5491 fputc((int )code, fp);
5492 }
5493
5494 p += enclen(enc, p, end);
5495 }
5496 }
5497 else {
5498 while (s < end) {
5499 fputc((int )*s, fp);
5500 s++;
5501 }
5502 }
5503
5504 fprintf(fp, "/ (%s)\n", enc->name);
5505}
5506#endif /* ONIG_DEBUG */
5507
5508#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5509static void
5510print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5511{
5512 if (a == ONIG_INFINITE_DISTANCE)
5513 fputs("inf", f);
5514 else
5515 fprintf(f, "(%"PRIuPTR")", a);
5516
5517 fputs("-", f);
5518
5519 if (b == ONIG_INFINITE_DISTANCE)
5520 fputs("inf", f);
5521 else
5522 fprintf(f, "(%"PRIuPTR")", b);
5523}
5524
5525static void
5526print_anchor(FILE* f, int anchor)
5527{
5528 int q = 0;
5529
5530 fprintf(f, "[");
5531
5532 if (anchor & ANCHOR_BEGIN_BUF) {
5533 fprintf(f, "begin-buf");
5534 q = 1;
5535 }
5536 if (anchor & ANCHOR_BEGIN_LINE) {
5537 if (q) fprintf(f, ", ");
5538 q = 1;
5539 fprintf(f, "begin-line");
5540 }
5541 if (anchor & ANCHOR_BEGIN_POSITION) {
5542 if (q) fprintf(f, ", ");
5543 q = 1;
5544 fprintf(f, "begin-pos");
5545 }
5546 if (anchor & ANCHOR_END_BUF) {
5547 if (q) fprintf(f, ", ");
5548 q = 1;
5549 fprintf(f, "end-buf");
5550 }
5551 if (anchor & ANCHOR_SEMI_END_BUF) {
5552 if (q) fprintf(f, ", ");
5553 q = 1;
5554 fprintf(f, "semi-end-buf");
5555 }
5556 if (anchor & ANCHOR_END_LINE) {
5557 if (q) fprintf(f, ", ");
5558 q = 1;
5559 fprintf(f, "end-line");
5560 }
5561 if (anchor & ANCHOR_ANYCHAR_STAR) {
5562 if (q) fprintf(f, ", ");
5563 q = 1;
5564 fprintf(f, "anychar-star");
5565 }
5566 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5567 if (q) fprintf(f, ", ");
5568 fprintf(f, "anychar-star-ml");
5569 }
5570
5571 fprintf(f, "]");
5572}
5573
/*
 * Debug helper: prints the optimization info stored in reg to f -- the
 * optimize kind by name, the anchors (with the anchor distance range when
 * an end-buffer anchor is set), the sub anchors, and then either the
 * exact string or the contents of the byte map.
 *
 * NOTE(review): the second half of the condition that chooses between
 * fputc() and "%d" for a map entry is not visible in this listing.
 */
5574static void
5575print_optimize_info(FILE* f, regex_t* reg)
5576{
/* names indexed by reg->optimize */
5577 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5578 "EXACT_IC", "MAP",
5579 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5580
5581 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5582 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5583 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5584 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5585 fprintf(f, "\n");
5586
5587 if (reg->optimize) {
5588 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5589 fprintf(f, "\n");
5590 }
5591 fprintf(f, "\n");
5592
5593 if (reg->exact) {
5594 UChar *p;
5595 fprintf(f, "exact: [");
5596 for (p = reg->exact; p < reg->exact_end; p++) {
5597 fputc(*p, f);
5598 }
5599 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5600 }
5601 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5602 int c, i, n = 0;
5603
/* first pass: count the bytes that are set in the map */
5604 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5605 if (reg->map[i]) n++;
5606
5607 fprintf(f, "map: n=%d\n", n);
5608 if (n > 0) {
5609 c = 0;
5610 fputc('[', f);
/* second pass: list them, separated by ", " */
5611 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5612 if (reg->map[i] != 0) {
5613 if (c > 0) fputs(", ", f);
5614 c++;
5615 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5617 fputc(i, f);
5618 else
5619 fprintf(f, "%d", i);
5620 }
5621 }
5622 fprintf(f, "]\n");
5623 }
5624 }
5625}
5626#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5627
5628
/*
 * Releases the resources owned by `reg` without freeing the regex_t itself:
 * the buffers visible below are passed to xfree when non-NULL, a chained
 * regex is released with onig_free, and with USE_NAMED_GROUP the name table
 * is released through onig_names_free.  Does nothing when `reg` is NULL.
 *
 * NOTE(review): listing lines 5630 (the function name/parameter line;
 * callers in this file refer to it as onig_free_body(reg)) and 5636 are
 * missing from this extract.
 */
5629 extern void
5631 {
5632 if (IS_NOT_NULL(reg)) {
5633 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5634 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5635 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5637 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
/* The chained regex is a complete object, so it is freed as a whole. */
5638 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5639
5640 #ifdef USE_NAMED_GROUP
5641 onig_names_free(reg);
5642 #endif
5643 }
5644 }
5645
/*
 * Releases everything owned by `reg` via onig_free_body and then frees the
 * regex_t itself.  Does nothing when `reg` is NULL.
 *
 * NOTE(review): listing line 5647 (the function name/parameter line) is
 * missing from this extract; other code in this file calls this routine as
 * onig_free(reg).
 */
5646 extern void
5648 {
5649 if (IS_NOT_NULL(reg)) {
5650 onig_free_body(reg);
5651 xfree(reg);
5652 }
5653 }
5654
5655#ifdef RUBY
/*
 * Returns an estimate, in bytes, of the memory held by `reg`:
 * sizeof(regex_t) plus the size of each non-NULL buffer it points to, plus
 * (recursively) the size of the chained regex.  Returns 0 for a NULL `reg`.
 *
 * NOTE(review): listing line 5657 (the function name/parameter line) is
 * missing from this extract; the recursive call below names it onig_memsize.
 */
5656 size_t
5658 {
5659 size_t size = sizeof(regex_t);
5660 if (IS_NULL(reg)) return 0;
5661 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5662 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
/* Both int maps are accounted as ONIG_CHAR_TABLE_SIZE ints each. */
5663 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5664 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5665 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5666 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5667
5668 return size;
5669 }
5670
/*
 * Returns an estimate, in bytes, of the memory held by the region `regs`:
 * the size of the struct itself plus `allocated` entries of both the beg
 * and the end arrays.  Returns 0 for a NULL `regs`.
 *
 * NOTE(review): listing line 5672 (the function name/parameter line) is
 * missing from this extract, so the function's name is not visible here.
 */
5671 size_t
5673 {
5674 size_t size = sizeof(*regs);
5675 if (IS_NULL(regs)) return 0;
5676 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5677 return size;
5678 }
5679#endif
5680
/*
 * Moves the contents of the regex `from` into `to`: the resources currently
 * owned by `to` are released with onig_free_body, the whole regex_t is then
 * copied over, and only the `from` struct itself is freed (its buffers now
 * belong to `to`).  Both arguments are expanded more than once, so they
 * should be free of side effects.
 */
5681 #define REGEX_TRANSFER(to,from) do {\
5682 onig_free_body(to);\
5683 xmemcpy(to, from, sizeof(regex_t));\
5684 xfree(from);\
5685 } while (0)
5686
5687#if 0
/* Function wrapper around REGEX_TRANSFER; currently compiled out by the
   enclosing `#if 0`. */
5688 extern void
5689 onig_transfer(regex_t* to, regex_t* from)
5690 {
5691 REGEX_TRANSFER(to, from);
5692 }
5693#endif
5694
/* Forward declarations of the debug printers used by the compile routine
   below; each exists only when its debug macro is defined. */
5695 #ifdef ONIG_DEBUG_COMPILE
5696 static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5697 #endif
5698 #ifdef ONIG_DEBUG_PARSE_TREE
5699 static void print_tree(FILE* f, Node* node);
5700 #endif
5701
5702#ifdef RUBY
5703extern int
5704onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5705 OnigErrorInfo* einfo)
5706{
5707 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5708}
5709#endif
5710
/*
 * Compiles the pattern [pattern, pattern_end) into `reg`.  In a RUBY build
 * the function is named onig_compile_ruby and additionally receives the
 * source file/line, which are stored in the scan environment; otherwise the
 * same body is compiled as onig_compile.
 *
 * Stages, in order: prepare the code buffer, parse the pattern into a node
 * tree, resolve named groups and subexp calls, run setup_tree, derive the
 * backtrack/capture bookkeeping, compute optimization info, and finally
 * emit byte code with compile_tree.
 *
 * Returns r: 0 on success, otherwise the error code of the failing stage.
 * On the `err` path, when both scan_env.error and `einfo` are non-NULL,
 * `einfo` receives the encoding and the [par, par_end) span recorded in the
 * scan environment.
 *
 * NOTE(review): listing lines 5755, 5773-5774, 5813, 5869, 5872 and 5874
 * are missing from this extract; the gaps are marked below.
 */
5711 #ifdef RUBY
5712 extern int
5713 onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5714 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5715 #else
5716 extern int
5717 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5718 OnigErrorInfo* einfo)
5719 #endif
5720 {
5721 #define COMPILE_INIT_SIZE 20
5722
5723 int r;
5724 OnigDistance init_size;
5725 Node* root;
5726 ScanEnv scan_env = {0};
5727 #ifdef USE_SUBEXP_CALL
5728 UnsetAddrList uslist;
5729 #endif
5730
5731 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5732
5733 #ifdef RUBY
5734 scan_env.sourcefile = sourcefile;
5735 scan_env.sourceline = sourceline;
5736 #endif
5737
5738 #ifdef ONIG_DEBUG
5739 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5740 #endif
5741
/* First compilation into this regex: size the code buffer at twice the
   pattern length (COMPILE_INIT_SIZE for an empty pattern).  Otherwise the
   existing buffer is reused from the start. */
5742 if (reg->alloc == 0) {
5743 init_size = (pattern_end - pattern) * 2;
5744 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5745 r = BBUF_INIT(reg, init_size);
5746 if (r != 0) goto end;
5747 }
5748 else
5749 reg->used = 0;
5750
5751 reg->num_mem = 0;
5752 reg->num_repeat = 0;
5753 reg->num_null_check = 0;
5754 reg->repeat_range_alloc = 0;
/* NOTE(review): listing line 5755 is missing from this extract. */
5756 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5757 reg->num_comb_exp_check = 0;
5758 #endif
5759
5760 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5761 if (r != 0) goto err;
5762
5763 #ifdef ONIG_DEBUG_PARSE_TREE
5764 # if 0
5765 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5766 print_tree(stderr, root);
5767 # endif
5768 #endif
5769
5770 #ifdef USE_NAMED_GROUP
5771 /* mixed use named group and no-named group */
5772 if (scan_env.num_named > 0 &&
/* NOTE(review): listing lines 5773-5774 (the rest of this condition) are
   missing from this extract. */
5775 if (scan_env.num_named != scan_env.num_mem)
5776 r = disable_noname_group_capture(&root, reg, &scan_env);
5777 else
5778 r = numbered_ref_check(root);
5779
5780 if (r != 0) goto err;
5781 }
5782 #endif
5783
5784 #ifdef USE_SUBEXP_CALL
/* Subexp calls need the unset-address list; once it exists, failures must
   leave through err_unset so that the list is released. */
5785 if (scan_env.num_call > 0) {
5786 r = unset_addr_list_init(&uslist, scan_env.num_call);
5787 if (r != 0) goto err;
5788 scan_env.unset_addr_list = &uslist;
5789 r = setup_subexp_call(root, &scan_env);
5790 if (r != 0) goto err_unset;
5791 r = subexp_recursive_check_trav(root, &scan_env);
5792 if (r < 0) goto err_unset;
5793 r = subexp_inf_recursive_check_trav(root, &scan_env);
5794 if (r != 0) goto err_unset;
5795
5796 reg->num_call = scan_env.num_call;
5797 }
5798 else
5799 reg->num_call = 0;
5800 #endif
5801
5802 r = setup_tree(root, reg, 0, &scan_env);
5803 if (r != 0) goto err_unset;
5804
5805 #ifdef ONIG_DEBUG_PARSE_TREE
5806 print_tree(stderr, root);
5807 #endif
5808
/* Copy the capture/backtrack bit sets gathered during setup into the
   regex; groups with capture history are always included. */
5809 reg->capture_history = scan_env.capture_history;
5810 reg->bt_mem_start = scan_env.bt_mem_start;
5811 reg->bt_mem_start |= reg->capture_history;
5812 if (IS_FIND_CONDITION(reg->options))
/* NOTE(review): listing line 5813 (the statement of this `if`) is missing
   from this extract. */
5814 else {
5815 reg->bt_mem_end = scan_env.bt_mem_end;
5816 reg->bt_mem_end |= reg->capture_history;
5817 }
5818
5819 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5820 if (scan_env.backrefed_mem == 0
5821 # ifdef USE_SUBEXP_CALL
5822 || scan_env.num_call == 0
5823 # endif
5824 ) {
5825 setup_comb_exp_check(root, 0, &scan_env);
5826 # ifdef USE_SUBEXP_CALL
/* The check count is reset to 0 when the pattern is recursive ... */
5827 if (scan_env.has_recursion != 0) {
5828 scan_env.num_comb_exp_check = 0;
5829 }
5830 else
5831 # endif
/* ... or when any group up to comb_exp_max_regnum is back-referenced. */
5832 if (scan_env.comb_exp_max_regnum > 0) {
5833 int i;
5834 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5835 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5836 scan_env.num_comb_exp_check = 0;
5837 break;
5838 }
5839 }
5840 }
5841 }
5842
5843 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5844 #endif
5845
5846 clear_optimize_info(reg);
5847 #ifndef ONIG_DONT_OPTIMIZE
5848 r = set_optimize_info_from_tree(root, reg, &scan_env);
5849 if (r != 0) goto err_unset;
5850 #endif
5851
/* Free the dynamic mem-node table here and clear the pointer, so that the
   error path below does not free it a second time. */
5852 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5853 xfree(scan_env.mem_nodes_dynamic);
5854 scan_env.mem_nodes_dynamic = (Node** )NULL;
5855 }
5856
5857 r = compile_tree(root, reg);
5858 if (r == 0) {
5859 r = add_opcode(reg, OP_END);
5860 #ifdef USE_SUBEXP_CALL
/* Fix up the recorded call addresses, then release the list. */
5861 if (scan_env.num_call > 0) {
5862 r = unset_addr_list_fix(&uslist, reg);
5863 unset_addr_list_end(&uslist);
5864 if (r) goto err;
5865 }
5866 #endif
5867
5868 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
/* NOTE(review): listing lines 5869, 5872 and 5874 (the statements selected
   by these three branches) are missing from this extract. */
5870 else {
5871 if (reg->bt_mem_start != 0)
5873 else
5875 }
5876 }
5877 #ifdef USE_SUBEXP_CALL
5878 else if (scan_env.num_call > 0) {
5879 unset_addr_list_end(&uslist);
5880 }
5881 #endif
5882 onig_node_free(root);
5883
5884 #ifdef ONIG_DEBUG_COMPILE
5885 # ifdef USE_NAMED_GROUP
5886 onig_print_names(stderr, reg);
5887 # endif
5888 print_compiled_byte_code_list(stderr, reg);
5889 #endif
5890
5891 end:
5892 return r;
5893
/* Error exits: err_unset additionally releases the unset-address list and
   then falls through to err. */
5894 err_unset:
5895 #ifdef USE_SUBEXP_CALL
5896 if (scan_env.num_call > 0) {
5897 unset_addr_list_end(&uslist);
5898 }
5899 #endif
5900 err:
/* Report the offending part of the pattern to the caller, if recorded. */
5901 if (IS_NOT_NULL(scan_env.error)) {
5902 if (IS_NOT_NULL(einfo)) {
5903 einfo->enc = scan_env.enc;
5904 einfo->par = scan_env.error;
5905 einfo->par_end = scan_env.error_end;
5906 }
5907 }
5908
5909 onig_node_free(root);
5910 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5911 xfree(scan_env.mem_nodes_dynamic);
5912 return r;
5913 }
5914
/* Library initialization flag: set to 1 by the init routine below and reset
   to 0 by the shutdown routine. */
5915 static int onig_inited = 0;
5916
/*
 * Initializes the caller-provided regex_t `reg` for a later compile: runs
 * onig_init() if the library has not been initialized yet, validates the
 * arguments, merges the syntax's default options into `option`, and resets
 * the fields assigned below.  Returns 0 on success.
 *
 * NOTE(review): listing lines 5918 (the function name and its leading
 * parameters), 5926, 5929 and 5931-5933 are missing from this extract.  The
 * missing lines include the statements executed when `reg` is NULL or `enc`
 * is undefined -- presumably error returns -- and the opening of the block
 * closed at listing line 5934.
 */
5917 extern int
5919 OnigCaseFoldType case_fold_flag,
5920 OnigEncoding enc, const OnigSyntaxType* syntax)
5921 {
5922 if (! onig_inited)
5923 onig_init();
5924
5925 if (IS_NULL(reg))
5927
5928 if (ONIGENC_IS_UNDEF(enc))
5930
5934 }
5935
/* The syntax's options are OR-ed in on both paths; when
   ONIG_OPTION_NEGATE_SINGLELINE is requested, ONIG_OPTION_SINGLELINE is
   cleared again afterwards. */
5936 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5937 option |= syntax->options;
5938 option &= ~ONIG_OPTION_SINGLELINE;
5939 }
5940 else
5941 option |= syntax->options;
5942
5943 (reg)->enc = enc;
5944 (reg)->options = option;
5945 (reg)->syntax = syntax;
5946 (reg)->optimize = 0;
5947 (reg)->exact = (UChar* )NULL;
5948 (reg)->int_map = (int* )NULL;
5949 (reg)->int_map_backward = (int* )NULL;
5950 (reg)->chain = (regex_t* )NULL;
5951
/* Empty code buffer: the compile routine allocates it when alloc == 0. */
5952 (reg)->p = (UChar* )NULL;
5953 (reg)->alloc = 0;
5954 (reg)->used = 0;
5955 (reg)->name_table = (void* )NULL;
5956
5957 (reg)->case_fold_flag = case_fold_flag;
5958 return 0;
5959 }
5960
/*
 * Initializes and compiles a regex into storage supplied by the caller:
 * calls onig_reg_init with ONIGENC_CASE_FOLD_DEFAULT and, if that succeeds,
 * onig_compile.  Returns the first non-zero error code, or 0 on success.
 * Nothing is freed here on failure.
 *
 * NOTE(review): listing line 5962 (the function name and its leading
 * parameters) is missing from this extract.
 */
5961 extern int
5963 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5964 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5965 {
5966 int r;
5967
5968 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5969 if (r) return r;
5970
5971 r = onig_compile(reg, pattern, pattern_end, einfo);
5972 return r;
5973 }
5974
5975extern int
5976onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5977 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5978 OnigErrorInfo* einfo)
5979{
5980 int r;
5981
5982 *reg = (regex_t* )xmalloc(sizeof(regex_t));
5983 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5984
5985 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5986 if (r) goto err;
5987
5988 r = onig_compile(*reg, pattern, pattern_end, einfo);
5989 if (r) {
5990 err:
5991 onig_free(*reg);
5992 *reg = NULL;
5993 }
5994 return r;
5995}
5996
/*
 * Initialization entry point that simply forwards to onig_init() and
 * returns its result.
 *
 * NOTE(review): listing line 5998 (the function name/parameter line) is
 * missing from this extract, so the name and parameters are not visible.
 */
5997 extern int
5999 {
6000 return onig_init();
6001 }
6002
/*
 * One-time library initialization.  Returns 0 immediately when the
 * onig_inited flag is already set; otherwise sets the flag, initializes the
 * encoding layer with onigenc_init(), and enables the optional debug
 * facilities selected at build time.  Always returns 0.
 *
 * NOTE(review): listing line 6004 (the function name/parameter line) is
 * missing from this extract; other code in this file calls it as onig_init().
 */
6003 extern int
6005 {
6006 if (onig_inited != 0)
6007 return 0;
6008
6009 onig_inited = 1;
6010
6011 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6012 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6013 #endif
6014
6015 onigenc_init();
6016 /* onigenc_set_default_caseconv_table((UChar* )0); */
6017
6018 #ifdef ONIG_DEBUG_STATISTICS
6019 onig_statistics_init();
6020 #endif
6021
6022 return 0;
6023 }
6024
6025
/* Head of the singly linked list of callbacks registered through
   onig_add_end_call; exec_end_call_list runs and frees the entries. */
6026 static OnigEndCallListItemType* EndCallTop;
6027
/*
 * Registers `func` to be called by exec_end_call_list.  A new list item is
 * allocated and pushed at the head of EndCallTop, so callbacks run in
 * reverse order of registration.  If the allocation fails the function
 * returns without registering anything and without reporting the failure.
 *
 * NOTE(review): listing line 6030 (presumably the declaration of `item`)
 * is missing from this extract.
 */
6028 extern void onig_add_end_call(void (*func)(void))
6029 {
6031
6032 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6033 if (item == 0) return ;
6034
6035 item->next = EndCallTop;
6036 item->func = func;
6037
6038 EndCallTop = item;
6039 }
6040
/*
 * Runs every callback on the EndCallTop list, starting at the head, and
 * frees each list item after its callback has returned.  The list is empty
 * (EndCallTop == 0) afterwards.
 *
 * NOTE(review): listing line 6044 (presumably the declaration of `prev`)
 * is missing from this extract.
 */
6041 static void
6042 exec_end_call_list(void)
6043 {
6045 void (*func)(void);
6046
6047 while (EndCallTop != 0) {
6048 func = EndCallTop->func;
6049 (*func)();
6050
/* Unlink the item before freeing it. */
6051 prev = EndCallTop;
6052 EndCallTop = EndCallTop->next;
6053 xfree(prev);
6054 }
6055 }
6056
/*
 * Library shutdown: runs the registered end-call callbacks, emits the
 * optional debug statistics / leak report selected at build time, and
 * clears the onig_inited flag so that the library can be initialized
 * again.  Always returns 0.
 *
 * NOTE(review): listing line 6058 (the function name/parameter line) is
 * missing from this extract, so the function's name is not visible here.
 */
6057 extern int
6059 {
6060 exec_end_call_list();
6061
6062 #ifdef ONIG_DEBUG_STATISTICS
6063 onig_print_statistics(stderr);
6064 #endif
6065
6066 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6067 _CrtDumpMemoryLeaks();
6068 #endif
6069
6070 onig_inited = 0;
6071
6072 return 0;
6073 }
6074
/*
 * Tests whether `code` falls inside one of the code-point ranges stored at
 * `p`.  The data is read as a count n followed by n (low, high) pairs of
 * OnigCodePoint.  Returns 1 when a range contains `code`, 0 otherwise.
 *
 * The lookup is a binary search on the ranges' upper bounds, which
 * presumably requires the ranges to be sorted and non-overlapping --
 * confirm against the code that builds the table.
 *
 * NOTE(review): listing line 6076 (the function name/parameter line) is
 * missing from this extract.
 */
6075 extern int
6077 {
6078 OnigCodePoint n, *data;
6079 OnigCodePoint low, high, x;
6080
6081 GET_CODE_POINT(n, p);
6082 data = (OnigCodePoint* )p;
/* Skip the leading count; data[2*k] / data[2*k + 1] are the bounds of
   range k. */
6083 data++;
6084
/* Find the first range whose upper bound is >= code. */
6085 for (low = 0, high = n; low < high; ) {
6086 x = (low + high) >> 1;
6087 if (code > data[x * 2 + 1])
6088 low = x + 1;
6089 else
6090 high = x;
6091 }
6092
/* `code` is a member iff such a range exists and starts at or below it. */
6093 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6094 }
6095
/*
 * Tests whether `code` is matched by the character class `cc`, given the
 * encoded length `elen` of the character.  Multi-byte characters
 * (elen > 1) and codes >= SINGLE_BYTE_SIZE are looked up in the class's
 * code-range buffer (no buffer means "not found"); all other codes are
 * looked up in the bit set.  The result is inverted for a negated class.
 * Returns non-zero on a match, 0 otherwise.
 *
 * NOTE(review): listing line 6097 (the function name/parameter line) is
 * missing from this extract; the caller below names it
 * onig_is_code_in_cc_len(len, code, cc).
 */
6096 extern int
6098 {
6099 int found;
6100
6101 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6102 if (IS_NULL(cc->mbuf)) {
6103 found = 0;
6104 }
6105 else {
6106 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6107 }
6108 }
6109 else {
6110 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6111 }
6112
6113 if (IS_NCCLASS_NOT(cc))
6114 return !found;
6115 else
6116 return found;
6117 }
6118
/*
 * Tests whether `code` is matched by the character class `cc` under
 * encoding `enc`: derives the character's encoded length and delegates to
 * onig_is_code_in_cc_len.
 *
 * NOTE(review): listing line 6120 (the function name/parameter line) is
 * missing from this extract, so the function's name is not visible here.
 */
6119 extern int
6121 {
6122 int len;
6123
/* When the encoding's minimum character length exceeds one byte, a fixed
   length of 2 is used: any value > 1 makes the callee use the code-range
   lookup. */
6124 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6125 len = 2;
6126 }
6127 else {
6128 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6129 }
6130 return onig_is_code_in_cc_len(len, code, cc);
6131 }
6132
6133
6134#ifdef ONIG_DEBUG
6135
/* Operand kinds recorded per opcode in OnigOpInfo; the byte-code printer
   uses them to decide how to decode what follows an opcode.  ARG_SPECIAL
   marks opcodes that the printer decodes with opcode-specific code, and
   ARG_NON marks opcodes for which it prints no operand. */
6136 /* arguments type */
6137 # define ARG_SPECIAL -1
6138 # define ARG_NON 0
6139 # define ARG_RELADDR 1
6140 # define ARG_ABSADDR 2
6141 # define ARG_LENGTH 3
6142 # define ARG_MEMNUM 4
6143 # define ARG_OPTION 5
6144 # define ARG_STATE_CHECK 6
6145
/*
 * Debug table with one entry per opcode: the opcode, its printable name and
 * its operand kind (one of the ARG_* values).  The table ends with a
 * sentinel entry whose opcode is -1; op2name and op2arg_type stop scanning
 * there.
 *
 * NOTE(review): listing line 6244 (the first half of the entry completed
 * by listing line 6245) is missing from this extract.
 */
6146 OnigOpInfoType OnigOpInfo[] = {
6147 { OP_FINISH, "finish", ARG_NON },
6148 { OP_END, "end", ARG_NON },
6149 { OP_EXACT1, "exact1", ARG_SPECIAL },
6150 { OP_EXACT2, "exact2", ARG_SPECIAL },
6151 { OP_EXACT3, "exact3", ARG_SPECIAL },
6152 { OP_EXACT4, "exact4", ARG_SPECIAL },
6153 { OP_EXACT5, "exact5", ARG_SPECIAL },
6154 { OP_EXACTN, "exactn", ARG_SPECIAL },
6155 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6156 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6157 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6158 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6159 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6160 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6161 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6162 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6163 { OP_CCLASS, "cclass", ARG_SPECIAL },
6164 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6165 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6166 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6167 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6168 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6169 { OP_ANYCHAR, "anychar", ARG_NON },
6170 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6171 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6172 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6173 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6174 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6175 { OP_WORD, "word", ARG_NON },
6176 { OP_NOT_WORD, "not-word", ARG_NON },
6177 { OP_WORD_BOUND, "word-bound", ARG_NON },
6178 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6179 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6180 { OP_WORD_END, "word-end", ARG_NON },
6181 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6182 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6183 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6184 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6185 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6186 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6187 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6188 { OP_END_BUF, "end-buf", ARG_NON },
6189 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6190 { OP_END_LINE, "end-line", ARG_NON },
6191 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6192 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6193 { OP_BACKREF1, "backref1", ARG_NON },
6194 { OP_BACKREF2, "backref2", ARG_NON },
6195 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6196 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6197 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6198 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6199 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6200 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6201 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6202 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6203 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6204 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6205 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6206 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6207 { OP_SET_OPTION, "set-option", ARG_OPTION },
6208 { OP_KEEP, "keep", ARG_NON },
6209 { OP_FAIL, "fail", ARG_NON },
6210 { OP_JUMP, "jump", ARG_RELADDR },
6211 { OP_PUSH, "push", ARG_RELADDR },
6212 { OP_POP, "pop", ARG_NON },
6213 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6214 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6215 { OP_REPEAT, "repeat", ARG_SPECIAL },
6216 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6217 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6218 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6219 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6220 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6221 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6222 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6223 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6224 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6225 { OP_PUSH_POS, "push-pos", ARG_NON },
6226 { OP_POP_POS, "pop-pos", ARG_NON },
6227 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6228 { OP_FAIL_POS, "fail-pos", ARG_NON },
6229 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6230 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6231 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6232 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6233 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6234 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6235 { OP_ABSENT, "absent", ARG_RELADDR },
6236 { OP_ABSENT_END, "absent-end", ARG_NON },
6237 { OP_CALL, "call", ARG_ABSADDR },
6238 { OP_RETURN, "return", ARG_NON },
6239 { OP_CONDITION, "condition", ARG_SPECIAL },
6240 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6241 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6242 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6243 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6245 "state-check-anychar-ml*", ARG_STATE_CHECK },
6246 { -1, "", ARG_NON }
6247 };
6248
6249static const char*
6250op2name(int opcode)
6251{
6252 int i;
6253
6254 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6255 if (opcode == OnigOpInfo[i].opcode)
6256 return OnigOpInfo[i].name;
6257 }
6258 return "";
6259}
6260
6261static int
6262op2arg_type(int opcode)
6263{
6264 int i;
6265
6266 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6267 if (opcode == OnigOpInfo[i].opcode)
6268 return OnigOpInfo[i].arg_type;
6269 }
6270 return ARG_SPECIAL;
6271}
6272
6273# ifdef ONIG_DEBUG_PARSE_TREE
/* Writes `indent` space characters to `f`; writes nothing when `indent`
   is zero or negative. */
static void
Indent(FILE* f, int indent)
{
  int remaining = indent;

  while (remaining > 0) {
    putc(' ', f);
    remaining--;
  }
}
6280# endif /* ONIG_DEBUG_PARSE_TREE */
6281
6282static void
6283p_string(FILE* f, ptrdiff_t len, UChar* s)
6284{
6285 fputs(":", f);
6286 while (len-- > 0) { fputc(*s++, f); }
6287}
6288
6289static void
6290p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6291{
6292 int x = len * mb_len;
6293
6294 fprintf(f, ":%d:", len);
6295 while (x-- > 0) { fputc(*s++, f); }
6296}
6297
/*
 * Debug helper: prints the single byte-code instruction at `bp` to `f` as
 * "[name...]" and, when `nextp` is non-NULL, stores the address just past
 * the instruction in *nextp.  `bpend` and `enc` are used to measure the
 * character of OP_EXACT1_IC.
 *
 * Opcodes whose operand kind is not ARG_SPECIAL are decoded generically by
 * that kind; all others are decoded by the opcode-specific switch.  An
 * opcode not handled by that switch is reported on stderr.
 *
 * NOTE(review): many lines of this listing are missing from this extract
 * (gaps in the listing numbers below).  Among them are the declarations of
 * `len` and `scn`, several `case` labels, and the statements that read
 * `len` (and advance `bp`) before it is used.  Read the gaps against the
 * original source before relying on this text.
 */
6298 extern void
6299 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6300 OnigEncoding enc)
6301 {
6302 int i, n, arg_type;
6303 RelAddrType addr;
6305 MemNumType mem;
6307 OnigCodePoint code;
6308 UChar *q;
6309
6310 fprintf(f, "[%s", op2name(*bp));
6311 arg_type = op2arg_type(*bp);
6312 if (arg_type != ARG_SPECIAL) {
/* Generic decoding: step over the opcode, then read one operand of the
   registered kind. */
6313 bp++;
6314 switch (arg_type) {
6315 case ARG_NON:
6316 break;
6317 case ARG_RELADDR:
6318 GET_RELADDR_INC(addr, bp);
6319 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6320 break;
6321 case ARG_ABSADDR:
6322 GET_ABSADDR_INC(addr, bp);
6323 fprintf(f, ":(%d)", addr);
6324 break;
6325 case ARG_LENGTH:
6327 fprintf(f, ":%d", len);
6328 break;
6329 case ARG_MEMNUM:
6330 mem = *((MemNumType* )bp);
6331 bp += SIZE_MEMNUM;
6332 fprintf(f, ":%d", mem);
6333 break;
6334 case ARG_OPTION:
6335 {
6336 OnigOptionType option = *((OnigOptionType* )bp);
6337 bp += SIZE_OPTION;
6338 fprintf(f, ":%d", option);
6339 }
6340 break;
6341
6342 case ARG_STATE_CHECK:
6343 scn = *((StateCheckNumType* )bp);
6345 fprintf(f, ":%d", scn);
6346 break;
6347 }
6348 }
6349 else {
/* Opcode-specific decoding; bp is advanced past the opcode by the switch
   expression itself. */
6350 switch (*bp++) {
6351 case OP_EXACT1:
6354 p_string(f, 1, bp++); break;
6355 case OP_EXACT2:
6356 p_string(f, 2, bp); bp += 2; break;
6357 case OP_EXACT3:
6358 p_string(f, 3, bp); bp += 3; break;
6359 case OP_EXACT4:
6360 p_string(f, 4, bp); bp += 4; break;
6361 case OP_EXACT5:
6362 p_string(f, 5, bp); bp += 5; break;
6363 case OP_EXACTN:
6365 p_len_string(f, len, 1, bp);
6366 bp += len;
6367 break;
6368
6369 case OP_EXACTMB2N1:
6370 p_string(f, 2, bp); bp += 2; break;
6371 case OP_EXACTMB2N2:
6372 p_string(f, 4, bp); bp += 4; break;
6373 case OP_EXACTMB2N3:
6374 p_string(f, 6, bp); bp += 6; break;
6375 case OP_EXACTMB2N:
6377 p_len_string(f, len, 2, bp);
6378 bp += len * 2;
6379 break;
6380 case OP_EXACTMB3N:
6382 p_len_string(f, len, 3, bp);
6383 bp += len * 3;
6384 break;
6385 case OP_EXACTMBN:
6386 {
6387 int mb_len;
6388
6389 GET_LENGTH_INC(mb_len, bp);
6391 fprintf(f, ":%d:%d:", mb_len, len);
6392 n = len * mb_len;
6393 while (n-- > 0) { fputc(*bp++, f); }
6394 }
6395 break;
6396
6397 case OP_EXACT1_IC:
/* One character, measured with the encoding's length function. */
6398 len = enclen(enc, bp, bpend);
6399 p_string(f, len, bp);
6400 bp += len;
6401 break;
6402 case OP_EXACTN_IC:
6404 p_len_string(f, len, 1, bp);
6405 bp += len;
6406 break;
6407
6408 case OP_CCLASS:
/* Only the number of set bits of the bit set is printed. */
6409 n = bitset_on_num((BitSetRef )bp);
6410 bp += SIZE_BITSET;
6411 fprintf(f, ":%d", n);
6412 break;
6413
6414 case OP_CCLASS_NOT:
6415 n = bitset_on_num((BitSetRef )bp);
6416 bp += SIZE_BITSET;
6417 fprintf(f, ":%d", n);
6418 break;
6419
6420 case OP_CCLASS_MB:
6421 case OP_CCLASS_MB_NOT:
6423 q = bp;
6424 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6425 ALIGNMENT_RIGHT(q);
6426 # endif
6427 GET_CODE_POINT(code, q);
6428 bp += len;
6429 fprintf(f, ":%d:%d", (int )code, len);
6430 break;
6431
6432 case OP_CCLASS_MIX:
6433 case OP_CCLASS_MIX_NOT:
6434 n = bitset_on_num((BitSetRef )bp);
6435 bp += SIZE_BITSET;
6437 q = bp;
6438 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6439 ALIGNMENT_RIGHT(q);
6440 # endif
6441 GET_CODE_POINT(code, q);
6442 bp += len;
6443 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6444 break;
6445
6446 case OP_BACKREFN_IC:
6447 mem = *((MemNumType* )bp);
6448 bp += SIZE_MEMNUM;
6449 fprintf(f, ":%d", mem);
6450 break;
6451
6453 case OP_BACKREF_MULTI:
6454 fputs(" ", f);
/* Comma-separated list of `len` group numbers. */
6456 for (i = 0; i < len; i++) {
6457 GET_MEMNUM_INC(mem, bp);
6458 if (i > 0) fputs(", ", f);
6459 fprintf(f, "%d", mem);
6460 }
6461 break;
6462
6464 {
6465 OnigOptionType option;
6466 LengthType level;
6467
6468 GET_OPTION_INC(option, bp);
6469 fprintf(f, ":%d", option);
6470 GET_LENGTH_INC(level, bp);
6471 fprintf(f, ":%d", level);
6472
6473 fputs(" ", f);
6475 for (i = 0; i < len; i++) {
6476 GET_MEMNUM_INC(mem, bp);
6477 if (i > 0) fputs(", ", f);
6478 fprintf(f, "%d", mem);
6479 }
6480 }
6481 break;
6482
6483 case OP_REPEAT:
6484 case OP_REPEAT_NG:
6485 {
6486 mem = *((MemNumType* )bp);
6487 bp += SIZE_MEMNUM;
6488 addr = *((RelAddrType* )bp);
6489 bp += SIZE_RELADDR;
6490 fprintf(f, ":%d:%d", mem, addr);
6491 }
6492 break;
6493
6496 addr = *((RelAddrType* )bp);
6497 bp += SIZE_RELADDR;
6498 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6499 p_string(f, 1, bp);
6500 bp += 1;
6501 break;
6502
6503 case OP_LOOK_BEHIND:
6505 fprintf(f, ":%d", len);
6506 break;
6507
6509 GET_RELADDR_INC(addr, bp);
6511 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6512 break;
6513
6516 scn = *((StateCheckNumType* )bp);
6518 addr = *((RelAddrType* )bp);
6519 bp += SIZE_RELADDR;
6520 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6521 break;
6522
6523 case OP_CONDITION:
6524 GET_MEMNUM_INC(mem, bp);
6525 GET_RELADDR_INC(addr, bp);
6526 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6527 break;
6528
6529 default:
/* bp was already advanced by the switch expression, so the opcode is
   at bp[-1]. */
6530 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6531 bp[-1]);
6532 }
6533 }
6534 fputs("]", f);
6535 if (nextp) *nextp = bp;
6536 }
6537
6538# ifdef ONIG_DEBUG_COMPILE
6539static void
6540print_compiled_byte_code_list(FILE* f, regex_t* reg)
6541{
6542 int ncode;
6543 UChar* bp = reg->p;
6544 UChar* end = reg->p + reg->used;
6545
6546 fprintf(f, "code length: %d", reg->used);
6547
6548 ncode = -1;
6549 while (bp < end) {
6550 ncode++;
6551 if (ncode % 5 == 0)
6552 fprintf(f, "\n%ld:", bp - reg->p);
6553 else
6554 fprintf(f, " %ld:", bp - reg->p);
6555 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6556 }
6557
6558 fprintf(f, "\n");
6559}
6560# endif /* ONIG_DEBUG_COMPILE */
6561
6562# ifdef ONIG_DEBUG_PARSE_TREE
/*
 * Debug helper: prints the parse tree rooted at `node` to `f`, one node
 * per line, indenting each nesting level by 3 further columns starting at
 * column `indent`.  Child nodes (list/alt elements, quantifier and enclose
 * targets, and the targets of look-around anchors) are printed
 * recursively.  A NULL node, a malformed list/alt chain or an unknown
 * ctype prints an error message and terminates the process via exit(0).
 *
 * NOTE(review): listing line 6709 (presumably the `case` label that
 * belongs to the "stop-bt" output) is missing from this extract.
 */
6563 static void
6564 print_indent_tree(FILE* f, Node* node, int indent)
6565 {
6566 int i, type, container_p = 0;
6567 int add = 3;
6568 UChar* p;
6569
6570 Indent(f, indent);
6571 if (IS_NULL(node)) {
6572 fprintf(f, "ERROR: null node!!!\n");
6573 exit (0);
6574 }
6575
6576 type = NTYPE(node);
6577 switch (type) {
6578 case NT_LIST:
6579 case NT_ALT:
6580 if (NTYPE(node) == NT_LIST)
6581 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6582 else
6583 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6584
/* Walk the cons chain: every cell must have the same type as the head. */
6585 print_indent_tree(f, NCAR(node), indent + add);
6586 while (IS_NOT_NULL(node = NCDR(node))) {
6587 if (NTYPE(node) != type) {
6588 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6589 exit(0);
6590 }
6591 print_indent_tree(f, NCAR(node), indent + add);
6592 }
6593 break;
6594
6595 case NT_STR:
6596 fprintf(f, "<string%s:%"PRIxPTR">",
6597 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
/* Bytes in 0x20..0x7e are printed as characters, all others in hex. */
6598 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6599 if (*p >= 0x20 && *p < 0x7f)
6600 fputc(*p, f);
6601 else {
6602 fprintf(f, " 0x%02x", *p);
6603 }
6604 }
6605 break;
6606
6607 case NT_CCLASS:
6608 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6609 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6610 if (NCCLASS(node)->mbuf) {
/* The buffer is printed as a leading value followed by pairs shown as
   "from-to" in hex. */
6611 BBuf* bbuf = NCCLASS(node)->mbuf;
6612 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6613 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6614 fprintf(f, "%d", *data++);
6615 for (; data < end; data+=2) {
6616 fprintf(f, ",");
6617 fprintf(f, "%04x-%04x", data[0], data[1]);
6618 }
6619 }
6620 break;
6621
6622 case NT_CTYPE:
6623 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6624 switch (NCTYPE(node)->ctype) {
6625 case ONIGENC_CTYPE_WORD:
6626 if (NCTYPE(node)->not != 0)
6627 fputs("not word", f);
6628 else
6629 fputs("word", f);
6630 break;
6631
6632 default:
6633 fprintf(f, "ERROR: undefined ctype.\n");
6634 exit(0);
6635 }
6636 break;
6637
6638 case NT_CANY:
6639 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6640 break;
6641
6642 case NT_ANCHOR:
6643 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
/* Look-around anchors set container_p so that their target is printed
   after the switch. */
6644 switch (NANCHOR(node)->type) {
6645 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6646 case ANCHOR_END_BUF: fputs("end buf", f); break;
6647 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6648 case ANCHOR_END_LINE: fputs("end line", f); break;
6649 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6650 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6651
6652 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6653 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6654 # ifdef USE_WORD_BEGIN_END
6655 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6656 case ANCHOR_WORD_END: fputs("word end", f); break;
6657 # endif
6658 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6659 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6660 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6661 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6662 case ANCHOR_KEEP: fputs("keep",f); break;
6663
6664 default:
6665 fprintf(f, "ERROR: undefined anchor type.\n");
6666 break;
6667 }
6668 break;
6669
6670 case NT_BREF:
6671 {
6672 int* p;
6673 BRefNode* br = NBREF(node);
6674 p = BACKREFS_P(br);
6675 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6676 for (i = 0; i < br->back_num; i++) {
6677 if (i > 0) fputs(", ", f);
6678 fprintf(f, "%d", p[i]);
6679 }
6680 }
6681 break;
6682
6683 # ifdef USE_SUBEXP_CALL
6684 case NT_CALL:
6685 {
6686 CallNode* cn = NCALL(node);
6687 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6688 p_string(f, cn->name_end - cn->name, cn->name);
6689 }
6690 break;
6691 # endif
6692
6693 case NT_QTFR:
/* Printed as {lower,upper}, with a trailing '?' for a non-greedy one. */
6694 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6695 NQTFR(node)->lower, NQTFR(node)->upper,
6696 (NQTFR(node)->greedy ? "" : "?"));
6697 print_indent_tree(f, NQTFR(node)->target, indent + add);
6698 break;
6699
6700 case NT_ENCLOSE:
6701 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6702 switch (NENCLOSE(node)->type) {
6703 case ENCLOSE_OPTION:
6704 fprintf(f, "option:%d", NENCLOSE(node)->option);
6705 break;
6706 case ENCLOSE_MEMORY:
6707 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6708 break;
6710 fprintf(f, "stop-bt");
6711 break;
6712 case ENCLOSE_CONDITION:
6713 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6714 break;
6715 case ENCLOSE_ABSENT:
6716 fprintf(f, "absent");
6717 break;
6718
6719 default:
6720 break;
6721 }
6722 fprintf(f, "\n");
6723 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6724 break;
6725
6726 default:
6727 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6728 break;
6729 }
6730
/* The node kinds excluded here have already ended their own line. */
6731 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6732 type != NT_ENCLOSE)
6733 fprintf(f, "\n");
6734
6735 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6736
6737 fflush(f);
6738 }
6739
6740static void
6741print_tree(FILE* f, Node* node)
6742{
6743 print_indent_tree(f, node, 0);
6744}
6745# endif /* ONIG_DEBUG_PARSE_TREE */
6746#endif /* ONIG_DEBUG */
#define add(x, y)
Definition: date_strftime.c:23
struct RIMemo * ptr
Definition: debug.c:65
#define d1
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
#define ARG_UNUSED
Definition: nkf.h:181
#define ONIG_INFINITE_DISTANCE
Definition: onigmo.h:85
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: onigmo.h:378
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: onigmo.h:363
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
OnigRegexType regex_t
Definition: onigmo.h:803
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:685
unsigned int OnigCaseFoldType
Definition: onigmo.h:95
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
#define ONIGERR_INVALID_ARGUMENT
Definition: onigmo.h:640
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: onigmo.h:686
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: onigmo.h:324
#define ONIGERR_TYPE_BUG
Definition: onigmo.h:630
#define UChar
Definition: onigmo.h:76
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
#define ONIGENC_IS_UNDEF(enc)
Definition: onigmo.h:317
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: onigmo.h:131
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: onigmo.h:662
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
unsigned int OnigCodePoint
Definition: onigmo.h:80
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
#define ONIGERR_MEMORY
Definition: onigmo.h:629
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: onigmo.h:458
ONIG_EXTERN int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, int **nums)
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: onigmo.h:700
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: onigmo.h:683
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: onigmo.h:675
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: onigmo.h:334
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: onigmo.h:332
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: onigmo.h:340
#define ONIG_CHAR_TABLE_SIZE
Definition: onigmo.h:753
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: onigmo.h:693
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:598
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
ONIG_EXTERN int onigenc_init(void)
Definition: regenc.c:36
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: onigmo.h:290
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: onigmo.h:135
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
size_t OnigDistance
Definition: onigmo.h:82
#define ONIGENC_CASE_FOLD_MIN
Definition: onigmo.h:130
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
Definition: onigmo.h:637
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: onigmo.h:322
#define NULL
#define bp()
long int ptrdiff_t
#define xfree
int fputc(int, FILE *)
#define xrealloc
__intptr_t intptr_t
#define numberof(array)
int fprintf(FILE *__restrict__, const char *__restrict__,...) __attribute__((__format__(__printf__
const char size_t n
#define stderr
() void(cc->call !=vm_call_general)
#define xmalloc
uint32_t i
__inline__ const void *__restrict__ size_t len
#define PRIdPTR
int VALUE v
#define PRIxPTR
VALUE ID VALUE old
#define PRIuPTR
#define TRUE
unsigned int size
const struct rb_call_cache * cc
void exit(int __status) __attribute__((__noreturn__))
__inline__ int
if((__builtin_expect(!!(!me), 0)))
int putc(int, FILE *)
VALUE ID id
int fputs(const char *__restrict__, FILE *__restrict__)
int fflush(FILE *)
#define IN_VAR_REPEAT
Definition: regcomp.c:3863
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6076
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5962
#define RECURSION_EXIST
Definition: regcomp.c:2888
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
#define EXPAND_STRING_MAX_LENGTH
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5918
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:721
#define RECURSION_INFINITE
Definition: regcomp.c:2889
int onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
Definition: regcomp.c:5998
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
#define REPEAT_RANGE_ALLOC
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2414
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5976
#define ALLOWED_ANCHOR_IN_LB_NOT
#define ALLOWED_TYPE_IN_LB
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:142
int onig_end(void)
Definition: regcomp.c:6058
#define ALLOWED_ANCHOR_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5630
#define COMP_EM_BASE
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5672
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6120
OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: regcomp.c:33
#define IN_REPEAT
Definition: regcomp.c:3862
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6097
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2413
#define IN_NOT
Definition: regcomp.c:3861
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo)
Definition: regcomp.c:5704
#define ALLOWED_ENCLOSE_IN_LB
#define IN_CALL
Definition: regcomp.c:3864
int onig_init(void)
Definition: regcomp.c:6004
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:315
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5657
void onig_add_end_call(void(*func)(void))
Definition: regcomp.c:6028
#define COMPILE_INIT_SIZE
#define FOUND_CALLED_NODE
void onig_free(regex_t *reg)
Definition: regcomp.c:5647
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5681
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4955
#define IN_RECCALL
Definition: regcomp.c:3865
#define IN_ALT
Definition: regcomp.c:3860
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define CKN_ON
Definition: regcomp.c:722
int onig_compile_ruby(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5713
#define enclen(enc, p, e)
Definition: regenc.h:93
int AbsAddrType
Definition: regint.h:668
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:323
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:711
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
#define SIZE_OP_PUSH_ABSENT_POS
Definition: regint.h:737
#define SIZE_OP_ABSENT_END
Definition: regint.h:739
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:728
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define USE_SUBEXP_CALL
Definition: regint.h:70
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:349
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:403
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:347
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:343
#define GET_OPTION_INC(option, p)
Definition: regint.h:692
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:356
int LengthType
Definition: regint.h:669
short int MemNumType
Definition: regint.h:671
#define OPT_EXACT_MAXLEN
Definition: regint.h:90
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:710
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:690
#define STACK_POP_LEVEL_ALL
Definition: regint.h:339
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:723
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:344
#define BITSET_AT(bs, pos)
Definition: regint.h:435
#define SIZE_OP_MEMORY_END
Definition: regint.h:725
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
#define SIZE_RELADDR
Definition: regint.h:676
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
#define SIZE_BITSET
Definition: regint.h:425
#define IS_NOT_NULL(p)
Definition: regint.h:299
#define ANCHOR_END_LINE
Definition: regint.h:532
#define SIZE_OP_ABSENT
Definition: regint.h:738
#define SIZE_OP_PUSH_POS
Definition: regint.h:714
#define SIZE_OP_PUSH
Definition: regint.h:708
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:493
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
#define SIZE_OP_RETURN
Definition: regint.h:735
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:727
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:730
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:345
#define SIZE_OP_MEMORY_START
Definition: regint.h:721
#define SIZE_OPCODE
Definition: regint.h:675
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:338
#define SIZE_POINTER
Definition: regint.h:684
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:360
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
#define SIZE_OP_FAIL
Definition: regint.h:720
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:724
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:544
#define STACK_POP_LEVEL_FREE
Definition: regint.h:337
#define SIZE_OP_POP_POS
Definition: regint.h:716
#define BBUF_INIT(buf, size)
Definition: regint.h:447
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:715
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:321
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:706
#define GET_LENGTH_INC(len, p)
Definition: regint.h:689
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
#define BITSET_SIZE
Definition: regint.h:415
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
unsigned int BitStatusType
Definition: regint.h:352
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
#define IS_NULL(p)
Definition: regint.h:298
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
void onig_transfer(regex_t *to, regex_t *from)
#define SIZE_OP_POP
Definition: regint.h:709
#define IS_MULTILINE(option)
Definition: regint.h:382
#define BBUF_ADD1(buf, byte)
Definition: regint.h:492
int RelAddrType
Definition: regint.h:667
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:705
#define SIZE_OP_JUMP
Definition: regint.h:707
#define ANCHOR_PREC_READ
Definition: regint.h:538
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:329
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:346
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:722
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
short int StateCheckNumType
Definition: regint.h:672
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:342
#define ANCHOR_KEEP
Definition: regint.h:546
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:491
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:732
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:494
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:729
#define SIZE_MEMNUM
Definition: regint.h:679
#define SIZE_LENGTH
Definition: regint.h:678
@ OP_EXACTMB3N
Definition: regint.h:563
@ OP_END
Definition: regint.h:551
@ OP_LOOK_BEHIND
Definition: regint.h:644
@ OP_CALL
Definition: regint.h:651
@ OP_ASCII_WORD
Definition: regint.h:590
@ OP_STATE_CHECK_PUSH_OR_JUMP
Definition: regint.h:657
@ OP_PUSH_POS_NOT
Definition: regint.h:640
@ OP_STATE_CHECK_ANYCHAR_STAR
Definition: regint.h:659
@ OP_REPEAT_INC_NG
Definition: regint.h:630
@ OP_STATE_CHECK
Definition: regint.h:658
@ OP_MEMORY_END_REC
Definition: regint.h:617
@ OP_REPEAT_INC
Definition: regint.h:629
@ OP_ANYCHAR_ML
Definition: regint.h:577
@ OP_POP_POS
Definition: regint.h:639
@ OP_WORD_END
Definition: regint.h:588
@ OP_WORD_BEGIN
Definition: regint.h:587
@ OP_POP_STOP_BT
Definition: regint.h:643
@ OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:580
@ OP_BACKREFN
Definition: regint.h:606
@ OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:645
@ OP_PUSH_STOP_BT
Definition: regint.h:642
@ OP_EXACTMBN
Definition: regint.h:564
@ OP_EXACTMB2N
Definition: regint.h:562
@ OP_MEMORY_START
Definition: regint.h:612
@ OP_SET_OPTION
Definition: regint.h:664
@ OP_NULL_CHECK_START
Definition: regint.h:633
@ OP_BEGIN_LINE
Definition: regint.h:599
@ OP_WORD_BOUND
Definition: regint.h:585
@ OP_NOT_ASCII_WORD_BOUND
Definition: regint.h:593
@ OP_ANYCHAR
Definition: regint.h:576
@ OP_SET_OPTION_PUSH
Definition: regint.h:663
@ OP_EXACT4
Definition: regint.h:556
@ OP_EXACT5
Definition: regint.h:557
@ OP_REPEAT
Definition: regint.h:627
@ OP_EXACT2
Definition: regint.h:554
@ OP_MEMORY_END
Definition: regint.h:616
@ OP_ANYCHAR_ML_STAR
Definition: regint.h:579
@ OP_EXACTN
Definition: regint.h:558
@ OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:625
@ OP_ANYCHAR_STAR
Definition: regint.h:578
@ OP_JUMP
Definition: regint.h:622
@ OP_END_LINE
Definition: regint.h:600
@ OP_MEMORY_END_PUSH_REC
Definition: regint.h:615
@ OP_BEGIN_POSITION
Definition: regint.h:602
@ OP_PUSH_POS
Definition: regint.h:638
@ OP_NOT_ASCII_WORD
Definition: regint.h:591
@ OP_NULL_CHECK_END_MEMST_PUSH
Definition: regint.h:636
@ OP_END_BUF
Definition: regint.h:598
@ OP_EXACT1
Definition: regint.h:553
@ OP_CCLASS
Definition: regint.h:569
@ OP_WORD
Definition: regint.h:583
@ OP_FINISH
Definition: regint.h:550
@ OP_ASCII_WORD_END
Definition: regint.h:595
@ OP_BACKREF1
Definition: regint.h:604
@ OP_PUSH
Definition: regint.h:623
@ OP_BACKREFN_IC
Definition: regint.h:607
@ OP_PUSH_ABSENT_POS
Definition: regint.h:647
@ OP_MEMORY_END_PUSH
Definition: regint.h:614
@ OP_ASCII_WORD_BOUND
Definition: regint.h:592
@ OP_STATE_CHECK_PUSH
Definition: regint.h:656
@ OP_CCLASS_MIX_NOT
Definition: regint.h:574
@ OP_CCLASS_MB
Definition: regint.h:570
@ OP_SEMI_END_BUF
Definition: regint.h:601
@ OP_STATE_CHECK_ANYCHAR_ML_STAR
Definition: regint.h:660
@ OP_CONDITION
Definition: regint.h:654
@ OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:626
@ OP_ABSENT
Definition: regint.h:648
@ OP_NOT_WORD
Definition: regint.h:584
@ OP_ASCII_WORD_BEGIN
Definition: regint.h:594
@ OP_REPEAT_NG
Definition: regint.h:628
@ OP_BACKREF_MULTI_IC
Definition: regint.h:609
@ OP_REPEAT_INC_SG
Definition: regint.h:631
@ OP_EXACT3
Definition: regint.h:555
@ OP_EXACTMB2N1
Definition: regint.h:559
@ OP_CCLASS_MB_NOT
Definition: regint.h:573
@ OP_FAIL_POS
Definition: regint.h:641
@ OP_EXACT1_IC
Definition: regint.h:566
@ OP_EXACTMB2N2
Definition: regint.h:560
@ OP_BEGIN_BUF
Definition: regint.h:597
@ OP_ANYCHAR_ML_STAR_PEEK_NEXT
Definition: regint.h:581
@ OP_RETURN
Definition: regint.h:652
@ OP_MEMORY_START_PUSH
Definition: regint.h:613
@ OP_KEEP
Definition: regint.h:619
@ OP_FAIL
Definition: regint.h:621
@ OP_NOT_WORD_BOUND
Definition: regint.h:586
@ OP_REPEAT_INC_NG_SG
Definition: regint.h:632
@ OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:646
@ OP_BACKREF2
Definition: regint.h:605
@ OP_EXACTMB2N3
Definition: regint.h:561
@ OP_NULL_CHECK_END
Definition: regint.h:634
@ OP_BACKREF_MULTI
Definition: regint.h:608
@ OP_CCLASS_NOT
Definition: regint.h:572
@ OP_NULL_CHECK_END_MEMST
Definition: regint.h:635
@ OP_BACKREF_WITH_LEVEL
Definition: regint.h:610
@ OP_CCLASS_MIX
Definition: regint.h:571
@ OP_EXACTN_IC
Definition: regint.h:567
@ OP_ABSENT_END
Definition: regint.h:649
@ OP_POP
Definition: regint.h:624
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:876
#define IS_FIND_CONDITION(option)
Definition: regint.h:387
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:719
#define xalloca
Definition: regint.h:213
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:680
void * PointerType
Definition: regint.h:673
#define SIZE_OP_REPEAT_INC
Definition: regint.h:712
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:543
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:348
#define xmemcpy
Definition: regint.h:202
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:688
Bits * BitSetRef
Definition: regint.h:423
#define SIZE_ABSADDR
Definition: regint.h:677
#define IS_IGNORECASE(option)
Definition: regint.h:383
#define SIZE_OP_FAIL_POS
Definition: regint.h:717
#define SIZE_OP_CONDITION
Definition: regint.h:736
#define SIZE_OPTION
Definition: regint.h:682
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:726
#define SIZE_OP_CALL
Definition: regint.h:734
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:405
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:357
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:731
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:687
#define ANCHOR_WORD_END
Definition: regint.h:537
#define ANCHOR_END_BUF
Definition: regint.h:530
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:477
#define SIZE_OP_SET_OPTION
Definition: regint.h:718
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:733
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6611
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6638
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
void onig_node_free(Node *node)
Definition: regparse.c:1062
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2204
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
#define NST_RECURSION
Definition: regparse.h:135
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:153
#define NST_ADDR_FIXED
Definition: regparse.h:137
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:163
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:165
#define ENCLOSE_OPTION
Definition: regparse.h:95
#define NST_MEM_BACKREFED
Definition: regparse.h:133
#define NT_CANY
Definition: regparse.h:41
#define NSTR(node)
Definition: regparse.h:76
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:161
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:115
#define NT_ENCLOSE
Definition: regparse.h:44
#define NENCLOSE(node)
Definition: regparse.h:81
#define NT_QTFR
Definition: regparse.h:43
#define ENCLOSE_MEMORY
Definition: regparse.h:94
#define NT_CALL
Definition: regparse.h:48
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:148
#define NBREF(node)
Definition: regparse.h:79
#define NST_MAX_FIXED
Definition: regparse.h:129
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:154
#define NT_ANCHOR
Definition: regparse.h:45
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:158
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:92
#define NST_IN_REPEAT
Definition: regparse.h:140
#define ENCLOSE_ABSENT
Definition: regparse.h:98
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:147
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:164
#define NT_CTYPE
Definition: regparse.h:40
#define NCTYPE(node)
Definition: regparse.h:78
#define NULL_NODE
Definition: regparse.h:283
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:151
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:124
#define NST_CLEN_FIXED
Definition: regparse.h:130
#define SET_CALL_RECURSION(node)
Definition: regparse.h:160
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:149
#define ENCLOSE_CONDITION
Definition: regparse.h:97
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:152
#define NCCLASS(node)
Definition: regparse.h:77
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
#define NSTRING_LEN(node)
Definition: regparse.h:108
#define NST_MIN_FIXED
Definition: regparse.h:128
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:157
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:123
#define NT_CCLASS
Definition: regparse.h:39
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:134
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
#define NTYPE2BIT(type)
Definition: regparse.h:51
#define NST_CALLED
Definition: regparse.h:136
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:125
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:65
#define NT_LIST
Definition: regparse.h:46
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:150
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:145
#define NT_BREF
Definition: regparse.h:42
#define NCDR(node)
Definition: regparse.h:87
#define NST_MARK1
Definition: regparse.h:131
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:91
#define NCAR(node)
Definition: regparse.h:86
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:116
#define NTYPE(node)
Definition: regparse.h:69
#define NT_STR
Definition: regparse.h:38
#define NQTFR(node)
Definition: regparse.h:80
#define NT_ALT
Definition: regparse.h:47
#define NSTRING_IS_RAW(node)
Definition: regparse.h:114
#define NST_MARK2
Definition: regparse.h:132
#define NCALL(node)
Definition: regparse.h:84
#define BACKREFS_P(br)
Definition: regparse.h:119
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:112
#define NANCHOR(node)
Definition: regparse.h:82
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:155
#define f
Definition: regint.h:441
UChar * p
Definition: regint.h:442
unsigned int used
Definition: regint.h:443
int ascii_range
Definition: regparse.h:249
int type
Definition: regparse.h:246
int char_len
Definition: regparse.h:248
struct _Node * target
Definition: regparse.h:247
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:239
int state
Definition: regparse.h:237
int nest_level
Definition: regparse.h:241
int back_num
Definition: regparse.h:238
int * back_dynamic
Definition: regparse.h:240
BitSet bs
Definition: regint.h:807
BBuf * mbuf
Definition: regint.h:808
UChar * name
Definition: regparse.h:227
struct _Node * target
Definition: regparse.h:229
UChar * name_end
Definition: regparse.h:228
UnsetAddrList * unset_addr_list
Definition: regparse.h:230
int group_num
Definition: regparse.h:226
OnigDistance min_len
Definition: regparse.h:204
OnigOptionType option
Definition: regparse.h:200
int opt_count
Definition: regparse.h:207
int char_len
Definition: regparse.h:206
AbsAddrType call_addr
Definition: regparse.h:201
OnigDistance max_len
Definition: regparse.h:205
struct _Node * target
Definition: regparse.h:202
int regnum
Definition: regparse.h:199
OnigDistance min
Definition: regcomp.c:4345
OnigDistance max
Definition: regcomp.c:4346
MinMaxLen len
Definition: regcomp.c:4381
OptAncInfo anc
Definition: regcomp.c:4383
OptMapInfo map
Definition: regcomp.c:4388
OptExactInfo exm
Definition: regcomp.c:4385
OptExactInfo expr
Definition: regcomp.c:4386
OptExactInfo exb
Definition: regcomp.c:4384
const char * name
Definition: onigmo.h:162
struct OnigEndCallListItem * next
Definition: regint.h:880
void(* func)(void)
Definition: regint.h:881
OnigUChar * par
Definition: onigmo.h:740
OnigUChar * par_end
Definition: onigmo.h:741
OnigEncoding enc
Definition: onigmo.h:739
OnigOptionType options
Definition: onigmo.h:483
int right_anchor
Definition: regcomp.c:4359
int left_anchor
Definition: regcomp.c:4358
OnigEncoding enc
Definition: regcomp.c:4351
MinMaxLen mmd
Definition: regcomp.c:4350
ScanEnv * scan_env
Definition: regcomp.c:4354
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4353
OnigOptionType options
Definition: regcomp.c:4352
OptAncInfo anc
Definition: regcomp.c:4364
int ignore_case
Definition: regcomp.c:4367
MinMaxLen mmd
Definition: regcomp.c:4363
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4369
int reach_end
Definition: regcomp.c:4366
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4377
int value
Definition: regcomp.c:4376
OptAncInfo anc
Definition: regcomp.c:4374
MinMaxLen mmd
Definition: regcomp.c:4373
int lower
Definition: regparse.h:183
struct _Node * target
Definition: regparse.h:182
int target_empty_info
Definition: regparse.h:186
struct _Node * head_exact
Definition: regparse.h:187
int greedy
Definition: regparse.h:185
int is_referred
Definition: regparse.h:189
int upper
Definition: regparse.h:184
struct _Node * next_head_exact
Definition: regparse.h:188
int state
Definition: regparse.h:181
UChar * error
Definition: regparse.h:301
BitStatusType bt_mem_end
Definition: regparse.h:297
int num_call
Definition: regparse.h:307
Node ** mem_nodes_dynamic
Definition: regparse.h:314
BitStatusType bt_mem_start
Definition: regparse.h:296
int sourceline
Definition: regparse.h:325
int num_mem
Definition: regparse.h:308
UnsetAddrList * unset_addr_list
Definition: regparse.h:305
int num_named
Definition: regparse.h:310
UChar * error_end
Definition: regparse.h:302
BitStatusType backrefed_mem
Definition: regparse.h:298
OnigEncoding enc
Definition: regparse.h:293
BitStatusType capture_history
Definition: regparse.h:295
const char * sourcefile
Definition: regparse.h:324
const OnigSyntaxType * syntax
Definition: regparse.h:294
UChar * s
Definition: regparse.h:172
unsigned int flag
Definition: regparse.h:174
UChar * end
Definition: regparse.h:173
int capa
Definition: regparse.h:175
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:176
struct _Node * target
Definition: regparse.h:214
int offset
Definition: regparse.h:213
UnsetAddr * us
Definition: regparse.h:220
OnigDistance dmin
Definition: onigmo.h:793
unsigned char * exact_end
Definition: onigmo.h:789
OnigEncoding enc
Definition: onigmo.h:776
unsigned int capture_history
Definition: onigmo.h:766
unsigned int bt_mem_start
Definition: onigmo.h:767
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
unsigned int used
Definition: onigmo.h:758
struct re_pattern_buffer * chain
Definition: onigmo.h:797
int num_null_check
Definition: onigmo.h:763
int repeat_range_alloc
Definition: onigmo.h:770
int num_comb_exp_check
Definition: onigmo.h:764
OnigRepeatRange * repeat_range
Definition: onigmo.h:774
int * int_map_backward
Definition: onigmo.h:792
unsigned int bt_mem_end
Definition: onigmo.h:768
int stack_pop_level
Definition: onigmo.h:769
OnigDistance anchor_dmax
Definition: onigmo.h:786
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: onigmo.h:790
unsigned int alloc
Definition: onigmo.h:759
OnigOptionType options
Definition: onigmo.h:772
unsigned char * p
Definition: onigmo.h:757
unsigned char * exact
Definition: onigmo.h:788
OnigDistance dmax
Definition: onigmo.h:794
OnigDistance anchor_dmin
Definition: onigmo.h:785
OnigPosition * beg
Definition: onigmo.h:719
int allocated
Definition: onigmo.h:717
OnigPosition * end
Definition: onigmo.h:720
#define env