12#include "ruby/internal/config.h"
19#include "internal/encoding.h"
20#include "internal/hash.h"
21#include "internal/imemo.h"
22#include "internal/re.h"
23#include "internal/string.h"
24#include "internal/object.h"
25#include "internal/ractor.h"
26#include "internal/variable.h"
34typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
35#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
37#define BEG(no) (regs->beg[(no)])
38#define END(no) (regs->end[(no)])
41static const char casetable[] = {
42 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
43 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
44 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
45 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
47 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
49 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
51 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
53 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
55 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
57 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
59 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
61 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
63 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
65 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
67 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
69 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
70 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
71 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
72 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
73 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
74 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
75 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
76 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
77 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
78 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
79 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
80 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
81 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
82 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
83 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
84 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
85 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
88# error >>> "You lose. You will need a translation table for your character set." <<<
92rb_memcicmp(
const void *x,
const void *y,
long len)
94 const unsigned char *p1 = x, *p2 = y;
98 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
106rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
108 const unsigned char *y;
110 if ((y = memmem(ys, n, xs, m)) != NULL)
117rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
119 const unsigned char *x = xs, *xe = xs + m;
120 const unsigned char *y = ys, *ye = ys + n;
121#define VALUE_MAX ((VALUE)~(VALUE)0)
125 rb_bug(
"!!too long pattern string!!");
127 if (!(y = memchr(y, *x, n - m + 1)))
131 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
151rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
153 const unsigned char *x = xs, *xe = xs + m;
154 const unsigned char *y = ys;
155 VALUE i, qstable[256];
158 for (i = 0; i < 256; ++i)
161 qstable[*x] = xe - x;
163 for (; y + m <= ys + n; y += *(qstable + y[m])) {
164 if (*xs == *y && memcmp(xs, y, m) == 0)
170static inline unsigned int
171rb_memsearch_qs_utf8_hash(
const unsigned char *x)
173 register const unsigned int mix = 8353;
174 register unsigned int h = *x;
199 return (
unsigned char)h;
203rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
205 const unsigned char *x = xs, *xe = xs + m;
206 const unsigned char *y = ys;
207 VALUE i, qstable[512];
210 for (i = 0; i < 512; ++i) {
213 for (; x < xe; ++x) {
214 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
217 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
218 if (*xs == *y && memcmp(xs, y, m) == 0)
225rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
227 const unsigned char *x = xs, x0 = *xs, *y = ys;
229 for (n -= m; n >= 0; n -= char_size, y += char_size) {
230 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
237rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
239 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
243rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
245 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
251 const unsigned char *x = x0, *y = y0;
253 if (m > n)
return -1;
255 return memcmp(x0, y0, m) == 0 ? 0 : -1;
261 const unsigned char *ys = memchr(y, *x, n);
270 return rb_memsearch_ss(x0, m, y0, n);
272 else if (enc == rb_utf8_encoding()){
273 return rb_memsearch_qs_utf8(x0, m, y0, n);
277 return rb_memsearch_wchar(x0, m, y0, n);
280 return rb_memsearch_qchar(x0, m, y0, n);
282 return rb_memsearch_qs(x0, m, y0, n);
285#define REG_ENCODING_NONE FL_USER6
287#define KCODE_FIXED FL_USER4
289#define ARG_REG_OPTION_MASK \
290 (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
291#define ARG_ENCODING_FIXED 16
292#define ARG_ENCODING_NONE 32
301 val = ONIG_OPTION_IGNORECASE;
304 val = ONIG_OPTION_EXTEND;
307 val = ONIG_OPTION_MULTILINE;
316enum { OPTBUF_SIZE = 4 };
319option_to_str(
char str[OPTBUF_SIZE],
int options)
322 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
323 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
324 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
330rb_char_to_option_kcode(
int c,
int *option,
int *kcode)
336 *kcode = rb_ascii8bit_encindex();
337 return (*option = ARG_ENCODING_NONE);
339 *kcode = ENCINDEX_EUC_JP;
342 *kcode = ENCINDEX_Windows_31J;
345 *kcode = rb_utf8_encindex();
349 return (*option = char_to_option(c));
351 *option = ARG_ENCODING_FIXED;
356rb_reg_check(
VALUE re)
364rb_reg_expr_str(
VALUE str,
const char *s,
long len,
367 const char *p, *pend;
372 p = s; pend = p +
len;
376 c = rb_enc_ascget(p, pend, &clen, enc);
379 p += mbclen(p, pend, enc);
403 int unicode_p = rb_enc_unicode_p(enc);
406 c = rb_enc_ascget(p, pend, &clen, enc);
407 if (c ==
'\\' && p+clen < pend) {
408 int n = clen + mbclen(p+clen, pend, enc);
414 clen = rb_enc_precise_mbclen(p, pend, enc);
416 c = (
unsigned char)*p;
422 rb_str_buf_cat_escaped_char(str, c, unicode_p);
429 else if (c == term) {
437 else if (!rb_enc_isspace(c, enc)) {
441 snprintf(b,
sizeof(b),
"\\x%02X", c);
457 rb_encoding *resenc = rb_default_internal_encoding();
458 if (resenc == NULL) resenc = rb_default_external_encoding();
460 if (re && rb_enc_asciicompat(enc)) {
461 rb_enc_copy(str, re);
464 rb_enc_associate(str, rb_usascii_encoding());
468 rb_reg_expr_str(str, RSTRING_PTR(src_str), RSTRING_LEN(src_str), enc, resenc,
'/');
473 char opts[OPTBUF_SIZE];
475 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
477 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
503rb_reg_source(
VALUE re)
524rb_reg_inspect(
VALUE re)
529 return rb_reg_desc(re);
532static VALUE rb_reg_str_with_term(
VALUE re,
int term);
564 return rb_reg_str_with_term(re,
'/');
568rb_reg_str_with_term(
VALUE re,
int term)
571 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
573 char optbuf[OPTBUF_SIZE + 1];
578 rb_enc_copy(str, re);
581 const UChar *ptr = (UChar *)RSTRING_PTR(src_str);
582 long len = RSTRING_LEN(src_str);
584 if (
len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
587 if ((
len -= 2) > 0) {
589 opt = char_to_option((
int )*ptr);
599 if (
len > 1 && *ptr ==
'-') {
603 opt = char_to_option((
int )*ptr);
618 if (*ptr ==
':' && ptr[
len-1] ==
')') {
625 err = onig_new(&rp, ptr, ptr +
len, options,
626 enc, OnigDefaultSyntax, NULL);
639 if ((options & embeddable) != embeddable) {
641 option_to_str(optbuf + 1, ~options);
646 if (rb_enc_asciicompat(enc)) {
647 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
655 rb_enc_associate(str, rb_usascii_encoding());
659 s = RSTRING_PTR(str);
660 e = RSTRING_END(str);
665 rb_str_resize(str, RSTRING_LEN(str) - n);
667 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
670 rb_enc_copy(str, re);
677NORETURN(
static void rb_reg_raise(
const char *err,
VALUE re));
680rb_reg_raise(
const char *err,
VALUE re)
682 VALUE desc = rb_reg_desc(re);
688rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
690 char opts[OPTBUF_SIZE + 1];
692 rb_encoding *resenc = rb_default_internal_encoding();
693 if (resenc == NULL) resenc = rb_default_external_encoding();
695 rb_enc_associate(desc, enc);
697 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
699 option_to_str(opts + 1, options);
704NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
707rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
709 rb_exc_raise(rb_enc_reg_error_desc(s,
len, enc, options, err));
713rb_reg_error_desc(
VALUE str,
int options,
const char *err)
715 return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
716 rb_enc_get(str), options, err);
719NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
722rb_reg_raise_str(
VALUE str,
int options,
const char *err)
724 rb_exc_raise(rb_reg_error_desc(str, options, err));
742rb_reg_casefold_p(
VALUE re)
745 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
787rb_reg_options_m(
VALUE re)
794reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
795 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
798 rb_ary_push(ary, rb_enc_str_new((
const char *)name, name_end-name, regex->enc));
816rb_reg_names(
VALUE re)
820 ary = rb_ary_new_capa(onig_number_of_names(
RREGEXP_PTR(re)));
821 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
826reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
827 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
833 for (i = 0; i < back_num; i++)
834 rb_ary_store(ary, i,
INT2NUM(back_refs[i]));
836 rb_hash_aset(hash,
rb_str_new((
const char*)name, name_end-name),ary);
860rb_reg_named_captures(
VALUE re)
863 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
864 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
869onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
871 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
876 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
878 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
881 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
891make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
892 const char *sourcefile,
int sourceline)
905 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s +
len), flags,
906 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
908 onig_error_code_to_str((UChar*)err, r, &einfo);
967match_alloc(
VALUE klass)
984 if (to->allocated)
return 0;
987 if (to->allocated)
return 0;
988 return ONIGERR_MEMORY;
997pair_byte_cmp(
const void *pair1,
const void *pair2)
999 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
1000#if SIZEOF_LONG > SIZEOF_INT
1001 return diff ? diff > 0 ? 1 : -1 : 0;
1008update_char_offset(
VALUE match)
1012 int i, num_regs, num_pos;
1022 num_regs = rm->
regs.num_regs;
1029 enc = rb_enc_get(
RMATCH(match)->str);
1031 for (i = 0; i < num_regs; i++) {
1040 for (i = 0; i < num_regs; i++) {
1043 pairs[num_pos++].byte_pos = BEG(i);
1044 pairs[num_pos++].byte_pos = END(i);
1046 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1048 s = p = RSTRING_PTR(
RMATCH(match)->str);
1050 for (i = 0; i < num_pos; i++) {
1051 q = s + pairs[i].byte_pos;
1053 pairs[i].char_pos = c;
1057 for (i = 0; i < num_regs; i++) {
1065 key.byte_pos = BEG(i);
1066 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1069 key.byte_pos = END(i);
1070 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1076match_check(
VALUE match)
1078 if (!
RMATCH(match)->regexp) {
1095 rm = RMATCH_EXT(obj);
1099 if (RMATCH_EXT(orig)->char_offset_num_allocated) {
1125match_regexp(
VALUE match)
1129 regexp =
RMATCH(match)->regexp;
1130 if (
NIL_P(regexp)) {
1160match_names(
VALUE match)
1164 return rb_ary_new_capa(0);
1165 return rb_reg_names(
RMATCH(match)->regexp);
1181match_size(
VALUE match)
1187static int name_to_backref_number(
struct re_registers *,
VALUE,
const char*,
const char*);
1188NORETURN(
static void name_to_backref_error(
VALUE name));
1191name_to_backref_error(
VALUE name)
1193 rb_raise(
rb_eIndexError,
"undefined group name reference: % "PRIsVALUE,
1200 if (i < 0 || regs->num_regs <= i)
1205match_backref_number(
VALUE match,
VALUE backref)
1217 else if (!RB_TYPE_P(backref,
T_STRING)) {
1222 num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
1225 name_to_backref_error(backref);
1234 return match_backref_number(match, backref);
1249 int i = match_backref_number(match, n);
1253 backref_number_check(regs, i);
1258 update_char_offset(match);
1259 return rb_assoc_new(
LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg),
1260 LONG2NUM(RMATCH_EXT(match)->char_offset[i].end));
1284 int i = match_backref_number(match, n);
1288 backref_number_check(regs, i);
1308 int i = match_backref_number(match, n);
1312 backref_number_check(regs, i);
1317 update_char_offset(match);
1318 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg);
1334 int i = match_backref_number(match, n);
1338 backref_number_check(regs, i);
1343 update_char_offset(match);
1344 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end);
1376 int i = match_backref_number(match, n);
1379 backref_number_check(regs, i);
1381 long start = BEG(i), end = END(i);
1385 return rb_str_subseq(
RMATCH(match)->str, start, end - start);
1420 int i = match_backref_number(match, n);
1424 backref_number_check(regs, i);
1429 update_char_offset(match);
1431 &RMATCH_EXT(match)->char_offset[i];
1435#define MATCH_BUSY FL_USER2
1440 FL_SET(match, MATCH_BUSY);
1444rb_match_unbusy(
VALUE match)
1450rb_match_count(
VALUE match)
1453 if (
NIL_P(match))
return -1;
1455 if (!regs)
return -1;
1456 return regs->num_regs;
1467 int err = onig_region_resize(&rmatch->
regs, 1);
1468 if (err) rb_memerror();
1469 rmatch->
regs.beg[0] = pos;
1470 rmatch->
regs.end[0] = pos +
len;
1474rb_backref_set_string(
VALUE string,
long pos,
long len)
1480 match_set_string(match,
string, pos,
len);
1514rb_reg_fixed_encoding_p(
VALUE re)
1516 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1520rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1521 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1529 "incompatible encoding regexp match (%s regexp with %s string)",
1530 rb_enc_name(rb_enc_get(re)),
1531 rb_enc_name(rb_enc_get(
str)));
1548 int cr = str_coderange(
str);
1551 rb_raise(rb_eArgError,
1552 "invalid byte sequence in %s",
1553 rb_enc_name(rb_enc_get(
str)));
1557 enc = rb_enc_get(
str);
1564 else if (!rb_enc_asciicompat(enc)) {
1565 reg_enc_error(re,
str);
1567 else if (rb_reg_fixed_encoding_p(re)) {
1570 reg_enc_error(re,
str);
1574 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1575 enc != rb_ascii8bit_encoding() &&
1577 rb_warn(
"historical binary regexp match /.../n against %s string",
1593 if (reg->enc == enc)
return reg;
1598 const char *pattern = RSTRING_PTR(src_str);
1600 onig_errmsg_buffer err =
"";
1601 unescaped = rb_reg_preprocess(
1602 pattern, pattern + RSTRING_LEN(src_str), enc,
1603 &fixed_enc, err, 0);
1605 if (
NIL_P(unescaped)) {
1606 rb_raise(rb_eArgError,
"regexp preprocess failed: %s", err);
1610 rb_hrtime_t timelimit = reg->timelimit;
1617 if (
RREGEXP(re)->usecnt == 0) {
1619 r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr +
len),
1621 OnigDefaultSyntax, &einfo);
1625 onig_free_body(&tmp_reg);
1628 onig_free_body(reg);
1634 r = onig_new(®, (UChar *)ptr, (UChar *)(ptr +
len),
1636 OnigDefaultSyntax, &einfo);
1640 onig_error_code_to_str((UChar*)err, r, &einfo);
1641 rb_reg_raise(err, re);
1644 reg->timelimit = timelimit;
1659 if (!tmpreg)
RREGEXP(re)->usecnt++;
1661 OnigPosition result = match(reg,
str, regs, args);
1663 if (!tmpreg)
RREGEXP(re)->usecnt--;
1669 onig_region_free(regs, 0);
1671 if (result != ONIG_MISMATCH) {
1672 onig_errmsg_buffer err =
"";
1673 onig_error_code_to_str((UChar*)err, (
int)result);
1674 rb_reg_raise(err, re);
1688 enc = rb_reg_prepare_enc(re,
str, 0);
1694 range = RSTRING_LEN(
str) - pos;
1697 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(
str)) {
1698 string = (UChar*)RSTRING_PTR(
str);
1701 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1704 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1728 (UChar *)(ptr +
len),
1729 (UChar *)(ptr + args->pos),
1730 (UChar *)(ptr + args->range),
1737rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1739 long len = RSTRING_LEN(str);
1740 if (pos >
len || pos < 0) {
1747 .range = reverse ? 0 :
len,
1754 if (result == ONIG_MISMATCH) {
1756 return ONIG_MISMATCH;
1759 if (set_backref_str) {
1772 if (set_match) *set_match = match;
1778rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str)
1780 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
1786 return rb_reg_search0(re, str, pos, reverse, 1);
1799 (UChar *)(ptr +
len),
1834 if (nth >= regs->num_regs) {
1838 nth += regs->num_regs;
1839 if (nth <= 0)
return Qnil;
1841 return RBOOL(BEG(nth) != -1);
1848 long start, end,
len;
1854 if (nth >= regs->num_regs) {
1858 nth += regs->num_regs;
1859 if (nth <= 0)
return Qnil;
1862 if (start == -1)
return Qnil;
1865 str = rb_str_subseq(
RMATCH(match)->str, start,
len);
1902 if (BEG(0) == -1)
return Qnil;
1903 str = rb_str_subseq(
RMATCH(match)->str, 0, BEG(0));
1936 if (BEG(0) == -1)
return Qnil;
1937 str =
RMATCH(match)->str;
1939 str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
1944match_last_index(
VALUE match)
1949 if (
NIL_P(match))
return -1;
1952 if (BEG(0) == -1)
return -1;
1954 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
1962 int i = match_last_index(match);
1963 if (i <= 0)
return Qnil;
1965 return rb_str_subseq(
RMATCH(match)->str, BEG(i), END(i) - BEG(i));
1969rb_reg_last_defined(
VALUE match)
1971 int i = match_last_index(match);
1972 if (i < 0)
return Qnil;
1977last_match_getter(
ID _x,
VALUE *_y)
1983prematch_getter(
ID _x,
VALUE *_y)
1989postmatch_getter(
ID _x,
VALUE *_y)
1995last_paren_match_getter(
ID _x,
VALUE *_y)
2001match_array(
VALUE match,
int start)
2011 target =
RMATCH(match)->str;
2013 for (i=start; i<regs->num_regs; i++) {
2014 if (regs->beg[i] == -1) {
2015 rb_ary_push(ary,
Qnil);
2018 VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
2019 rb_ary_push(ary, str);
2041match_to_a(
VALUE match)
2043 return match_array(match, 0);
2063match_captures(
VALUE match)
2065 return match_array(match, 1);
2069name_to_backref_number(
struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2071 if (
NIL_P(regexp))
return -1;
2072 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2073 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2076#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2078 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2079 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2089 else if (!RB_TYPE_P(name,
T_STRING)) {
2092 num = NAME_TO_NUMBER(regs, re, name,
2093 RSTRING_PTR(name), RSTRING_END(name));
2095 name_to_backref_error(name);
2101match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2104 long j, end = olen < beg+
len ? olen : beg+
len;
2105 if (
NIL_P(result)) result = rb_ary_new_capa(
len);
2106 if (
len == 0)
return result;
2108 for (j = beg; j < end; j++) {
2111 if (beg +
len > j) {
2112 rb_ary_resize(result,
RARRAY_LEN(result) + (beg +
len) - j);
2132 return match_ary_subseq(match, beg,
len, result);
2175match_aref(
int argc,
VALUE *argv,
VALUE match)
2182 if (
NIL_P(length)) {
2187 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2192 return match_ary_aref(match, idx,
Qnil);
2205 if (beg < 0)
return Qnil;
2207 else if (beg > num_regs) {
2210 if (beg+
len > num_regs) {
2211 len = num_regs - beg;
2213 return match_ary_subseq(match, beg,
len,
Qnil);
2244match_values_at(
int argc,
VALUE *argv,
VALUE match)
2252 for (i=0; i<argc; i++) {
2257 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2262 match_ary_aref(match, argv[i], result);
2289match_to_s(
VALUE match)
2298match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2299 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
2301 struct MEMO *memo = MEMO_CAST(arg);
2302 VALUE hash = memo->v1;
2303 VALUE match = memo->v2;
2304 long symbolize = memo->u3.state;
2306 VALUE key = rb_enc_str_new((
const char *)name, name_end-name, regex->enc);
2308 if (symbolize > 0) {
2309 key = rb_str_intern(key);
2317 for (i = 0; i < back_num; i++) {
2320 rb_hash_aset(hash, key, value);
2326 rb_hash_aset(hash, key,
Qnil);
2365match_named_captures(
int argc,
VALUE *argv,
VALUE match)
2372 return rb_hash_new();
2375 VALUE symbolize_names = 0;
2380 static ID keyword_ids[1];
2382 VALUE symbolize_names_val;
2384 if (!keyword_ids[0]) {
2387 rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
2388 if (!UNDEF_P(symbolize_names_val) &&
RTEST(symbolize_names_val)) {
2389 symbolize_names = 1;
2393 hash = rb_hash_new();
2394 memo = MEMO_NEW(hash, match, symbolize_names);
2396 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, (
void*)memo);
2418match_deconstruct_keys(
VALUE match,
VALUE keys)
2426 return rb_hash_new_with_size(0);
2430 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2433 memo = MEMO_NEW(h, match, 1);
2435 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, (
void*)memo);
2443 return rb_hash_new_with_size(0);
2457 RSTRING_PTR(name), RSTRING_END(name));
2484match_string(
VALUE match)
2487 return RMATCH(match)->str;
2496match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2497 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2502 for (i = 0; i < back_num; i++) {
2503 arg[back_refs[i]].name = name;
2504 arg[back_refs[i]].len = name_end - name;
2531match_inspect(
VALUE match)
2537 int num_regs = regs->num_regs;
2542 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2544 else if (
NIL_P(regexp)) {
2545 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2553 match_inspect_name_iter, names);
2558 for (i = 0; i < num_regs; i++) {
2565 rb_str_catf(str,
"%d", i);
2583read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2585 const char *p = *pp;
2587 int meta_prefix = 0, ctrl_prefix = 0;
2590 if (p == end || *p++ !=
'\\') {
2591 errcpy(err,
"too short escaped multibyte character");
2597 errcpy(err,
"too short escape sequence");
2601 case '\\': code =
'\\';
break;
2602 case 'n': code =
'\n';
break;
2603 case 't': code =
'\t';
break;
2604 case 'r': code =
'\r';
break;
2605 case 'f': code =
'\f';
break;
2606 case 'v': code =
'\013';
break;
2607 case 'a': code =
'\007';
break;
2608 case 'e': code =
'\033';
break;
2611 case '0':
case '1':
case '2':
case '3':
2612 case '4':
case '5':
case '6':
case '7':
2621 errcpy(err,
"invalid hex escape");
2629 errcpy(err,
"duplicate meta escape");
2633 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2643 errcpy(err,
"too short meta escape");
2647 if (p == end || *p++ !=
'-') {
2648 errcpy(err,
"too short control escape");
2653 errcpy(err,
"duplicate control escape");
2657 if (p < end && (*p & 0x80) == 0) {
2667 errcpy(err,
"too short control escape");
2671 errcpy(err,
"unexpected escape sequence");
2674 if (code < 0 || 0xff < code) {
2675 errcpy(err,
"invalid escape code");
2689unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2692 const char *p = *pp;
2694 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2695 char *chbuf = (
char *)area;
2700 memset(chbuf, 0, chmaxlen);
2702 byte = read_escaped_byte(&p, end, err);
2707 area[chlen++] = byte;
2708 while (chlen < chmaxlen &&
2710 byte = read_escaped_byte(&p, end, err);
2714 area[chlen++] = byte;
2717 l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2719 errcpy(err,
"invalid multibyte escape");
2722 if (1 < chlen || (area[0] & 0x80)) {
2727 else if (*encp != enc) {
2728 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2734 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2742check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2744 if ((0xd800 <= code && code <= 0xdfff) ||
2746 errcpy(err,
"invalid Unicode range");
2753append_utf8(
unsigned long uv,
2756 if (check_unicode_range(uv, err) != 0)
2760 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2770 *encp = rb_utf8_encoding();
2771 else if (*encp != rb_utf8_encoding()) {
2772 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2780unescape_unicode_list(
const char **pp,
const char *end,
2783 const char *p = *pp;
2784 int has_unicode = 0;
2788 while (p < end &&
ISSPACE(*p)) p++;
2791 code = ruby_scan_hex(p, end-p, &
len);
2795 errcpy(err,
"invalid Unicode range");
2799 if (append_utf8(code, buf, encp, err) != 0)
2803 while (p < end &&
ISSPACE(*p)) p++;
2806 if (has_unicode == 0) {
2807 errcpy(err,
"invalid Unicode list");
2817unescape_unicode_bmp(
const char **pp,
const char *end,
2820 const char *p = *pp;
2825 errcpy(err,
"invalid Unicode escape");
2828 code = ruby_scan_hex(p, 4, &
len);
2830 errcpy(err,
"invalid Unicode escape");
2833 if (append_utf8(code, buf, encp, err) != 0)
2840unescape_nonascii0(
const char **pp,
const char *end,
rb_encoding *enc,
2842 onig_errmsg_buffer err,
int options,
int recurse)
2844 const char *p = *pp;
2847 int in_char_class = 0;
2849 int extended_mode = options & ONIG_OPTION_EXTEND;
2853 int chlen = rb_enc_precise_mbclen(p, end, enc);
2856 errcpy(err,
"invalid multibyte character");
2860 if (1 < chlen || (*p & 0x80)) {
2866 else if (*encp != enc) {
2867 errcpy(err,
"non ASCII character in UTF-8 regexp");
2876 errcpy(err,
"too short escape sequence");
2879 chlen = rb_enc_precise_mbclen(p, end, enc);
2881 goto invalid_multibyte;
2890 case '1':
case '2':
case '3':
2891 case '4':
case '5':
case '6':
case '7':
2893 size_t len = end-(p-1), octlen;
2894 if (ruby_scan_oct(p-1,
len < 3 ?
len : 3, &octlen) <= 0177) {
2910 if (rb_is_usascii_enc(enc)) {
2911 const char *pbeg = p;
2912 int byte = read_escaped_byte(&p, end, err);
2913 if (
byte == -1)
return -1;
2918 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2925 errcpy(err,
"too short escape sequence");
2931 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2933 if (p == end || *p++ !=
'}') {
2934 errcpy(err,
"invalid Unicode list");
2941 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
2963 if (extended_mode && !in_char_class) {
2965 while ((p < end) && ((c = *p++) !=
'\n')) {
2966 if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
2979 if (in_char_class) {
2986 if (!in_char_class && recurse) {
2987 if (--parens == 0) {
2994 if (!in_char_class && p + 1 < end && *p ==
'?') {
2995 if (*(p+1) ==
'#') {
2997 const char *orig_p = p;
3000 while (cont && (p < end)) {
3003 if (!(c & 0x80))
break;
3004 if (!*encp && enc == rb_utf8_encoding()) {
3010 chlen = rb_enc_precise_mbclen(p, end, enc);
3012 goto invalid_multibyte;
3033 int local_extend = 0;
3040 for(s = p+1; s < end; s++) {
3043 local_extend = invert ? -1 : 1;
3050 if (local_extend == 0 ||
3051 (local_extend == -1 && !extended_mode) ||
3052 (local_extend == 1 && extended_mode)) {
3059 int local_options = options;
3060 if (local_extend == 1) {
3061 local_options |= ONIG_OPTION_EXTEND;
3064 local_options &= ~ONIG_OPTION_EXTEND;
3068 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3071 if (ret < 0)
return ret;
3076 extended_mode = local_extend == 1;
3093 else if (!in_char_class && recurse) {
3111unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
3113 onig_errmsg_buffer err,
int options)
3115 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3120rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
3121 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3124 int has_property = 0;
3126 buf = rb_str_buf_new(0);
3128 if (rb_enc_asciicompat(enc))
3132 rb_enc_associate(buf, enc);
3135 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3138 if (has_property && !*fixed_enc) {
3143 rb_enc_associate(buf, *fixed_enc);
3150rb_reg_check_preprocess(
VALUE str)
3153 onig_errmsg_buffer err =
"";
3159 p = RSTRING_PTR(str);
3160 end = p + RSTRING_LEN(str);
3161 enc = rb_enc_get(str);
3163 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3167 return rb_reg_error_desc(str, 0, err);
3173rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3177 onig_errmsg_buffer err =
"";
3183 rb_raise(rb_eArgError,
"no arguments given");
3192 src_enc = rb_enc_get(str);
3193 if (options & ARG_ENCODING_NONE &&
3194 src_enc != ascii8bit) {
3196 rb_raise(
rb_eRegexpError,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3198 src_enc = ascii8bit;
3202 p = RSTRING_PTR(str);
3203 end = p + RSTRING_LEN(str);
3205 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3208 rb_raise(rb_eArgError,
"%s", err);
3210 if (fixed_enc != 0) {
3211 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3212 rb_raise(
rb_eRegexpError,
"encoding mismatch in dynamic regexp : %s and %s",
3213 rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
3215 regexp_enc = fixed_enc;
3221 rb_str_buf_append(result, str);
3224 rb_enc_associate(result, regexp_enc);
3231rb_reg_initialize_check(
VALUE obj)
3241 int options, onig_errmsg_buffer err,
3242 const char *sourcefile,
int sourceline)
3249 rb_reg_initialize_check(obj);
3251 if (rb_enc_dummy_p(enc)) {
3252 errcpy(err,
"can't make regexp with dummy encoding");
3256 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc, err, options);
3257 if (
NIL_P(unescaped))
3261 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3262 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3263 errcpy(err,
"incompatible character encoding");
3266 if (fixed_enc != a_enc) {
3267 options |= ARG_ENCODING_FIXED;
3271 else if (!(options & ARG_ENCODING_FIXED)) {
3272 enc = rb_usascii_encoding();
3275 rb_enc_associate((
VALUE)re, enc);
3276 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3279 if (options & ARG_ENCODING_NONE) {
3283 re->
ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
3284 options & ARG_REG_OPTION_MASK, err,
3285 sourcefile, sourceline);
3286 if (!re->
ptr)
return -1;
3295 if (regenc != enc) {
3296 str = rb_enc_associate(rb_str_dup(str), enc = regenc);
3302rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3303 const char *sourcefile,
int sourceline)
3306 rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
3307 if (options & ARG_ENCODING_NONE) {
3309 if (enc != ascii8bit) {
3311 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3317 ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
3318 options, err, sourcefile, sourceline);
3319 if (ret == 0) reg_set_source(obj, str, str_enc);
3324rb_reg_s_alloc(
VALUE klass)
3344 return rb_reg_init_str(rb_reg_alloc(), s, options);
3348rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3350 onig_errmsg_buffer err =
"";
3352 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3353 rb_reg_raise_str(s, options, err);
3362 onig_errmsg_buffer err =
"";
3364 if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
3365 enc, options, err, NULL, 0) != 0) {
3366 rb_reg_raise_str(s, options, err);
3368 reg_set_source(re, s, enc);
3374rb_reg_new_ary(
VALUE ary,
int opt)
3384 VALUE re = rb_reg_alloc();
3385 onig_errmsg_buffer err =
"";
3387 if (rb_reg_initialize(re, s,
len, enc, options, err, NULL, 0) != 0) {
3388 rb_enc_reg_raise(s,
len, enc, options, err);
3402rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3404 VALUE re = rb_reg_alloc();
3405 onig_errmsg_buffer err =
"";
3408 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3409 rb_set_errinfo(rb_reg_error_desc(str, options, err));
3416static VALUE reg_cache;
3423 && memcmp(
RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3429static st_index_t reg_hash(
VALUE re);
3441rb_reg_hash(
VALUE re)
3443 st_index_t hashval = reg_hash(re);
3476 if (re1 == re2)
return Qtrue;
3478 rb_reg_check(re1); rb_reg_check(re2);
3498match_hash(
VALUE match)
3505 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3528 if (match1 == match2)
return Qtrue;
3532 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3535 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3536 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3537 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3542reg_operand(
VALUE s,
int check)
3564 *strp = str = reg_operand(str, TRUE);
3575 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3637 long pos = reg_match_pos(re, &str, 0, NULL);
3638 if (pos < 0)
return Qnil;
3668 str = reg_operand(str, FALSE);
3674 return RBOOL(start >= 0);
3751rb_reg_match_m(
int argc,
VALUE *argv,
VALUE re)
3756 if (
rb_scan_args(argc, argv,
"11", &str, &initpos) == 2) {
3763 pos = reg_match_pos(re, &str, pos, &result);
3792rb_reg_match_m_p(
int argc,
VALUE *argv,
VALUE re)
3795 return rb_reg_match_p(re, argv[0], pos);
3806 if (pos < 0)
return Qfalse;
3812 pos = beg - RSTRING_PTR(str);
3818 .range = RSTRING_LEN(str),
3831str_to_option(
VALUE str)
3837 if (
NIL_P(str))
return -1;
3839 for (
long i = 0; i <
len; ++i) {
3840 int f = char_to_option(ptr[i]);
3842 rb_raise(rb_eArgError,
"unknown regexp option: %"PRIsVALUE, str);
3850set_timeout(rb_hrtime_t *hrt,
VALUE timeout)
3852 double timeout_d =
NIL_P(timeout) ? 0.0 :
NUM2DBL(timeout);
3853 if (!
NIL_P(timeout) && timeout_d <= 0) {
3854 rb_raise(rb_eArgError,
"invalid timeout: %"PRIsVALUE, timeout);
3856 double2hrtime(hrt, timeout_d);
3865 rb_reg_initialize_check(copy);
3866 if ((r = onig_reg_copy(&re,
RREGEXP_PTR(orig))) != 0) {
3873 rb_enc_copy(copy, orig);
3888void rb_warn_deprecated_to_remove(
const char *removal,
const char *fmt,
const char *suggest, ...);
3945rb_reg_initialize_m(
int argc,
VALUE *argv,
VALUE self)
3948 VALUE re = reg_extract_args(argc, argv, &args);
3957 set_timeout(&
RREGEXP_PTR(self)->timelimit, args.timeout);
3972 args->timeout =
Qnil;
3973 if (!
NIL_P(kwargs)) {
3974 static ID keywords[1];
3995 else if ((f = str_to_option(opts)) >= 0) flags = f;
3996 else if (rb_bool_expected(opts,
"ignorecase", FALSE))
3997 flags = ONIG_OPTION_IGNORECASE;
4003 args->flags = flags;
4010 if (enc && rb_enc_get(str) != enc)
4011 rb_reg_init_str_enc(self, str, enc, flags);
4013 rb_reg_init_str(self, str, flags);
4026 s = RSTRING_PTR(str);
4027 send = s + RSTRING_LEN(str);
4029 c = rb_enc_ascget(s, send, &clen, enc);
4031 s += mbclen(s, send, enc);
4035 case '[':
case ']':
case '{':
case '}':
4036 case '(':
case ')':
case '|':
case '-':
4037 case '*':
case '.':
case '\\':
4038 case '?':
case '+':
case '^':
case '$':
4040 case '\t':
case '\f':
case '\v':
case '\n':
case '\r':
4047 rb_enc_associate(tmp, rb_usascii_encoding());
4054 rb_enc_associate(tmp, rb_usascii_encoding());
4057 rb_enc_copy(tmp, str);
4059 t = RSTRING_PTR(tmp);
4061 const char *p = RSTRING_PTR(str);
4062 memcpy(t, p, s - p);
4066 c = rb_enc_ascget(s, send, &clen, enc);
4068 int n = mbclen(s, send, enc);
4076 case '[':
case ']':
case '{':
case '}':
4077 case '(':
case ')':
case '|':
case '-':
4078 case '*':
case '.':
case '\\':
4079 case '?':
case '+':
case '^':
case '$':
4081 t += rb_enc_mbcput(
'\\', t, enc);
4084 t += rb_enc_mbcput(
'\\', t, enc);
4085 t += rb_enc_mbcput(
' ', t, enc);
4088 t += rb_enc_mbcput(
'\\', t, enc);
4089 t += rb_enc_mbcput(
't', t, enc);
4092 t += rb_enc_mbcput(
'\\', t, enc);
4093 t += rb_enc_mbcput(
'n', t, enc);
4096 t += rb_enc_mbcput(
'\\', t, enc);
4097 t += rb_enc_mbcput(
'r', t, enc);
4100 t += rb_enc_mbcput(
'\\', t, enc);
4101 t += rb_enc_mbcput(
'f', t, enc);
4104 t += rb_enc_mbcput(
'\\', t, enc);
4105 t += rb_enc_mbcput(
'v', t, enc);
4108 t += rb_enc_mbcput(c, t, enc);
4110 rb_str_resize(tmp, t - RSTRING_PTR(tmp));
4143 options =
RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4144 if (
RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4145 if (
RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4150rb_check_regexp_type(
VALUE re)
4176 return rb_check_regexp_type(re);
4189 else if (argc == 1) {
4190 VALUE arg = rb_ary_entry(args0, 0);
4191 VALUE re = rb_check_regexp_type(arg);
4196 quoted = rb_reg_s_quote(
Qnil, arg);
4202 VALUE source = rb_str_buf_new(0);
4205 int has_asciionly = 0;
4209 for (i = 0; i < argc; i++) {
4211 VALUE e = rb_ary_entry(args0, i);
4216 v = rb_check_regexp_type(e);
4219 if (!rb_enc_asciicompat(enc)) {
4220 if (!has_ascii_incompat)
4221 has_ascii_incompat = enc;
4222 else if (has_ascii_incompat != enc)
4223 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4224 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4226 else if (rb_reg_fixed_encoding_p(v)) {
4227 if (!has_ascii_compat_fixed)
4228 has_ascii_compat_fixed = enc;
4229 else if (has_ascii_compat_fixed != enc)
4230 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4231 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4236 v = rb_reg_str_with_term(v, -1);
4241 enc = rb_enc_get(e);
4242 if (!rb_enc_asciicompat(enc)) {
4243 if (!has_ascii_incompat)
4244 has_ascii_incompat = enc;
4245 else if (has_ascii_incompat != enc)
4246 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4247 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4253 if (!has_ascii_compat_fixed)
4254 has_ascii_compat_fixed = enc;
4255 else if (has_ascii_compat_fixed != enc)
4256 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4257 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4259 v = rb_reg_s_quote(
Qnil, e);
4261 if (has_ascii_incompat) {
4262 if (has_asciionly) {
4263 rb_raise(rb_eArgError,
"ASCII incompatible encoding: %s",
4264 rb_enc_name(has_ascii_incompat));
4266 if (has_ascii_compat_fixed) {
4267 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4268 rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
4273 rb_enc_copy(source, v);
4278 if (has_ascii_incompat) {
4279 result_enc = has_ascii_incompat;
4281 else if (has_ascii_compat_fixed) {
4282 result_enc = has_ascii_compat_fixed;
4285 result_enc = rb_ascii8bit_encoding();
4288 rb_enc_associate(source, result_enc);
4332 !
NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
4333 return rb_reg_s_union(self, v);
4335 return rb_reg_s_union(self, args);
4360rb_reg_s_linear_time_p(
int argc,
VALUE *argv,
VALUE self)
4363 VALUE re = reg_extract_args(argc, argv, &args);
4366 re =
reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4369 return RBOOL(onig_check_linear_time(
RREGEXP_PTR(re)));
4378 return reg_copy(copy, re);
4389 int acompat = rb_enc_asciicompat(str_enc);
4391#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4398 int c = ASCGET(s, e, &clen);
4402 s += mbclen(s, e, str_enc);
4408 if (c !=
'\\' || s == e)
continue;
4411 val = rb_str_buf_new(ss-p);
4413 rb_enc_str_buf_cat(val, p, ss-p, str_enc);
4415 c = ASCGET(s, e, &clen);
4417 s += mbclen(s, e, str_enc);
4418 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4426 case '1':
case '2':
case '3':
case '4':
4427 case '5':
case '6':
case '7':
case '8':
case '9':
4428 if (!
NIL_P(regexp) && onig_noname_group_capture_is_active(
RREGEXP_PTR(regexp))) {
4437 if (s < e && ASCGET(s, e, &clen) ==
'<') {
4438 char *name, *name_end;
4440 name_end = name = s + clen;
4441 while (name_end < e) {
4442 c = ASCGET(name_end, e, &clen);
4443 if (c ==
'>')
break;
4444 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4447 VALUE n = rb_str_subseq(str, (
long)(name - RSTRING_PTR(str)),
4448 (
long)(name_end - name));
4449 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
4450 name_to_backref_error(n);
4452 p = s = name_end + clen;
4460 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4469 rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
4473 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
4477 no = regs->num_regs-1;
4478 while (BEG(no) == -1 && no > 0) no--;
4479 if (no == 0)
continue;
4483 rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
4487 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4492 if (no >= regs->num_regs)
continue;
4493 if (BEG(no) == -1)
continue;
4494 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
4498 if (!val)
return str;
4500 rb_enc_str_buf_cat(val, p, e-p, str_enc);
4507ignorecase_getter(
ID _x,
VALUE *_y)
4530get_LAST_MATCH_INFO(
ID _x,
VALUE *_y)
4532 return match_getter();
4583rb_reg_s_last_match(
int argc,
VALUE *argv,
VALUE _)
4589 n = match_backref_number(match, argv[0]);
4592 return match_getter();
4596re_warn(
const char *s)
4602rb_hrtime_t rb_reg_match_time_limit = 0;
4606rb_reg_check_timeout(
regex_t *reg,
void *end_time_)
4608 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4610 if (*end_time == 0) {
4614 rb_hrtime_t timelimit = reg->timelimit;
4618 timelimit = rb_reg_match_time_limit;
4622 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4626 *end_time = RB_HRTIME_MAX;
4630 if (*end_time < rb_hrtime_now()) {
4632 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
4646rb_reg_s_timeout_get(
VALUE dummy)
4648 double d = hrtime2double(rb_reg_match_time_limit);
4649 if (d == 0.0)
return Qnil;
4667rb_reg_s_timeout_set(
VALUE dummy,
VALUE timeout)
4669 rb_ractor_ensure_main_ractor(
"can not access Regexp.timeout from non-main Ractors");
4671 set_timeout(&rb_reg_match_time_limit, timeout);
4692rb_reg_timeout_get(
VALUE re)
4695 double d = hrtime2double(
RREGEXP_PTR(re)->timelimit);
4696 if (d == 0.0)
return Qnil;
4723 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4724 onig_set_warn_func(re_warn);
4725 onig_set_verb_warn_func(re_warn);
4733 rb_gvar_ractor_local(
"$~");
4734 rb_gvar_ractor_local(
"$&");
4735 rb_gvar_ractor_local(
"$`");
4736 rb_gvar_ractor_local(
"$'");
4737 rb_gvar_ractor_local(
"$+");
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define REALLOC_N
Old name of RB_REALLOC_N.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define FIX2INT
Old name of RB_FIX2INT.
#define NUM2DBL
Old name of rb_num2dbl.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define DBL2NUM
Old name of rb_float_new.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eStandardError
StandardError exception.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in verbose mode.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid)
Identical to rb_convert_type(), except it returns RUBY_Qnil instead of raising exceptions,...
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_cMatch
MatchData class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
#define RGENGC_WB_PROTECTED_MATCH
This is a compile-time flag to enable/disable write barrier for struct RMatch.
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_lastline_get(void)
Queries the last line, or the $_.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that was captured at the very last.
VALUE rb_reg_match2(VALUE re)
Identical to rb_reg_match(), except it matches against rb_lastline_get() (or, the $_).
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
#define rb_str_buf_cat
Just another name of rb_str_cat.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the ...
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
OnigPosition rb_reg_onig_match(VALUE re, VALUE str, OnigPosition(*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), void *args, struct re_registers *regs)
Runs a regular expression match using function match.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_virtual_variable(const char *q, type *w, void_type *e)
Defines a function-backed global variable.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
#define RBASIC(obj)
Convenient casting macro.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RMatch
Regular expression execution context.
VALUE regexp
The expression of this match.
VALUE str
The target string that the match was made against.
struct RRegexp
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
int char_offset_num_allocated
Number of rmatch_offset that ::rmatch::char_offset holds.
struct re_registers regs
"Registers" of a match.
struct rmatch_offset
Represents the region of a capture group.
long beg
Beginning of a group.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directi...
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.