Ruby 2.7.7p221 (2022-11-24 revision 168ec2b1e5ad0e4688e963d9de019557c78feed9)
strscan.c
Go to the documentation of this file.
1/*
2 $Id$
3
4 Copyright (c) 1999-2006 Minero Aoki
5
6 This program is free software.
7 You can distribute/modify this program under the terms of
8 the Ruby License. For details, see the file COPYING.
9*/
10
11#include "ruby/ruby.h"
12#include "ruby/re.h"
13#include "ruby/encoding.h"
14
15#ifdef RUBY_EXTCONF_H
16# include RUBY_EXTCONF_H
17#endif
18
19#ifdef HAVE_ONIG_REGION_MEMSIZE
20extern size_t onig_region_memsize(const struct re_registers *regs);
21#endif
22
23#include <stdbool.h>
24
25#define STRSCAN_VERSION "1.0.3"
26
27/* =======================================================================
28 Data Type Definitions
29 ======================================================================= */
30
31static VALUE StringScanner;
32static VALUE ScanError;
33static ID id_byteslice;
34
36{
37 /* multi-purpose flags */
38 unsigned long flags;
39#define FLAG_MATCHED (1 << 0)
40
41 /* the string to scan */
43
44 /* scan pointers */
45 long prev; /* legal only when MATCHED_P(s) */
46 long curr; /* always legal */
47
48 /* the regexp register; legal only when MATCHED_P(s) */
50
51 /* regexp used for last scan */
53
54 /* anchor mode */
56};
57
/* Match-status flag helpers; each one expands to a single parenthesized
 * expression so it composes safely inside larger expressions. */
#define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
#define MATCHED(s)            ((s)->flags |= FLAG_MATCHED)
#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED)

/* Accessors for the scanned string: start pointer, byte length, end pointer,
 * pointer at the scan position, and number of bytes after the scan position. */
#define S_PBEG(s)    (RSTRING_PTR((s)->str))
#define S_LEN(s)     (RSTRING_LEN((s)->str))
#define S_PEND(s)    (S_PBEG(s) + S_LEN(s))
#define CURPTR(s)    (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)

/* True when the scan position is at or past the end of the string.
 * The expansion uses the macro argument throughout; it previously referred to
 * a variable literally named `p`, so it only worked when the caller's scanner
 * variable happened to have that name. */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))

/* Fetches the scanner struct of obj into var and raises ArgumentError when
 * no string has been set on it yet (str is still nil). */
#define GET_SCANNER(obj,var) do {\
    (var) = check_strscan(obj);\
    if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\
} while (0)
74
75/* =======================================================================
76 Function Prototypes
77 ======================================================================= */
78
79static inline long minl _((const long n, const long x));
80static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
81static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
82
83static struct strscanner *check_strscan _((VALUE obj));
84static void strscan_mark _((void *p));
85static void strscan_free _((void *p));
86static size_t strscan_memsize _((const void *p));
87static VALUE strscan_s_allocate _((VALUE klass));
88static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
89static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
90
91static VALUE strscan_s_mustc _((VALUE self));
92static VALUE strscan_terminate _((VALUE self));
93static VALUE strscan_clear _((VALUE self));
94static VALUE strscan_get_string _((VALUE self));
95static VALUE strscan_set_string _((VALUE self, VALUE str));
96static VALUE strscan_concat _((VALUE self, VALUE str));
97static VALUE strscan_get_pos _((VALUE self));
98static VALUE strscan_set_pos _((VALUE self, VALUE pos));
99static VALUE strscan_do_scan _((VALUE self, VALUE regex,
100 int succptr, int getstr, int headonly));
101static VALUE strscan_scan _((VALUE self, VALUE re));
102static VALUE strscan_match_p _((VALUE self, VALUE re));
103static VALUE strscan_skip _((VALUE self, VALUE re));
104static VALUE strscan_check _((VALUE self, VALUE re));
105static VALUE strscan_scan_full _((VALUE self, VALUE re,
106 VALUE succp, VALUE getp));
107static VALUE strscan_scan_until _((VALUE self, VALUE re));
108static VALUE strscan_skip_until _((VALUE self, VALUE re));
109static VALUE strscan_check_until _((VALUE self, VALUE re));
110static VALUE strscan_search_full _((VALUE self, VALUE re,
111 VALUE succp, VALUE getp));
112static void adjust_registers_to_matched _((struct strscanner *p));
113static VALUE strscan_getch _((VALUE self));
114static VALUE strscan_get_byte _((VALUE self));
115static VALUE strscan_getbyte _((VALUE self));
116static VALUE strscan_peek _((VALUE self, VALUE len));
117static VALUE strscan_peep _((VALUE self, VALUE len));
118static VALUE strscan_unscan _((VALUE self));
119static VALUE strscan_bol_p _((VALUE self));
120static VALUE strscan_eos_p _((VALUE self));
121static VALUE strscan_empty_p _((VALUE self));
122static VALUE strscan_rest_p _((VALUE self));
123static VALUE strscan_matched_p _((VALUE self));
124static VALUE strscan_matched _((VALUE self));
125static VALUE strscan_matched_size _((VALUE self));
126static VALUE strscan_aref _((VALUE self, VALUE idx));
127static VALUE strscan_pre_match _((VALUE self));
128static VALUE strscan_post_match _((VALUE self));
129static VALUE strscan_rest _((VALUE self));
130static VALUE strscan_rest_size _((VALUE self));
131
132static VALUE strscan_inspect _((VALUE self));
133static VALUE inspect1 _((struct strscanner *p));
134static VALUE inspect2 _((struct strscanner *p));
135
136/* =======================================================================
137 Utils
138 ======================================================================= */
139
/* Helper that returns `str` after copying the encoding of the scanned string
 * (p->str) onto it with rb_enc_copy().
 * NOTE(review): the embedded listing numbers jump from 142 to 144, so the
 * statement that declares and creates `str` (presumably from ptr/len) is not
 * visible in this view -- confirm against the original file. */
140static VALUE
141str_new(struct strscanner *p, const char *ptr, long len)
142{
144 rb_enc_copy(str, p->str);
145 return str;
146}
147
/* Returns the smaller of two long values (y when they are equal). */
static inline long
minl(const long x, const long y)
{
    if (x < y) {
        return x;
    }
    return y;
}
153
154static VALUE
155extract_range(struct strscanner *p, long beg_i, long end_i)
156{
157 if (beg_i > S_LEN(p)) return Qnil;
158 end_i = minl(end_i, S_LEN(p));
159 return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i);
160}
161
162static VALUE
163extract_beg_len(struct strscanner *p, long beg_i, long len)
164{
165 if (beg_i > S_LEN(p)) return Qnil;
166 len = minl(len, S_LEN(p) - beg_i);
167 return str_new(p, S_PBEG(p) + beg_i, len);
168}
169
170/* =======================================================================
171 Constructor
172 ======================================================================= */
173
174static void
175strscan_mark(void *ptr)
176{
177 struct strscanner *p = ptr;
178 rb_gc_mark(p->str);
179}
180
181static void
182strscan_free(void *ptr)
183{
184 struct strscanner *p = ptr;
185 onig_region_free(&(p->regs), 0);
186 ruby_xfree(p);
187}
188
/* Size callback registered in strscanner_type.  Starts from the size of the
 * scanner struct without its embedded `regs` member.
 * NOTE(review): listing line 195 (the body of the #ifdef below) is not shown
 * in this view; presumably it adds the size reported by
 * onig_region_memsize() for p->regs -- confirm against the original file. */
189static size_t
190strscan_memsize(const void *ptr)
191{
192 const struct strscanner *p = ptr;
193 size_t size = sizeof(*p) - sizeof(p->regs);
194#ifdef HAVE_ONIG_REGION_MEMSIZE
196#endif
197 return size;
198}
199
/* Typed-data descriptor for scanner objects: the type name followed by the
 * mark, free and size callbacks defined above.
 * NOTE(review): listing line 203 (the remaining initializer fields) is not
 * shown in this view. */
200static const rb_data_type_t strscanner_type = {
201 "StringScanner",
202 {strscan_mark, strscan_free, strscan_memsize},
204};
205
206static VALUE
207strscan_s_allocate(VALUE klass)
208{
/* Allocator: wraps a new struct strscanner in a typed-data object of klass,
 * using strscanner_type as its descriptor. */
209 struct strscanner *p;
210 VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p);
211
/* NOTE(review): listing line 212 is not shown in this view. */
213 onig_region_init(&(p->regs));
/* A nil string marks the scanner as uninitialized; GET_SCANNER raises
 * ArgumentError for such objects. */
214 p->str = Qnil;
215 return obj;
216}
217
218/*
219 * call-seq:
220 * StringScanner.new(string, fixed_anchor: false)
221 * StringScanner.new(string, dup = false)
222 *
223 * Creates a new StringScanner object to scan over the given +string+.
224 *
225 * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
226 * the string. Otherwise, +\A+ always matches the current position.
227 *
228 * +dup+ argument is obsolete and not used now.
229 */
230static VALUE
231strscan_initialize(int argc, VALUE *argv, VALUE self)
232{
233 struct strscanner *p;
234 VALUE str, options;
235
/* check_strscan (not GET_SCANNER) is used because the string is still nil
 * at this point. */
236 p = check_strscan(self);
/* "11": one required argument (the string) and one optional argument. */
237 rb_scan_args(argc, argv, "11", &str, &options);
/* The optional argument is only honoured when it converts to a Hash;
 * anything else leaves options nil and falls into the else branch below. */
238 options = rb_check_hash_type(options);
239 if (!NIL_P(options)) {
240 VALUE fixed_anchor;
241 ID keyword_ids[1];
242 keyword_ids[0] = rb_intern("fixed_anchor");
/* 0 required / 1 optional keyword; fixed_anchor is Qundef when absent. */
243 rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
244 if (fixed_anchor == Qundef) {
245 p->fixed_anchor_p = false;
246 }
247 else {
248 p->fixed_anchor_p = RTEST(fixed_anchor);
249 }
250 }
251 else {
252 p->fixed_anchor_p = false;
253 }
/* NOTE(review): listing line 254 is not shown in this view; presumably it
 * validates or converts `str` before it is stored -- confirm. */
255 p->str = str;
256
257 return self;
258}
259
260static struct strscanner *
261check_strscan(VALUE obj)
262{
263 return rb_check_typeddata(obj, &strscanner_type);
264}
265
266/*
267 * call-seq:
268 * dup
269 * clone
270 *
271 * Duplicates a StringScanner object.
272 */
273static VALUE
274strscan_init_copy(VALUE vself, VALUE vorig)
275{
276 struct strscanner *self, *orig;
277
278 self = check_strscan(vself);
279 orig = check_strscan(vorig);
280 if (self != orig) {
281 self->flags = orig->flags;
282 self->str = orig->str;
283 self->prev = orig->prev;
284 self->curr = orig->curr;
285 if (rb_reg_region_copy(&self->regs, &orig->regs))
286 rb_memerror();
287 RB_GC_GUARD(vorig);
288 }
289
290 return vself;
291}
292
293/* =======================================================================
294 Instance Methods
295 ======================================================================= */
296
297/*
298 * call-seq: StringScanner.must_C_version
299 *
300 * This method is defined for backward compatibility.
301 */
302static VALUE
303strscan_s_mustc(VALUE self)
304{
305 return self;
306}
307
308/*
309 * Reset the scan pointer (index 0) and clear matching data.
310 */
311static VALUE
312strscan_reset(VALUE self)
313{
314 struct strscanner *p;
315
/* Raises ArgumentError if no string has been set on the scanner. */
316 GET_SCANNER(self, p);
/* Move the scan pointer back to the first byte. */
317 p->curr = 0;
/* NOTE(review): listing line 318 is not shown in this view; the comment
 * above this function says matching data is cleared as well. */
319 return self;
320}
321
322/*
323 * call-seq:
324 * terminate
325 * clear
326 *
327 * Sets the scan pointer to the end of the string and clear matching data.
328 */
329static VALUE
330strscan_terminate(VALUE self)
331{
332 struct strscanner *p;
333
/* Raises ArgumentError if no string has been set on the scanner. */
334 GET_SCANNER(self, p);
/* Move the scan pointer to the byte length of the string, i.e. its end. */
335 p->curr = S_LEN(p);
/* NOTE(review): listing line 336 is not shown in this view; the comment
 * above this function says matching data is cleared as well. */
337 return self;
338}
339
340/*
341 * Equivalent to #terminate.
342 * This method is obsolete; use #terminate instead.
343 */
344static VALUE
345strscan_clear(VALUE self)
346{
347 rb_warning("StringScanner#clear is obsolete; use #terminate instead");
348 return strscan_terminate(self);
349}
350
351/*
352 * Returns the string being scanned.
353 */
354static VALUE
355strscan_get_string(VALUE self)
356{
357 struct strscanner *p;
358
359 GET_SCANNER(self, p);
360 return p->str;
361}
362
363/*
364 * call-seq: string=(str)
365 *
366 * Changes the string being scanned to +str+ and resets the scanner.
367 * Returns +str+.
368 */
369static VALUE
370strscan_set_string(VALUE self, VALUE str)
371{
/* check_strscan (not GET_SCANNER) is used, so this also works on a scanner
 * whose string is still nil. */
372 struct strscanner *p = check_strscan(self);
373
/* NOTE(review): listing line 374 is not shown in this view; presumably it
 * validates or converts `str` -- confirm. */
375 p->str = str;
376 p->curr = 0;
/* NOTE(review): listing line 377 is not shown in this view; the comment
 * above this function says the scanner is reset. */
378 return str;
379}
380
381/*
382 * call-seq:
383 * concat(str)
384 * <<(str)
385 *
386 * Appends +str+ to the string being scanned.
387 * This method does not affect scan pointer.
388 *
389 * s = StringScanner.new("Fri Dec 12 1975 14:39")
390 * s.scan(/Fri /)
391 * s << " +1000 GMT"
392 * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
393 * s.scan(/Dec/) # -> "Dec"
394 */
395static VALUE
396strscan_concat(VALUE self, VALUE str)
397{
398 struct strscanner *p;
399
/* Raises ArgumentError if no string has been set on the scanner. */
400 GET_SCANNER(self, p);
/* NOTE(review): listing line 401 is not shown in this view; presumably it
 * validates or converts `str` -- confirm. */
/* Appends to the scanned string object itself; prev/curr are not touched. */
402 rb_str_append(p->str, str);
403 return self;
404}
405
406/*
407 * Returns the byte position of the scan pointer. In the 'reset' position, this
408 * value is zero. In the 'terminated' position (i.e. the string is exhausted),
409 * this value is the bytesize of the string.
410 *
411 * In short, it's a 0-based index into bytes of the string.
412 *
413 * s = StringScanner.new('test string')
414 * s.pos # -> 0
415 * s.scan_until /str/ # -> "test str"
416 * s.pos # -> 8
417 * s.terminate # -> #<StringScanner fin>
418 * s.pos # -> 11
419 */
420static VALUE
421strscan_get_pos(VALUE self)
422{
423 struct strscanner *p;
424
425 GET_SCANNER(self, p);
426 return INT2FIX(p->curr);
427}
428
429/*
430 * Returns the character position of the scan pointer. In the 'reset' position, this
431 * value is zero. In the 'terminated' position (i.e. the string is exhausted),
432 * this value is the size of the string.
433 *
434 * In short, it's a 0-based index into the string.
435 *
436 * s = StringScanner.new("abcädeföghi")
437 * s.charpos # -> 0
438 * s.scan_until(/ä/) # -> "abcä"
439 * s.pos # -> 5
440 * s.charpos # -> 4
441 */
442static VALUE
443strscan_get_charpos(VALUE self)
444{
445 struct strscanner *p;
446 VALUE substr;
447
448 GET_SCANNER(self, p);
449
450 substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
451
452 return rb_str_length(substr);
453}
454
455/*
456 * call-seq: pos=(n)
457 *
458 * Sets the byte position of the scan pointer.
459 *
460 * s = StringScanner.new('test string')
461 * s.pos = 7 # -> 7
462 * s.rest # -> "ring"
463 */
464static VALUE
465strscan_set_pos(VALUE self, VALUE v)
466{
467 struct strscanner *p;
468 long i;
469
470 GET_SCANNER(self, p);
471 i = NUM2INT(v);
472 if (i < 0) i += S_LEN(p);
473 if (i < 0) rb_raise(rb_eRangeError, "index out of range");
474 if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
475 p->curr = i;
476 return INT2NUM(i);
477}
478
479static inline UChar *
480match_target(struct strscanner *p)
481{
482 if (p->fixed_anchor_p) {
483 return (UChar *)S_PBEG(p);
484 }
485 else
486 {
487 return (UChar *)CURPTR(p);
488 }
489}
490
/* Records a match of `length` bytes at the scan position in register 0
 * (used by strscan_do_scan for plain-string patterns). */
491static inline void
492set_registers(struct strscanner *p, size_t length)
493{
494 const int at = 0;
495 OnigRegion *regs = &(p->regs);
/* NOTE(review): listing line 496 is not shown in this view. */
/* Gives up silently when onig_region_set() reports a non-zero status. */
497 if (onig_region_set(regs, at, 0, 0)) return;
498 if (p->fixed_anchor_p) {
/* Fixed-anchor mode stores offsets measured from the string start. */
499 regs->beg[at] = p->curr;
500 regs->end[at] = p->curr + length;
501 }
502 else
503 {
/* Otherwise offsets are relative to the scan position; beg stays 0. */
504 regs->end[at] = length;
505 }
506}
507
508static inline void
509succ(struct strscanner *p)
510{
511 if (p->fixed_anchor_p) {
512 p->curr = p->regs.end[0];
513 }
514 else
515 {
516 p->curr += p->regs.end[0];
517 }
518}
519
520static inline long
521last_match_length(struct strscanner *p)
522{
523 if (p->fixed_anchor_p) {
524 return p->regs.end[0] - p->prev;
525 }
526 else
527 {
528 return p->regs.end[0];
529 }
530}
531
532static inline long
533adjust_register_position(struct strscanner *p, long position)
534{
535 if (p->fixed_anchor_p) {
536 return position;
537 }
538 else {
539 return p->prev + position;
540 }
541}
542
/* Shared implementation of all scan/search methods.
 *   pattern  - a Regexp; a String is also accepted when headonly is set
 *   succptr  - non-zero: advance the scan pointer past the match
 *   getstr   - non-zero: return the string from the old scan position to the
 *              end of the match; zero: return its byte length
 *   headonly - non-zero: match at the scan position (onig_match);
 *              zero: search forward from it (onig_search)
 * Returns nil when nothing matches.  Raises ScanError when the regexp engine
 * returns -2.
 * NOTE(review): listing lines 558, 564, 580 and 589 are not shown in this
 * view; in particular both Onigmo calls below lack their final argument(s)
 * and closing parenthesis. */
543static VALUE
544strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
545{
546 struct strscanner *p;
547
548 if (headonly) {
549 if (!RB_TYPE_P(pattern, T_REGEXP)) {
550 StringValue(pattern);
551 }
552 }
553 else {
554 Check_Type(pattern, T_REGEXP);
555 }
556 GET_SCANNER(self, p);
557
/* Scan pointer beyond the end of the string: nothing can match. */
559 if (S_RESTLEN(p) < 0) {
560 return Qnil;
561 }
562
563 if (RB_TYPE_P(pattern, T_REGEXP)) {
565 regex_t *re;
566 long ret;
567 int tmpreg;
568
/* Remembered for later named-group lookups (see strscan_aref). */
569 p->regex = pattern;
570 re = rb_reg_prepare_re(pattern, p->str);
/* tmpreg: the prepared regex differs from the one cached in the Regexp.
 * The cached one is pinned through usecnt while the engine runs. */
571 tmpreg = re != RREGEXP_PTR(pattern);
572 if (!tmpreg) RREGEXP(pattern)->usecnt++;
573
574 if (headonly) {
575 ret = onig_match(re,
576 match_target(p),
577 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
578 (UChar* )CURPTR(p),
579 &(p->regs),
581 }
582 else {
583 ret = onig_search(re,
584 match_target(p),
585 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
586 (UChar* )CURPTR(p),
587 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
588 &(p->regs),
590 }
591 if (!tmpreg) RREGEXP(pattern)->usecnt--;
/* A temporary regex is freed when the cached one is still in use;
 * otherwise it replaces the cached one, which is freed instead. */
592 if (tmpreg) {
593 if (RREGEXP(pattern)->usecnt) {
594 onig_free(re);
595 }
596 else {
597 onig_free(RREGEXP_PTR(pattern));
598 RREGEXP_PTR(pattern) = re;
599 }
600 }
601
602 if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
603 if (ret < 0) {
604 /* not matched */
605 return Qnil;
606 }
607 }
608 else {
/* String pattern: encodings must be compatible, then the rest of the
 * string must start with exactly the pattern's bytes. */
609 rb_enc_check(p->str, pattern);
610 if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
611 return Qnil;
612 }
613 if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
614 return Qnil;
615 }
616 set_registers(p, RSTRING_LEN(pattern));
617 }
618
/* Success: flag the match and remember where the scan started. */
619 MATCHED(p);
620 p->prev = p->curr;
621
622 if (succptr) {
623 succ(p);
624 }
625 {
626 const long length = last_match_length(p);
627 if (getstr) {
628 return extract_beg_len(p, p->prev, length);
629 }
630 else {
631 return INT2FIX(length);
632 }
633 }
634}
635
636/*
637 * call-seq: scan(pattern) => String
638 *
639 * Tries to match with +pattern+ at the current position. If there's a match,
640 * the scanner advances the "scan pointer" and returns the matched string.
641 * Otherwise, the scanner returns +nil+.
642 *
643 * s = StringScanner.new('test string')
644 * p s.scan(/\w+/) # -> "test"
645 * p s.scan(/\w+/) # -> nil
646 * p s.scan(/\s+/) # -> " "
647 * p s.scan("str") # -> "str"
648 * p s.scan(/\w+/) # -> "ing"
649 * p s.scan(/./) # -> nil
650 *
651 */
652static VALUE
653strscan_scan(VALUE self, VALUE re)
654{
655 return strscan_do_scan(self, re, 1, 1, 1);
656}
657
658/*
659 * call-seq: match?(pattern)
660 *
661 * Tests whether the given +pattern+ is matched from the current scan pointer.
662 * Returns the length of the match, or +nil+. The scan pointer is not advanced.
663 *
664 * s = StringScanner.new('test string')
665 * p s.match?(/\w+/) # -> 4
666 * p s.match?(/\w+/) # -> 4
667 * p s.match?("test") # -> 4
668 * p s.match?(/\s+/) # -> nil
669 */
670static VALUE
671strscan_match_p(VALUE self, VALUE re)
672{
673 return strscan_do_scan(self, re, 0, 0, 1);
674}
675
676/*
677 * call-seq: skip(pattern)
678 *
679 * Attempts to skip over the given +pattern+ beginning with the scan pointer.
680 * If it matches, the scan pointer is advanced to the end of the match, and the
681 * length of the match is returned. Otherwise, +nil+ is returned.
682 *
683 * It's similar to #scan, but without returning the matched string.
684 *
685 * s = StringScanner.new('test string')
686 * p s.skip(/\w+/) # -> 4
687 * p s.skip(/\w+/) # -> nil
688 * p s.skip(/\s+/) # -> 1
689 * p s.skip("st") # -> 2
690 * p s.skip(/\w+/) # -> 4
691 * p s.skip(/./) # -> nil
692 *
693 */
694static VALUE
695strscan_skip(VALUE self, VALUE re)
696{
697 return strscan_do_scan(self, re, 1, 0, 1);
698}
699
700/*
701 * call-seq: check(pattern)
702 *
703 * This returns the value that #scan would return, without advancing the scan
704 * pointer. The match register is affected, though.
705 *
706 * s = StringScanner.new("Fri Dec 12 1975 14:39")
707 * s.check /Fri/ # -> "Fri"
708 * s.pos # -> 0
709 * s.matched # -> "Fri"
710 * s.check /12/ # -> nil
711 * s.matched # -> nil
712 *
713 * Mnemonic: it "checks" to see whether a #scan will return a value.
714 */
715static VALUE
716strscan_check(VALUE self, VALUE re)
717{
718 return strscan_do_scan(self, re, 0, 1, 1);
719}
720
721/*
722 * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
723 *
724 * Tests whether the given +pattern+ is matched from the current scan pointer.
725 * Advances the scan pointer if +advance_pointer_p+ is true.
726 * Returns the matched string if +return_string_p+ is true.
727 * The match register is affected.
728 *
729 * "full" means "#scan with full parameters".
730 */
731static VALUE
732strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
733{
734 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
735}
736
737/*
738 * call-seq: scan_until(pattern)
739 *
740 * Scans the string _until_ the +pattern+ is matched. Returns the substring up
741 * to and including the end of the match, advancing the scan pointer to that
742 * location. If there is no match, +nil+ is returned.
743 *
744 * s = StringScanner.new("Fri Dec 12 1975 14:39")
745 * s.scan_until(/1/) # -> "Fri Dec 1"
746 * s.pre_match # -> "Fri Dec "
747 * s.scan_until(/XYZ/) # -> nil
748 */
749static VALUE
750strscan_scan_until(VALUE self, VALUE re)
751{
752 return strscan_do_scan(self, re, 1, 1, 0);
753}
754
755/*
756 * call-seq: exist?(pattern)
757 *
758 * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
759 * without advancing the scan pointer. This predicates whether a #scan_until
760 * will return a value.
761 *
762 * s = StringScanner.new('test string')
763 * s.exist? /s/ # -> 3
764 * s.scan /test/ # -> "test"
765 * s.exist? /s/ # -> 2
766 * s.exist? /e/ # -> nil
767 */
768static VALUE
769strscan_exist_p(VALUE self, VALUE re)
770{
771 return strscan_do_scan(self, re, 0, 0, 0);
772}
773
774/*
775 * call-seq: skip_until(pattern)
776 *
777 * Advances the scan pointer until +pattern+ is matched and consumed. Returns
778 * the number of bytes advanced, or +nil+ if no match was found.
779 *
780 * Looks ahead to match +pattern+ and advances the scan pointer to the _end_
781 * of the match. The return value counts bytes (not characters); +nil+ means
782 * the match was unsuccessful.
783 *
784 * It's similar to #scan_until, but without returning the intervening string.
785 *
786 * s = StringScanner.new("Fri Dec 12 1975 14:39")
787 * s.skip_until /12/ # -> 10
788 * s #
789 */
790static VALUE
791strscan_skip_until(VALUE self, VALUE re)
792{
793 return strscan_do_scan(self, re, 1, 0, 0);
794}
795
796/*
797 * call-seq: check_until(pattern)
798 *
799 * This returns the value that #scan_until would return, without advancing the
800 * scan pointer. The match register is affected, though.
801 *
802 * s = StringScanner.new("Fri Dec 12 1975 14:39")
803 * s.check_until /12/ # -> "Fri Dec 12"
804 * s.pos # -> 0
805 * s.matched # -> "12"
806 *
807 * Mnemonic: it "checks" to see whether a #scan_until will return a value.
808 */
809static VALUE
810strscan_check_until(VALUE self, VALUE re)
811{
812 return strscan_do_scan(self, re, 0, 1, 0);
813}
814
815/*
816 * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
817 *
818 * Scans the string _until_ the +pattern+ is matched.
819 * Advances the scan pointer if +advance_pointer_p+, otherwise not.
820 * Returns the matched string if +return_string_p+ is true, otherwise
821 * returns the number of bytes advanced.
822 * This method does affect the match register.
823 */
824static VALUE
825strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
826{
827 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
828}
829
830static void
831adjust_registers_to_matched(struct strscanner *p)
832{
833 onig_region_clear(&(p->regs));
834 if (p->fixed_anchor_p) {
835 onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
836 }
837 else {
838 onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
839 }
840}
841
842/*
843 * Scans one character and returns it.
844 * This method is multibyte character sensitive.
845 *
846 * s = StringScanner.new("ab")
847 * s.getch # => "a"
848 * s.getch # => "b"
849 * s.getch # => nil
850 *
851 * $KCODE = 'EUC'
852 * s = StringScanner.new("\244\242")
853 * s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
854 * s.getch # => nil
855 */
856static VALUE
857strscan_getch(VALUE self)
858{
859 struct strscanner *p;
860 long len;
861
862 GET_SCANNER(self, p);
/* NOTE(review): listing line 863 is not shown in this view. */
864 if (EOS_P(p))
865 return Qnil;
866
/* NOTE(review): listing line 867, which first assigns `len`, is not shown
 * in this view; presumably it is the byte length of the character at the
 * scan position -- confirm. */
/* Never step past the end of the string. */
868 len = minl(len, S_RESTLEN(p));
869 p->prev = p->curr;
870 p->curr += len;
871 MATCHED(p);
/* Make register 0 describe the span prev..curr so #matched etc. work. */
872 adjust_registers_to_matched(p);
873 return extract_range(p,
874 adjust_register_position(p, p->regs.beg[0]),
875 adjust_register_position(p, p->regs.end[0]));
876}
877
878/*
879 * Scans one byte and returns it.
880 * This method is not multibyte character sensitive.
881 * See also: #getch.
882 *
883 * s = StringScanner.new('ab')
884 * s.get_byte # => "a"
885 * s.get_byte # => "b"
886 * s.get_byte # => nil
887 *
888 * $KCODE = 'EUC'
889 * s = StringScanner.new("\244\242")
890 * s.get_byte # => "\244"
891 * s.get_byte # => "\242"
892 * s.get_byte # => nil
893 */
894static VALUE
895strscan_get_byte(VALUE self)
896{
897 struct strscanner *p;
898
899 GET_SCANNER(self, p);
/* NOTE(review): listing line 900 is not shown in this view. */
901 if (EOS_P(p))
902 return Qnil;
903
/* Consume exactly one byte and record it as the current match. */
904 p->prev = p->curr;
905 p->curr++;
906 MATCHED(p);
/* Make register 0 describe the span prev..curr so #matched etc. work. */
907 adjust_registers_to_matched(p);
908 return extract_range(p,
909 adjust_register_position(p, p->regs.beg[0]),
910 adjust_register_position(p, p->regs.end[0]));
911}
912
913/*
914 * Equivalent to #get_byte.
915 * This method is obsolete; use #get_byte instead.
916 */
917static VALUE
918strscan_getbyte(VALUE self)
919{
920 rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
921 return strscan_get_byte(self);
922}
923
924/*
925 * call-seq: peek(len)
926 *
927 * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
928 * advancing the scan pointer.
929 *
930 * s = StringScanner.new('test string')
931 * s.peek(7) # => "test st"
932 * s.peek(7) # => "test st"
933 *
934 */
935static VALUE
936strscan_peek(VALUE self, VALUE vlen)
937{
938 struct strscanner *p;
939 long len;
940
941 GET_SCANNER(self, p);
942
943 len = NUM2LONG(vlen);
944 if (EOS_P(p))
945 return str_new(p, "", 0);
946
947 len = minl(len, S_RESTLEN(p));
948 return extract_beg_len(p, p->curr, len);
949}
950
951/*
952 * Equivalent to #peek.
953 * This method is obsolete; use #peek instead.
954 */
955static VALUE
956strscan_peep(VALUE self, VALUE vlen)
957{
958 rb_warning("StringScanner#peep is obsolete; use #peek instead");
959 return strscan_peek(self, vlen);
960}
961
962/*
963 * Sets the scan pointer to the previous position. Only one previous position is
964 * remembered, and it changes with each scanning operation.
965 *
966 * s = StringScanner.new('test string')
967 * s.scan(/\w+/) # => "test"
968 * s.unscan
969 * s.scan(/../) # => "te"
970 * s.scan(/\d/) # => nil
971 * s.unscan # ScanError: unscan failed: previous match record not exist
972 */
973static VALUE
974strscan_unscan(VALUE self)
975{
976 struct strscanner *p;
977
978 GET_SCANNER(self, p);
/* prev is only meaningful after a successful match, so refuse otherwise. */
979 if (! MATCHED_P(p))
980 rb_raise(ScanError, "unscan failed: previous match record not exist");
/* Step back to where the last scan started. */
981 p->curr = p->prev;
/* NOTE(review): listing line 982 is not shown in this view. */
983 return self;
984}
985
986/*
987 * Returns +true+ iff the scan pointer is at the beginning of the line.
988 *
989 * s = StringScanner.new("test\ntest\n")
990 * s.bol? # => true
991 * s.scan(/te/)
992 * s.bol? # => false
993 * s.scan(/st\n/)
994 * s.bol? # => true
995 * s.terminate
996 * s.bol? # => true
997 */
998static VALUE
999strscan_bol_p(VALUE self)
1000{
1001 struct strscanner *p;
1002
1003 GET_SCANNER(self, p);
1004 if (CURPTR(p) > S_PEND(p)) return Qnil;
1005 if (p->curr == 0) return Qtrue;
1006 return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
1007}
1008
1009/*
1010 * Returns +true+ if the scan pointer is at the end of the string.
1011 *
1012 * s = StringScanner.new('test string')
1013 * p s.eos? # => false
1014 * s.scan(/test/)
1015 * p s.eos? # => false
1016 * s.terminate
1017 * p s.eos? # => true
1018 */
1019static VALUE
1020strscan_eos_p(VALUE self)
1021{
1022 struct strscanner *p;
1023
1024 GET_SCANNER(self, p);
1025 return EOS_P(p) ? Qtrue : Qfalse;
1026}
1027
1028/*
1029 * Equivalent to #eos?.
1030 * This method is obsolete, use #eos? instead.
1031 */
1032static VALUE
1033strscan_empty_p(VALUE self)
1034{
1035 rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
1036 return strscan_eos_p(self);
1037}
1038
1039/*
1040 * Returns true iff there is more data in the string. See #eos?.
1041 * This method is obsolete; use #eos? instead.
1042 *
1043 * s = StringScanner.new('test string')
1044 * s.eos? # These two
1045 * s.rest? # are opposites.
1046 */
1047static VALUE
1048strscan_rest_p(VALUE self)
1049{
1050 struct strscanner *p;
1051
1052 GET_SCANNER(self, p);
1053 return EOS_P(p) ? Qfalse : Qtrue;
1054}
1055
1056/*
1057 * Returns +true+ iff the last match was successful.
1058 *
1059 * s = StringScanner.new('test string')
1060 * s.match?(/\w+/) # => 4
1061 * s.matched? # => true
1062 * s.match?(/\d+/) # => nil
1063 * s.matched? # => false
1064 */
1065static VALUE
1066strscan_matched_p(VALUE self)
1067{
1068 struct strscanner *p;
1069
1070 GET_SCANNER(self, p);
1071 return MATCHED_P(p) ? Qtrue : Qfalse;
1072}
1073
1074/*
1075 * Returns the last matched string.
1076 *
1077 * s = StringScanner.new('test string')
1078 * s.match?(/\w+/) # -> 4
1079 * s.matched # -> "test"
1080 */
1081static VALUE
1082strscan_matched(VALUE self)
1083{
1084 struct strscanner *p;
1085
1086 GET_SCANNER(self, p);
1087 if (! MATCHED_P(p)) return Qnil;
1088 return extract_range(p,
1089 adjust_register_position(p, p->regs.beg[0]),
1090 adjust_register_position(p, p->regs.end[0]));
1091}
1092
1093/*
1094 * Returns the size of the most recent match (see #matched), or +nil+ if there
1095 * was no recent match.
1096 *
1097 * s = StringScanner.new('test string')
1098 * s.check /\w+/ # -> "test"
1099 * s.matched_size # -> 4
1100 * s.check /\d+/ # -> nil
1101 * s.matched_size # -> nil
1102 */
1103static VALUE
1104strscan_matched_size(VALUE self)
1105{
1106 struct strscanner *p;
1107
1108 GET_SCANNER(self, p);
1109 if (! MATCHED_P(p)) return Qnil;
1110 return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
1111}
1112
/* Resolves the group name [name, name_end) to a register number.
 * Returns the number when it is >= 1; otherwise raises IndexError, with the
 * message built in encoding `enc`.
 * NOTE(review): listing lines 1118 (the start of the call that assigns
 * `num`; only its trailing arguments are visible) and 1128 are not shown in
 * this view. */
1113static int
1114name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
1115{
1116 int num;
1117
1119 (const unsigned char* )name, (const unsigned char* )name_end, regs);
1120 if (num >= 1) {
1121 return num;
1122 }
1123 else {
1124 rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
1125 rb_long2int(name_end - name), name);
1126 }
1127
1129}
1130
1131/*
1132 * call-seq: [](n)
1133 *
1134 * Returns the n-th subgroup in the most recent match.
1135 *
1136 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1137 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1138 * s[0] # -> "Fri Dec 12 "
1139 * s[1] # -> "Fri"
1140 * s[2] # -> "Dec"
1141 * s[3] # -> "12"
1142 * s.post_match # -> "1975 14:39"
1143 * s.pre_match # -> ""
1144 *
1145 * s.reset
1146 * s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
1147 * s[0] # -> "Fri Dec 12 "
1148 * s[1] # -> "Fri"
1149 * s[2] # -> "Dec"
1150 * s[3] # -> "12"
1151 * s[:wday] # -> "Fri"
1152 * s[:month] # -> "Dec"
1153 * s[:day] # -> "12"
1154 * s.post_match # -> "1975 14:39"
1155 * s.pre_match # -> ""
1156 */
1157static VALUE
1158strscan_aref(VALUE self, VALUE idx)
1159{
1160 const char *name;
1161 struct strscanner *p;
1162 long i;
1163
1164 GET_SCANNER(self, p);
1165 if (! MATCHED_P(p)) return Qnil;
1166
1167 switch (TYPE(idx)) {
1168 case T_SYMBOL:
1169 idx = rb_sym2str(idx);
1170 /* fall through */
1171 case T_STRING:
1172 if (!p->regex) return Qnil;
1173 RSTRING_GETMEM(idx, name, i);
1174 i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
1175 break;
1176 default:
1177 i = NUM2LONG(idx);
1178 }
1179
1180 if (i < 0)
1181 i += p->regs.num_regs;
1182 if (i < 0) return Qnil;
1183 if (i >= p->regs.num_regs) return Qnil;
1184 if (p->regs.beg[i] == -1) return Qnil;
1185
1186 return extract_range(p,
1187 adjust_register_position(p, p->regs.beg[i]),
1188 adjust_register_position(p, p->regs.end[i]));
1189}
1190
1191/*
1192 * call-seq: size
1193 *
1194 * Returns the amount of subgroups in the most recent match.
1195 * The full match counts as a subgroup.
1196 *
1197 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1198 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1199 * s.size # -> 4
1200 */
1201static VALUE
1202strscan_size(VALUE self)
1203{
1204 struct strscanner *p;
1205
1206 GET_SCANNER(self, p);
1207 if (! MATCHED_P(p)) return Qnil;
1208 return INT2FIX(p->regs.num_regs);
1209}
1210
1211/*
1212 * call-seq: captures
1213 *
1214 * Returns the subgroups in the most recent match (not including the full match).
1215 * If nothing was priorly matched, it returns nil.
1216 *
1217 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1218 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1219 * s.captures # -> ["Fri", "Dec", "12"]
1220 * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1221 * s.captures # -> nil
1222 */
1223static VALUE
1224strscan_captures(VALUE self)
1225{
1226 struct strscanner *p;
1227 int i, num_regs;
1228 VALUE new_ary;
1229
1230 GET_SCANNER(self, p);
1231 if (! MATCHED_P(p)) return Qnil;
1232
1233 num_regs = p->regs.num_regs;
1234 new_ary = rb_ary_new2(num_regs);
1235
1236 for (i = 1; i < num_regs; i++) {
1237 VALUE str = extract_range(p,
1238 adjust_register_position(p, p->regs.beg[i]),
1239 adjust_register_position(p, p->regs.end[i]));
1240 rb_ary_push(new_ary, str);
1241 }
1242
1243 return new_ary;
1244}
1245
1246/*
1247 * call-seq:
1248 * scanner.values_at( i1, i2, ... iN ) -> an_array
1249 *
1250 * Returns the subgroups in the most recent match at the given indices.
1251 * If nothing was priorly matched, it returns nil.
1252 *
1253 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1254 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1255 * s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1256 * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1257 * s.values_at 0, -1, 5, 2 # -> nil
1258 */
1259
1260static VALUE
1261strscan_values_at(int argc, VALUE *argv, VALUE self)
1262{
1263 struct strscanner *p;
1264 long i;
1265 VALUE new_ary;
1266
1267 GET_SCANNER(self, p);
1268 if (! MATCHED_P(p)) return Qnil;
1269
1270 new_ary = rb_ary_new2(argc);
1271 for (i = 0; i<argc; i++) {
1272 rb_ary_push(new_ary, strscan_aref(self, argv[i]));
1273 }
1274
1275 return new_ary;
1276}
1277
1278/*
1279 * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1280 *
1281 * s = StringScanner.new('test string')
1282 * s.scan(/\w+/) # -> "test"
1283 * s.scan(/\s+/) # -> " "
1284 * s.pre_match # -> "test"
1285 * s.post_match # -> "string"
1286 */
1287static VALUE
1288strscan_pre_match(VALUE self)
1289{
1290 struct strscanner *p;
1291
1292 GET_SCANNER(self, p);
1293 if (! MATCHED_P(p)) return Qnil;
1294 return extract_range(p,
1295 0,
1296 adjust_register_position(p, p->regs.beg[0]));
1297}
1298
1299/*
1300 * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1301 *
1302 * s = StringScanner.new('test string')
1303 * s.scan(/\w+/) # -> "test"
1304 * s.scan(/\s+/) # -> " "
1305 * s.pre_match # -> "test"
1306 * s.post_match # -> "string"
1307 */
1308static VALUE
1309strscan_post_match(VALUE self)
1310{
1311 struct strscanner *p;
1312
1313 GET_SCANNER(self, p);
1314 if (! MATCHED_P(p)) return Qnil;
1315 return extract_range(p,
1316 adjust_register_position(p, p->regs.end[0]),
1317 S_LEN(p));
1318}
1319
1320/*
1321 * Returns the "rest" of the string (i.e. everything after the scan pointer).
1322 * If there is no more data (eos? = true), it returns <tt>""</tt>.
1323 */
1324static VALUE
1325strscan_rest(VALUE self)
1326{
1327 struct strscanner *p;
1328
1329 GET_SCANNER(self, p);
1330 if (EOS_P(p)) {
1331 return str_new(p, "", 0);
1332 }
1333 return extract_range(p, p->curr, S_LEN(p));
1334}
1335
1336/*
1337 * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
1338 */
1339static VALUE
1340strscan_rest_size(VALUE self)
1341{
1342 struct strscanner *p;
1343 long i;
1344
1345 GET_SCANNER(self, p);
1346 if (EOS_P(p)) {
1347 return INT2FIX(0);
1348 }
1349 i = S_RESTLEN(p);
1350 return INT2FIX(i);
1351}
1352
1353/*
1354 * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1355 * This method is obsolete; use #rest_size instead.
1356 */
1357static VALUE
1358strscan_restsize(VALUE self)
1359{
1360 rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
1361 return strscan_rest_size(self);
1362}
1363
1364#define INSPECT_LENGTH 5
1365
1366/*
1367 * Returns a string that represents the StringScanner object, showing:
1368 * - the current position
1369 * - the size of the string
1370 * - the characters surrounding the scan pointer
1371 *
1372 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1373 * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
1374 * s.scan_until /12/ # -> "Fri Dec 12"
1375 * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
1376 */
1377static VALUE
1378strscan_inspect(VALUE self)
1379{
1380 struct strscanner *p;
1381 VALUE a, b;
1382
1383 p = check_strscan(self);
1384 if (NIL_P(p->str)) {
1385 a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
1386 return a;
1387 }
1388 if (EOS_P(p)) {
1389 a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
1390 return a;
1391 }
1392 if (p->curr == 0) {
1393 b = inspect2(p);
1394 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
1395 rb_obj_class(self),
1396 p->curr, S_LEN(p),
1397 b);
1398 return a;
1399 }
1400 a = inspect1(p);
1401 b = inspect2(p);
1402 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
1403 rb_obj_class(self),
1404 p->curr, S_LEN(p),
1405 a, b);
1406 return a;
1407}
1408
1409static VALUE
1410inspect1(struct strscanner *p)
1411{
1412 VALUE str;
1413 long len;
1414
1415 if (p->curr == 0) return rb_str_new2("");
1416 if (p->curr > INSPECT_LENGTH) {
1417 str = rb_str_new_cstr("...");
1419 }
1420 else {
1421 str = rb_str_new(0, 0);
1422 len = p->curr;
1423 }
1424 rb_str_cat(str, CURPTR(p) - len, len);
1425 return rb_str_dump(str);
1426}
1427
1428static VALUE
1429inspect2(struct strscanner *p)
1430{
1431 VALUE str;
1432 long len;
1433
1434 if (EOS_P(p)) return rb_str_new2("");
1435 len = S_RESTLEN(p);
1436 if (len > INSPECT_LENGTH) {
1438 rb_str_cat2(str, "...");
1439 }
1440 else {
1441 str = rb_str_new(CURPTR(p), len);
1442 }
1443 return rb_str_dump(str);
1444}
1445
1446/*
1447 * call-seq:
1448 * scanner.fixed_anchor? -> true or false
1449 *
1450 * Whether +scanner+ uses fixed anchor mode or not.
1451 *
1452 * If fixed anchor mode is used, +\A+ always matches the beginning of
1453 * the string. Otherwise, +\A+ always matches the current position.
1454 */
1455static VALUE
1456strscan_fixed_anchor_p(VALUE self)
1457{
1458 struct strscanner *p;
1459 p = check_strscan(self);
1460 return p->fixed_anchor_p ? Qtrue : Qfalse;
1461}
1462
1463/* =======================================================================
1464 Ruby Interface
1465 ======================================================================= */
1466
1467/*
1468 * Document-class: StringScanner
1469 *
1470 * StringScanner provides for lexical scanning operations on a String. Here is
1471 * an example of its usage:
1472 *
1473 * s = StringScanner.new('This is an example string')
1474 * s.eos? # -> false
1475 *
1476 * p s.scan(/\w+/) # -> "This"
1477 * p s.scan(/\w+/) # -> nil
1478 * p s.scan(/\s+/) # -> " "
1479 * p s.scan(/\s+/) # -> nil
1480 * p s.scan(/\w+/) # -> "is"
1481 * s.eos? # -> false
1482 *
1483 * p s.scan(/\s+/) # -> " "
1484 * p s.scan(/\w+/) # -> "an"
1485 * p s.scan(/\s+/) # -> " "
1486 * p s.scan(/\w+/) # -> "example"
1487 * p s.scan(/\s+/) # -> " "
1488 * p s.scan(/\w+/) # -> "string"
1489 * s.eos? # -> true
1490 *
1491 * p s.scan(/\s+/) # -> nil
1492 * p s.scan(/\w+/) # -> nil
1493 *
1494 * Scanning a string means remembering the position of a <i>scan pointer</i>,
1495 * which is just an index. The point of scanning is to move forward a bit at
1496 * a time, so matches are sought after the scan pointer; usually immediately
1497 * after it.
1498 *
1499 * Given the string "test string", here are the pertinent scan pointer
1500 * positions:
1501 *
1502 * t e s t s t r i n g
1503 * 0 1 2 ... 1
1504 * 0
1505 *
1506 * When you #scan for a pattern (a regular expression), the match must occur
1507 * at the character after the scan pointer. If you use #scan_until, then the
1508 * match can occur anywhere after the scan pointer. In both cases, the scan
1509 * pointer moves <i>just beyond</i> the last character of the match, ready to
1510 * scan again from the next character onwards. This is demonstrated by the
1511 * example above.
1512 *
1513 * == Method Categories
1514 *
1515 * There are other methods besides the plain scanners. You can look ahead in
1516 * the string without actually scanning. You can access the most recent match.
1517 * You can modify the string being scanned, reset or terminate the scanner,
1518 * find out or change the position of the scan pointer, skip ahead, and so on.
1519 *
1520 * === Advancing the Scan Pointer
1521 *
1522 * - #getch
1523 * - #get_byte
1524 * - #scan
1525 * - #scan_until
1526 * - #skip
1527 * - #skip_until
1528 *
1529 * === Looking Ahead
1530 *
1531 * - #check
1532 * - #check_until
1533 * - #exist?
1534 * - #match?
1535 * - #peek
1536 *
1537 * === Finding Where we Are
1538 *
1539 * - #beginning_of_line? (#bol?)
1540 * - #eos?
1541 * - #rest?
1542 * - #rest_size
1543 * - #pos
1544 *
1545 * === Setting Where we Are
1546 *
1547 * - #reset
1548 * - #terminate
1549 * - #pos=
1550 *
1551 * === Match Data
1552 *
1553 * - #matched
1554 * - #matched?
1555 * - #matched_size
1556 * - []
1557 * - #pre_match
1558 * - #post_match
1559 *
1560 * === Miscellaneous
1561 *
1562 * - <<
1563 * - #concat
1564 * - #string
1565 * - #string=
1566 * - #unscan
1567 *
1568 * There are aliases to several of the methods.
1569 */
1570void
1572{
1573#undef rb_intern
1574 ID id_scanerr = rb_intern("ScanError");
1575 VALUE tmp;
1576
1577 id_byteslice = rb_intern("byteslice");
1578
1579 StringScanner = rb_define_class("StringScanner", rb_cObject);
1580 ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1581 if (!rb_const_defined(rb_cObject, id_scanerr)) {
1582 rb_const_set(rb_cObject, id_scanerr, ScanError);
1583 }
1585 rb_obj_freeze(tmp);
1586 rb_const_set(StringScanner, rb_intern("Version"), tmp);
1587 tmp = rb_str_new2("$Id$");
1588 rb_obj_freeze(tmp);
1589 rb_const_set(StringScanner, rb_intern("Id"), tmp);
1590
1591 rb_define_alloc_func(StringScanner, strscan_s_allocate);
1592 rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
1593 rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
1594 rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
1595 rb_define_method(StringScanner, "reset", strscan_reset, 0);
1596 rb_define_method(StringScanner, "terminate", strscan_terminate, 0);
1597 rb_define_method(StringScanner, "clear", strscan_clear, 0);
1598 rb_define_method(StringScanner, "string", strscan_get_string, 0);
1599 rb_define_method(StringScanner, "string=", strscan_set_string, 1);
1600 rb_define_method(StringScanner, "concat", strscan_concat, 1);
1601 rb_define_method(StringScanner, "<<", strscan_concat, 1);
1602 rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
1603 rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
1604 rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
1605 rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
1606 rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
1607
1608 rb_define_method(StringScanner, "scan", strscan_scan, 1);
1609 rb_define_method(StringScanner, "skip", strscan_skip, 1);
1610 rb_define_method(StringScanner, "match?", strscan_match_p, 1);
1611 rb_define_method(StringScanner, "check", strscan_check, 1);
1612 rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3);
1613
1614 rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1);
1615 rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1);
1616 rb_define_method(StringScanner, "exist?", strscan_exist_p, 1);
1617 rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
1618 rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
1619
1620 rb_define_method(StringScanner, "getch", strscan_getch, 0);
1621 rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
1622 rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
1623 rb_define_method(StringScanner, "peek", strscan_peek, 1);
1624 rb_define_method(StringScanner, "peep", strscan_peep, 1);
1625
1626 rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
1627
1628 rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
1629 rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
1630 rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
1631 rb_define_method(StringScanner, "empty?", strscan_empty_p, 0);
1632 rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
1633
1634 rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
1635 rb_define_method(StringScanner, "matched", strscan_matched, 0);
1636 rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
1637 rb_define_method(StringScanner, "[]", strscan_aref, 1);
1638 rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
1639 rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
1640 rb_define_method(StringScanner, "size", strscan_size, 0);
1641 rb_define_method(StringScanner, "captures", strscan_captures, 0);
1642 rb_define_method(StringScanner, "values_at", strscan_values_at, -1);
1643
1644 rb_define_method(StringScanner, "rest", strscan_rest, 0);
1645 rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
1646 rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1647
1648 rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1649
1650 rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1651}
struct RIMemo * ptr
Definition: debug.c:65
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:872
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:990
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:891
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1020
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
void rb_memerror(void)
Definition: gc.c:9611
VALUE rb_define_class(const char *, VALUE)
Defines a top-level class.
Definition: class.c:662
VALUE rb_define_class_under(VALUE, const char *, VALUE)
Defines a class under the namespace of outer.
Definition: class.c:711
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *)
Definition: class.c:1904
VALUE rb_cObject
Object class.
Definition: ruby.h:2012
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2671
VALUE rb_eStandardError
Definition: error.c:921
VALUE rb_eRangeError
Definition: error.c:928
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:891
void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt,...)
Definition: error.c:2652
VALUE rb_eIndexError
Definition: error.c:926
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
VALUE rb_obj_freeze(VALUE)
Make the object unmodifiable.
Definition: object.c:1080
const char * name
Definition: nkf.c:208
ONIG_EXTERN int onig_region_set(OnigRegion *region, int at, int beg, int end)
Definition: regexec.c:305
ONIG_EXTERN void onig_region_init(OnigRegion *region)
Definition: regexec.c:320
ONIG_EXTERN void onig_region_free(OnigRegion *region, int free_self)
Definition: regexec.c:343
#define UChar
Definition: onigmo.h:76
ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *start, const OnigUChar *range, OnigRegion *region, OnigOptionType option)
ONIG_EXTERN void onig_free(OnigRegex)
ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *at, OnigRegion *region, OnigOptionType option)
ONIG_EXTERN void onig_region_clear(OnigRegion *region)
Definition: regexec.c:235
ONIG_EXTERN int onig_name_to_backref_number(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, const OnigRegion *region)
#define ONIG_OPTION_NONE
Definition: onigmo.h:450
#define rb_str_new2
#define RREGEXP(obj)
int memcmp(const void *, const void *, size_t)
Definition: memcmp.c:7
use StringValue() instead")))
#define RSTRING_LEN(str)
#define _(args)
#define RTEST(v)
void rb_define_private_method(VALUE, const char *, VALUE(*)(), int)
#define T_STRING
#define Qundef
#define rb_str_cat2
const VALUE VALUE obj
#define TYPE(x)
#define UNREACHABLE
#define RSTRING_PTR(str)
#define rb_str_new(str, len)
#define NIL_P(v)
VALUE rb_str_cat(VALUE, const char *, long)
Definition: string.c:2812
const char size_t n
VALUE rb_ary_push(VALUE, VALUE)
Definition: array.c:1195
VALUE rb_sym2str(VALUE)
Definition: symbol.c:784
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
uint32_t i
#define RSTRING_GETMEM(str, ptrvar, lenvar)
__inline__ const void *__restrict__ size_t len
#define INT2NUM(x)
void rb_alias(VALUE, ID, ID)
Definition: vm_method.c:1598
#define NUM2INT(x)
void rb_define_singleton_method(VALUE, const char *, VALUE(*)(), int)
#define rb_long2int(n)
#define RB_GC_GUARD(v)
#define RUBY_TYPED_FREE_IMMEDIATELY
#define RREGEXP_PTR(r)
#define PRIsVALUE
#define rb_funcall(recv, mid, argc,...)
int VALUE v
#define rb_scan_args(argc, argvp, fmt,...)
void rb_gc_mark(VALUE)
Definition: gc.c:5228
#define rb_intern(str)
void rb_const_set(VALUE, ID, VALUE)
Definition: variable.c:2756
unsigned int size
#define Qtrue
VALUE rb_str_dump(VALUE)
Definition: string.c:6042
VALUE rb_str_append(VALUE, VALUE)
Definition: string.c:2965
#define Qnil
#define Qfalse
VALUE rb_check_hash_type(VALUE)
Definition: hash.c:1852
#define RB_TYPE_P(obj, type)
#define INT2FIX(i)
#define T_SYMBOL
#define TypedData_Make_Struct(klass, type, data_type, sval)
const VALUE * argv
void void ruby_xfree(void *)
Definition: gc.c:10183
#define Check_Type(v, t)
VALUE rb_sprintf(const char *,...) __attribute__((format(printf
unsigned long ID
const char *void rb_warning(const char *,...) __attribute__((format(printf
#define NUM2LONG(x)
void rb_define_method(VALUE, const char *, VALUE(*)(), int)
#define rb_ary_new2
VALUE rb_str_length(VALUE)
Definition: string.c:1843
#define rb_str_new_cstr(str)
int rb_const_defined(VALUE, ID)
Definition: variable.c:2686
#define T_REGEXP
int rb_reg_region_copy(struct re_registers *, const struct re_registers *)
Definition: re.c:946
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Definition: re.c:1499
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5672
unsigned long VALUE
Definition: ruby.h:102
#define f
#define S_LEN(s)
Definition: strscan.c:63
#define S_PBEG(s)
Definition: strscan.c:62
#define STRSCAN_VERSION
Definition: strscan.c:25
#define S_RESTLEN(s)
Definition: strscan.c:66
#define S_PEND(s)
Definition: strscan.c:64
#define CLEAR_MATCH_STATUS(s)
Definition: strscan.c:60
#define CURPTR(s)
Definition: strscan.c:65
#define GET_SCANNER(obj, var)
Definition: strscan.c:70
#define MATCHED_P(s)
Definition: strscan.c:58
#define EOS_P(s)
Definition: strscan.c:68
#define INSPECT_LENGTH
Definition: strscan.c:1364
void Init_strscan(void)
Definition: strscan.c:1571
#define MATCHED(s)
Definition: strscan.c:59
OnigPosition * beg
Definition: onigmo.h:719
int num_regs
Definition: onigmo.h:718
OnigPosition * end
Definition: onigmo.h:720
struct re_registers regs
Definition: strscan.c:49
VALUE str
Definition: strscan.c:42
unsigned long flags
Definition: strscan.c:38
bool fixed_anchor_p
Definition: strscan.c:55
long curr
Definition: strscan.c:46
VALUE regex
Definition: strscan.c:52
long prev
Definition: strscan.c:45