14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/numeric.h"
35#include "internal/object.h"
36#include "internal/proc.h"
37#include "internal/re.h"
38#include "internal/sanitizers.h"
39#include "internal/string.h"
40#include "internal/transcode.h"
45#include "ruby_assert.h"
48#if defined HAVE_CRYPT_R
49# if defined HAVE_CRYPT_H
52#elif !defined HAVE_CRYPT
53# include "missing/crypt.h"
54# define HAVE_CRYPT_R 1
/* Shorthands for the start and end offset of capture group `no`.
 * Both expand to an access through a variable named `regs`, so they can
 * only be used where such a variable is in scope. */
57#define BEG(no) (regs->beg[(no)])
58#define END(no) (regs->end[(no)])
61#undef rb_usascii_str_new
65#undef rb_usascii_str_new_cstr
66#undef rb_utf8_str_new_cstr
67#undef rb_enc_str_new_cstr
68#undef rb_external_str_new_cstr
69#undef rb_locale_str_new_cstr
70#undef rb_str_dup_frozen
71#undef rb_str_buf_new_cstr
/* NOTE(review): presumably the largest number of bytes one character may
 * occupy; no use site is visible nearby -- confirm before relying on it. */
101#define RUBY_MAX_CHAR_LEN 16
/* String-private flag bits. Each one is an alias for a generic FL_USERn bit. */
/* Set by STR_SET_SHARED() on the string whose buffer is being shared. */
102#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED() on the shared string when its class slot is 0. */
103#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(). */
104#define STR_TMPLOCK FL_USER7
/* NOTE(review): presumably marks a buffer that this string must not free
 * (e.g. static data handed to str_new_static) -- confirm at the free sites. */
105#define STR_NOFREE FL_USER18
/* STR_SET_SHARED() does nothing for a string carrying this flag. */
106#define STR_FAKESTR FL_USER19
108#define STR_SET_NOEMBED(str) do {\
109 FL_SET((str), STR_NOEMBED);\
110 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
112#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
114#define STR_SET_LEN(str, n) do { \
115 RSTRING(str)->len = (n); \
119str_enc_fastpath(
VALUE str)
123 case ENCINDEX_ASCII_8BIT:
125 case ENCINDEX_US_ASCII:
/* Terminator length of `str`: 1 when str_enc_fastpath(str) holds, otherwise
 * the minimum character length (rb_enc_mbminlen) of the string's encoding.
 * `str` can be evaluated more than once, so pass a side-effect-free
 * expression. */
132#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
133#define TERM_FILL(ptr, termlen) do {\
134 char *const term_fill_ptr = (ptr);\
135 const int term_fill_len = (termlen);\
136 *term_fill_ptr = '\0';\
137 if (UNLIKELY(term_fill_len > 1))\
138 memset(term_fill_ptr, 0, term_fill_len);\
141#define RESIZE_CAPA(str,capacity) do {\
142 const int termlen = TERM_LEN(str);\
143 RESIZE_CAPA_TERM(str,capacity,termlen);\
145#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
146 if (STR_EMBED_P(str)) {\
147 if (str_embed_capa(str) < capacity + termlen) {\
148 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
149 const long tlen = RSTRING_LEN(str);\
150 memcpy(tmp, RSTRING_PTR(str), tlen);\
151 RSTRING(str)->as.heap.ptr = tmp;\
152 RSTRING(str)->len = tlen;\
153 STR_SET_NOEMBED(str);\
154 RSTRING(str)->as.heap.aux.capa = (capacity);\
158 assert(!FL_TEST((str), STR_SHARED)); \
159 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
160 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
161 RSTRING(str)->as.heap.aux.capa = (capacity);\
165#define STR_SET_SHARED(str, shared_str) do { \
166 if (!FL_TEST(str, STR_FAKESTR)) { \
167 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
168 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
169 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
170 FL_SET((str), STR_SHARED); \
171 FL_SET((shared_str), STR_SHARED_ROOT); \
172 if (RBASIC_CLASS((shared_str)) == 0) \
173 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Raw heap buffer pointer of `str`; reads the as.heap member directly, so it
 * is only meaningful for a string that is not embedded. */
177#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Size of the heap buffer: the recorded capacity plus the terminator
 * length. This is the size passed to the sized free/realloc helpers. */
178#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Encoding of `str`, via the file-local get_encoding(). */
181#define STR_ENC_GET(str) get_encoding(str)
/* SHARABLE_MIDDLE_SUBSTRING is a build-time switch that defaults to 0 when
 * the build does not define it. */
183#if !defined SHARABLE_MIDDLE_SUBSTRING
184# define SHARABLE_MIDDLE_SUBSTRING 0
186#if !SHARABLE_MIDDLE_SUBSTRING
/* With the switch off, only a substring that runs to the very end
 * (beg + len == end) counts as sharable. */
187#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
/* Alternative definition: every substring counts as sharable. */
189#define SHARABLE_SUBSTRING_P(beg, len, end) 1
194str_embed_capa(
VALUE str)
196 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
200rb_str_reembeddable_p(
VALUE str)
202 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
206rb_str_embed_size(
long capa)
212rb_str_size_as_embedded(
VALUE str)
215 if (STR_EMBED_P(str)) {
216 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
220 else if (rb_str_reembeddable_p(str)) {
221 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
224 real_size =
sizeof(
struct RString);
230STR_EMBEDDABLE_P(
long len,
long termlen)
232 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
/* Forward declarations of static helpers that are used before their
 * definitions further down in this file. */
237static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
238static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
240static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
241static inline void str_modifiable(
VALUE str);
245str_make_independent(
VALUE str)
247 long len = RSTRING_LEN(str);
248 int termlen = TERM_LEN(str);
249 str_make_independent_expand((str),
len, 0L, termlen);
252static inline int str_dependent_p(
VALUE str);
255rb_str_make_independent(
VALUE str)
257 if (str_dependent_p(str)) {
258 str_make_independent(str);
263rb_str_make_embedded(
VALUE str)
268 char *buf =
RSTRING(str)->as.heap.ptr;
272 STR_SET_LEN(str,
len);
275 memcpy(RSTRING_PTR(str), buf,
len);
279 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
283rb_debug_rstring_null_ptr(
const char *func)
285 fprintf(stderr,
"%s is returning NULL!! "
286 "SIGSEGV is highly expected to follow immediately.\n"
287 "If you could reproduce, attach your debugger here, "
288 "and look at the passed string.\n",
293static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
296get_encoding(
VALUE str)
302mustnot_broken(
VALUE str)
304 if (is_broken_string(str)) {
305 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
310mustnot_wchar(
VALUE str)
314 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
320static VALUE register_fstring(
VALUE str,
bool copy);
/* True when `str` does not have the FL_EXIVAR flag set and its class is
 * exactly rb_cString (not a subclass). */
327#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
335fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
345 if (rb_objspace_garbage_object_p(str)) {
357 rb_enc_copy(new_str, str);
370 if (STR_SHARED_P(str)) {
372 str_make_independent(str);
375 if (!BARE_STRING_P(str)) {
379 RBASIC(str)->flags |= RSTRING_FSTR;
381 *key = *value = arg->fstr = str;
395 if (
FL_TEST(str, RSTRING_FSTR))
398 bare = BARE_STRING_P(str);
400 if (STR_EMBED_P(str)) {
405 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
412 rb_str_resize(str, RSTRING_LEN(str));
414 fstr = register_fstring(str, FALSE);
417 str_replace_shared_without_enc(str, fstr);
425register_fstring(
VALUE str,
bool copy)
432 st_table *frozen_strings = rb_vm_fstring_table();
435 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
436 }
while (UNDEF_P(args.fstr));
448setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
464 return (
VALUE)fake_str;
473 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
482rb_fstring_new(
const char *ptr,
long len)
485 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
492 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
496rb_fstring_cstr(
const char *
ptr)
498 return rb_fstring_new(
ptr, strlen(
ptr));
502fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
512 const char *aptr, *bptr;
515 return (alen != blen ||
517 memcmp(aptr, bptr, alen) != 0);
521single_byte_optimizable(
VALUE str)
529 enc = STR_ENC_GET(str);
540static inline const char *
541search_nonascii(
const char *p,
const char *e)
543 const uintptr_t *s, *t;
545#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
546# if SIZEOF_UINTPTR_T == 8
547# define NONASCII_MASK UINT64_C(0x8080808080808080)
548# elif SIZEOF_UINTPTR_T == 4
549# define NONASCII_MASK UINT32_C(0x80808080)
551# error "don't know what to do."
554# if SIZEOF_UINTPTR_T == 8
555# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
556# elif SIZEOF_UINTPTR_T == 4
557# define NONASCII_MASK 0x80808080UL
559# error "don't know what to do."
563 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
564#if !UNALIGNED_WORD_ACCESS
565 if ((uintptr_t)p % SIZEOF_VOIDP) {
566 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
571 case 7:
if (p[-7]&0x80)
return p-7;
572 case 6:
if (p[-6]&0x80)
return p-6;
573 case 5:
if (p[-5]&0x80)
return p-5;
574 case 4:
if (p[-4]&0x80)
return p-4;
576 case 3:
if (p[-3]&0x80)
return p-3;
577 case 2:
if (p[-2]&0x80)
return p-2;
578 case 1:
if (p[-1]&0x80)
return p-1;
583#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
584#define aligned_ptr(value) \
585 __builtin_assume_aligned((value), sizeof(uintptr_t))
587#define aligned_ptr(value) (uintptr_t *)(value)
590 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
593 if (*s & NONASCII_MASK) {
594#ifdef WORDS_BIGENDIAN
595 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
597 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
607 case 7:
if (e[-7]&0x80)
return e-7;
608 case 6:
if (e[-6]&0x80)
return e-6;
609 case 5:
if (e[-5]&0x80)
return e-5;
610 case 4:
if (e[-4]&0x80)
return e-4;
612 case 3:
if (e[-3]&0x80)
return e-3;
613 case 2:
if (e[-2]&0x80)
return e-2;
614 case 1:
if (e[-1]&0x80)
return e-1;
622 const char *e = p +
len;
624 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
626 p = search_nonascii(p, e);
630 if (rb_enc_asciicompat(enc)) {
631 p = search_nonascii(p, e);
634 int ret = rb_enc_precise_mbclen(p, e, enc);
638 p = search_nonascii(p, e);
644 int ret = rb_enc_precise_mbclen(p, e, enc);
660 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
663 p = search_nonascii(p, e);
667 else if (rb_enc_asciicompat(enc)) {
668 p = search_nonascii(p, e);
674 int ret = rb_enc_precise_mbclen(p, e, enc);
681 p = search_nonascii(p, e);
687 int ret = rb_enc_precise_mbclen(p, e, enc);
712 rb_enc_set_index(str1, rb_enc_get_index(str2));
720rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
725 str_enc_copy(dest, src);
726 if (RSTRING_LEN(dest) == 0) {
727 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
738 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
739 search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
750rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
752 str_enc_copy(dest, src);
759 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
765 return enc_coderange_scan(str, enc);
774 cr = enc_coderange_scan(str, get_encoding(str));
785 if (!rb_enc_asciicompat(enc))
787 else if (is_ascii_string(str))
793str_mod_check(
VALUE s,
const char *p,
long len)
795 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
801str_capacity(
VALUE str,
const int termlen)
803 if (STR_EMBED_P(str)) {
804 return str_embed_capa(str) - termlen;
806 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
810 return RSTRING(str)->as.heap.aux.capa;
817 return str_capacity(str, TERM_LEN(str));
821must_not_null(
const char *
ptr)
824 rb_raise(rb_eArgError,
"NULL pointer given");
831 size_t size = rb_str_embed_size(
capa);
833 assert(rb_gc_size_allocatable_p(size));
842str_alloc_heap(
VALUE klass)
851empty_str_alloc(
VALUE klass)
853 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
854 VALUE str = str_alloc_embed(klass, 0);
855 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
860str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
865 rb_raise(rb_eArgError,
"negative string size (or size too big)");
868 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
870 if (STR_EMBEDDABLE_P(
len, termlen)) {
871 str = str_alloc_embed(klass,
len + termlen);
877 str = str_alloc_heap(klass);
883 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
886 memcpy(RSTRING_PTR(str),
ptr,
len);
888 STR_SET_LEN(str,
len);
889 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
896 return str_new0(klass,
ptr,
len, 1);
917 rb_enc_associate_index(str, rb_utf8_encindex());
929 rb_enc_associate(str, enc);
941 __msan_unpoison_string(
ptr);
957 rb_enc_associate_index(str, rb_utf8_encindex());
966 rb_raise(rb_eArgError,
"wchar encoding given");
968 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
972str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
977 rb_raise(rb_eArgError,
"negative string size (or size too big)");
981 rb_encoding *enc = rb_enc_get_from_index(encindex);
985 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
986 str = str_alloc_heap(klass);
990 RBASIC(str)->flags |= STR_NOFREE;
992 rb_enc_associate_index(str, encindex);
1020static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1022 int ecflags,
VALUE ecopts);
1027 int encidx = rb_enc_to_index(enc);
1028 if (rb_enc_get_index(str) == encidx)
1029 return is_ascii_string(str);
1040 if (!to)
return str;
1041 if (!from) from = rb_enc_get(str);
1042 if (from == to)
return str;
1043 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1044 rb_is_ascii8bit_enc(to)) {
1045 if (STR_ENC_GET(str) != to) {
1046 str = rb_str_dup(str);
1047 rb_enc_associate(str, to);
1053 newstr = str_cat_conv_enc_opts(rb_str_buf_new(
len), 0,
ptr,
len,
1054 from, to, ecflags, ecopts);
1055 if (
NIL_P(newstr)) {
1063rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1068 olen = RSTRING_LEN(newstr);
1069 if (ofs < -olen || olen < ofs)
1071 if (ofs < 0) ofs += olen;
1073 STR_SET_LEN(newstr, ofs);
1074 return rb_str_cat(newstr,
ptr,
len);
1077 rb_str_modify(newstr);
1078 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1086 STR_SET_LEN(str, 0);
1087 rb_enc_associate(str, enc);
1088 rb_str_cat(str,
ptr,
len);
1093str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1095 int ecflags,
VALUE ecopts)
1100 VALUE econv_wrapper;
1101 const unsigned char *start, *sp;
1102 unsigned char *dest, *dp;
1103 size_t converted_output = (size_t)ofs;
1108 RBASIC_CLEAR_CLASS(econv_wrapper);
1110 if (!ec)
return Qnil;
1113 sp = (
unsigned char*)
ptr;
1115 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1116 (dp = dest + converted_output),
1120 size_t converted_input = sp - start;
1121 size_t rest =
len - converted_input;
1122 converted_output = dp - dest;
1123 rb_str_set_len(newstr, converted_output);
1124 if (converted_input && converted_output &&
1125 rest < (LONG_MAX / converted_output)) {
1126 rest = (rest * converted_output) / converted_input;
1131 olen += rest < 2 ? 2 : rest;
1132 rb_str_resize(newstr, olen);
1138 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1139 rb_str_set_len(newstr,
len);
1140 rb_enc_associate(newstr, to);
1159 const int eidx = rb_enc_to_index(eenc);
1162 return rb_enc_str_new(
ptr,
len, eenc);
1166 if ((eidx == rb_ascii8bit_encindex()) ||
1167 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1171 ienc = rb_default_internal_encoding();
1172 if (!ienc || eenc == ienc) {
1173 return rb_enc_str_new(
ptr,
len, eenc);
1177 if ((eidx == rb_ascii8bit_encindex()) ||
1178 (eidx == rb_usascii_encindex()) ||
1179 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1180 return rb_enc_str_new(
ptr,
len, ienc);
1183 str = rb_enc_str_new(NULL, 0, ienc);
1186 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1187 rb_str_initialize(str,
ptr,
len, eenc);
1195 int eidx = rb_enc_to_index(eenc);
1196 if (eidx == rb_usascii_encindex() &&
1197 !is_ascii_string(str)) {
1198 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1201 rb_enc_associate_index(str, eidx);
1236rb_filesystem_str_new_cstr(
const char *
ptr)
1260str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1262 const int termlen = TERM_LEN(str);
1267 if (str_embed_capa(str2) >=
len + termlen) {
1268 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1269 STR_SET_EMBED(str2);
1270 memcpy(ptr2, RSTRING_PTR(str),
len);
1271 TERM_FILL(ptr2+
len, termlen);
1275 if (STR_SHARED_P(str)) {
1276 root =
RSTRING(str)->as.heap.aux.shared;
1280 root = rb_str_new_frozen(str);
1284 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1286 rb_fatal(
"about to free a possible shared root");
1288 char *ptr2 = STR_HEAP_PTR(str2);
1290 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1293 FL_SET(str2, STR_NOEMBED);
1295 STR_SET_SHARED(str2, root);
1298 STR_SET_LEN(str2,
len);
1306 str_replace_shared_without_enc(str2, str);
1307 rb_enc_cr_str_exact_copy(str2, str);
1314 return str_replace_shared(str_alloc_heap(klass), str);
1331rb_str_new_frozen_String(
VALUE orig)
1338rb_str_tmp_frozen_acquire(
VALUE orig)
1341 return str_new_frozen_buffer(0, orig, FALSE);
1345rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1350 if (STR_EMBED_P(tmp)) {
1359 assert(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1363 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1364 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1369 STR_SET_LEN(tmp, 0);
1377 return str_new_frozen_buffer(klass, orig, TRUE);
1383 assert(!STR_EMBED_P(orig));
1384 assert(!STR_SHARED_P(orig));
1386 VALUE str = str_alloc_heap(klass);
1387 STR_SET_LEN(str, RSTRING_LEN(orig));
1388 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1389 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1390 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1391 RBASIC(orig)->flags &= ~STR_NOFREE;
1392 STR_SET_SHARED(orig, str);
1399str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1403 long len = RSTRING_LEN(orig);
1404 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1406 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1407 str = str_new0(klass, RSTRING_PTR(orig),
len, termlen);
1408 assert(STR_EMBED_P(str));
1413 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1414 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1417 assert(ofs + rest <= RSTRING_LEN(
shared));
1420 if ((ofs > 0) || (rest > 0) ||
1423 str = str_new_shared(klass,
shared);
1424 assert(!STR_EMBED_P(str));
1425 RSTRING(str)->as.heap.ptr += ofs;
1426 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1434 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1435 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1437 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1438 STR_SET_LEN(str, RSTRING_LEN(orig));
1439 TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
1442 str = heap_str_make_shared(klass, orig);
1446 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1458str_new_empty_String(
VALUE str)
1461 rb_enc_copy(v, str);
/* Lower bound for an explicitly requested capacity: a smaller `capa` is
 * raised to this value where the capacity keyword is handled. */
1465#define STR_BUF_MIN_SIZE 63
1470 if (STR_EMBEDDABLE_P(
capa, 1)) {
1478 RSTRING(str)->as.heap.ptr[0] =
'\0';
1489 str = rb_str_buf_new(
len);
1498 return str_new(0, 0,
len);
1504 if (
FL_TEST(str, RSTRING_FSTR)) {
1505 st_data_t fstr = (st_data_t)str;
1509 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1510 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1515 if (STR_EMBED_P(str)) {
1516 RB_DEBUG_COUNTER_INC(obj_str_embed);
1518 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1519 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1520 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1523 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1524 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1528RUBY_FUNC_EXPORTED
size_t
1529rb_str_memsize(
VALUE str)
1531 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1532 return STR_HEAP_SIZE(str);
1542 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1545static inline void str_discard(
VALUE str);
1546static void str_shared_replace(
VALUE str,
VALUE str2);
1551 if (str != str2) str_shared_replace(str, str2);
1562 enc = STR_ENC_GET(str2);
1567 STR_SET_LEN(str, RSTRING_LEN(str2));
1569 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1571 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1572 rb_enc_associate(str, enc);
1576 if (STR_EMBED_P(str2)) {
1577 assert(!
FL_TEST(str2, STR_SHARED));
1578 long len = RSTRING_LEN(str2);
1579 assert(
len + termlen <= str_embed_capa(str2));
1581 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1582 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1583 RSTRING(str2)->as.heap.ptr = new_ptr;
1584 STR_SET_LEN(str2,
len);
1586 STR_SET_NOEMBED(str2);
1589 STR_SET_NOEMBED(str);
1591 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1593 if (
FL_TEST(str2, STR_SHARED)) {
1595 STR_SET_SHARED(str,
shared);
1598 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1602 STR_SET_EMBED(str2);
1603 RSTRING_PTR(str2)[0] = 0;
1604 STR_SET_LEN(str2, 0);
1605 rb_enc_associate(str, enc);
1619 return rb_obj_as_string_result(str, obj);
1635 len = RSTRING_LEN(str2);
1636 if (STR_SHARED_P(str2)) {
1639 STR_SET_NOEMBED(str);
1640 STR_SET_LEN(str,
len);
1641 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1642 STR_SET_SHARED(str,
shared);
1643 rb_enc_cr_str_exact_copy(str, str2);
1646 str_replace_shared(str, str2);
1655 size_t size = rb_str_embed_size(
capa);
1657 assert(rb_gc_size_allocatable_p(size));
1677 const VALUE flag_mask =
1683 if (STR_EMBED_P(str)) {
1684 long len = RSTRING_LEN(str);
1686 assert(STR_EMBED_P(dup));
1687 assert(str_embed_capa(dup) >=
len + 1);
1693 root =
RSTRING(str)->as.heap.aux.shared;
1695 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1696 root = str = str_new_frozen(klass, str);
1699 assert(!STR_SHARED_P(root));
1702 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1703 FL_SET(root, STR_SHARED_ROOT);
1705 flags |= RSTRING_NOEMBED | STR_SHARED;
1708 STR_SET_LEN(dup, RSTRING_LEN(str));
1711 encidx = rb_enc_get_index(str);
1712 flags &= ~ENCODING_MASK;
1715 if (encidx) rb_enc_associate_index(dup, encidx);
1723 if (STR_EMBED_P(str)) {
1724 dup = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
1727 dup = ec_str_alloc_heap(ec, klass);
1730 return str_duplicate_setup(klass, str, dup);
1737 if (STR_EMBED_P(str)) {
1738 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1741 dup = str_alloc_heap(klass);
1744 return str_duplicate_setup(klass, str, dup);
1755rb_str_dup_m(
VALUE str)
1757 if (LIKELY(BARE_STRING_P(str))) {
1768 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1775 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1776 return ec_str_duplicate(ec,
rb_cString, str);
1791 static ID keyword_ids[2];
1792 VALUE orig, opt, venc, vcapa;
1797 if (!keyword_ids[0]) {
1798 keyword_ids[0] = rb_id_encoding();
1799 CONST_ID(keyword_ids[1],
"capacity");
1807 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1808 enc = rb_to_encoding(venc);
1810 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
1815 if (
capa < STR_BUF_MIN_SIZE) {
1816 capa = STR_BUF_MIN_SIZE;
1820 len = RSTRING_LEN(orig);
1824 if (orig == str) n = 0;
1826 str_modifiable(str);
1827 if (STR_EMBED_P(str)) {
1828 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1829 assert(RSTRING_LEN(str) + 1 <= str_embed_capa(str));
1830 memcpy(new_ptr,
RSTRING(str)->
as.embed.ary, RSTRING_LEN(str) + 1);
1831 RSTRING(str)->as.heap.ptr = new_ptr;
1833 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1834 const size_t size = (size_t)
capa + termlen;
1835 const char *
const old_ptr = RSTRING_PTR(str);
1836 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
1837 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1838 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1840 RSTRING(str)->as.heap.ptr = new_ptr;
1842 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1843 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1844 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1846 STR_SET_LEN(str,
len);
1849 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
1850 rb_enc_cr_str_exact_copy(str, orig);
1852 FL_SET(str, STR_NOEMBED);
1859 rb_enc_associate(str, enc);
1871rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
1877 static ID keyword_ids[2];
1887 keyword_ids[0] = rb_id_encoding();
1888 CONST_ID(keyword_ids[1],
"capacity");
1890 encoding = kwargs[0];
1891 capacity = kwargs[1];
1902 if (UNDEF_P(encoding)) {
1904 encoding = rb_obj_encoding(orig);
1908 if (!UNDEF_P(encoding)) {
1909 enc = rb_to_encoding(encoding);
1914 if (UNDEF_P(capacity)) {
1916 VALUE empty_str = str_new(klass,
"", 0);
1918 rb_enc_associate(empty_str, enc);
1922 VALUE copy = str_duplicate(klass, orig);
1923 rb_enc_associate(copy, enc);
1936 if (orig_capa >
capa) {
1941 long fake_len =
capa - termlen;
1946 VALUE str = str_new0(klass, NULL, fake_len, termlen);
1947 STR_SET_LEN(str, 0);
1948 TERM_FILL(RSTRING_PTR(str), termlen);
1951 rb_enc_associate(str, enc);
1955 rb_str_buf_append(str, orig);
/* True unless the top two bits of `c` are binary 10, i.e. unless `c` is a
 * UTF-8 continuation byte. Counting the bytes that pass this test counts
 * characters in valid UTF-8. */
1962#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1977static inline uintptr_t
1978count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1983 d = (d>>6) | (~d>>7);
1984 d &= NONASCII_MASK >> 7;
1987#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1989 return rb_popcount_intptr(d);
1993# if SIZEOF_VOIDP == 8
2002enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2008 long diff = (long)(e - p);
2014 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2015 const uintptr_t *s, *t;
2016 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2017 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2018 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2019 while (p < (
const char *)s) {
2020 if (is_utf8_lead_byte(*p))
len++;
2024 len += count_utf8_lead_bytes_with_word(s);
2027 p = (
const char *)s;
2030 if (is_utf8_lead_byte(*p))
len++;
2036 else if (rb_enc_asciicompat(enc)) {
2041 q = search_nonascii(p, e);
2047 p += rb_enc_fast_mbclen(p, e, enc);
2054 q = search_nonascii(p, e);
2060 p += rb_enc_mbclen(p, e, enc);
2067 for (c=0; p<e; c++) {
2068 p += rb_enc_mbclen(p, e, enc);
2083rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2091 long diff = (long)(e - p);
2094 else if (rb_enc_asciicompat(enc)) {
2098 q = search_nonascii(p, e);
2106 ret = rb_enc_precise_mbclen(p, e, enc);
2121 for (c=0; p<e; c++) {
2122 ret = rb_enc_precise_mbclen(p, e, enc);
2146 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2147 if (!enc) enc = STR_ENC_GET(str);
2148 p = RSTRING_PTR(str);
2149 e = RSTRING_END(str);
2153 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2158 return enc_strlen(p, e, enc, cr);
2165 return str_strlen(str, NULL);
2179 return LONG2NUM(str_strlen(str, NULL));
2191rb_str_bytesize(
VALUE str)
2209rb_str_empty(
VALUE str)
2211 return RBOOL(RSTRING_LEN(str) == 0);
2229 char *ptr1, *ptr2, *ptr3;
2234 enc = rb_enc_check_str(str1, str2);
2238 if (len1 > LONG_MAX - len2) {
2239 rb_raise(rb_eArgError,
"string size too big");
2241 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2242 ptr3 = RSTRING_PTR(str3);
2243 memcpy(ptr3, ptr1, len1);
2244 memcpy(ptr3+len1, ptr2, len2);
2245 TERM_FILL(&ptr3[len1+len2], termlen);
2261 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2264 int enc1 = rb_enc_get_index(str1);
2265 int enc2 = rb_enc_get_index(str2);
2270 else if (enc2 < 0) {
2273 else if (enc1 != enc2) {
2276 else if (len1 > LONG_MAX - len2) {
2309 rb_enc_copy(str2, str);
2314 rb_raise(rb_eArgError,
"negative argument");
2316 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2317 if (STR_EMBEDDABLE_P(
len, 1)) {
2319 memset(RSTRING_PTR(str2), 0,
len + 1);
2326 STR_SET_LEN(str2,
len);
2327 rb_enc_copy(str2, str);
2330 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2331 rb_raise(rb_eArgError,
"argument too big");
2334 len *= RSTRING_LEN(str);
2335 termlen = TERM_LEN(str);
2337 ptr2 = RSTRING_PTR(str2);
2339 n = RSTRING_LEN(str);
2340 memcpy(ptr2, RSTRING_PTR(str), n);
2341 while (n <=
len/2) {
2342 memcpy(ptr2 + n, ptr2, n);
2345 memcpy(ptr2 + n, ptr2,
len-n);
2347 STR_SET_LEN(str2,
len);
2348 TERM_FILL(&ptr2[
len], termlen);
2349 rb_enc_cr_str_copy_for_substr(str2, str);
2375 VALUE tmp = rb_check_array_type(arg);
2384rb_check_lockedtmp(
VALUE str)
2386 if (
FL_TEST(str, STR_TMPLOCK)) {
2392str_modifiable(
VALUE str)
2394 rb_check_lockedtmp(str);
2399str_dependent_p(
VALUE str)
2401 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2410str_independent(
VALUE str)
2412 str_modifiable(str);
2413 return !str_dependent_p(str);
2417str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2425 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2430 STR_SET_LEN(str,
len);
2435 oldptr = RSTRING_PTR(str);
2437 memcpy(
ptr, oldptr,
len);
2439 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2442 STR_SET_NOEMBED(str);
2443 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2444 TERM_FILL(
ptr +
len, termlen);
2446 STR_SET_LEN(str,
len);
2453 if (!str_independent(str))
2454 str_make_independent(str);
2461 int termlen = TERM_LEN(str);
2462 long len = RSTRING_LEN(str);
2465 rb_raise(rb_eArgError,
"negative expanding string size");
2467 if (expand >= LONG_MAX -
len) {
2468 rb_raise(rb_eArgError,
"string size too big");
2471 if (!str_independent(str)) {
2472 str_make_independent_expand(str,
len, expand, termlen);
2474 else if (expand > 0) {
2475 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2482str_modify_keep_cr(
VALUE str)
2484 if (!str_independent(str))
2485 str_make_independent(str);
2492str_discard(
VALUE str)
2494 str_modifiable(str);
2495 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2496 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2497 RSTRING(str)->as.heap.ptr = 0;
2498 STR_SET_LEN(str, 0);
2509 if (!rb_enc_asciicompat(enc)) {
2529 return RSTRING_PTR(str);
2533zero_filled(
const char *s,
int n)
2535 for (; n > 0; --n) {
2542str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2544 const char *e = s +
len;
2546 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2547 if (zero_filled(s, minlen))
return s;
2553str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2558 if (str_dependent_p(str)) {
2559 if (!zero_filled(s +
len, termlen))
2560 str_make_independent_expand(str,
len, 0L, termlen);
2563 TERM_FILL(s +
len, termlen);
2566 return RSTRING_PTR(str);
2570rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2572 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2573 long len = RSTRING_LEN(str);
2577 rb_check_lockedtmp(str);
2578 str_make_independent_expand(str,
len, 0L, termlen);
2580 else if (str_dependent_p(str)) {
2581 if (termlen > oldtermlen)
2582 str_make_independent_expand(str,
len, 0L, termlen);
2585 if (!STR_EMBED_P(str)) {
2587 assert(!
FL_TEST((str), STR_SHARED));
2590 if (termlen > oldtermlen) {
2591 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2599str_null_check(
VALUE str,
int *w)
2601 char *s = RSTRING_PTR(str);
2602 long len = RSTRING_LEN(str);
2608 if (str_null_char(s,
len, minlen, enc)) {
2611 return str_fill_term(str, s,
len, minlen);
2614 if (!s || memchr(s, 0,
len)) {
2618 s = str_fill_term(str, s,
len, minlen);
2624rb_str_to_cstr(
VALUE str)
2627 return str_null_check(str, &w);
2635 char *s = str_null_check(str, &w);
2638 rb_raise(rb_eArgError,
"string contains null char");
2640 rb_raise(rb_eArgError,
"string contains null byte");
2646rb_str_fill_terminator(
VALUE str,
const int newminlen)
2648 char *s = RSTRING_PTR(str);
2649 long len = RSTRING_LEN(str);
2650 return str_fill_term(str, s,
len, newminlen);
2656 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2680str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2689 else if (rb_enc_asciicompat(enc)) {
2690 const char *p2, *e2;
2693 while (p < e && 0 < nth) {
2700 p2 = search_nonascii(p, e2);
2709 n = rb_enc_mbclen(p, e, enc);
2720 while (p < e && nth--) {
2721 p += rb_enc_mbclen(p, e, enc);
2732 return str_nth_len(p, e, &nth, enc);
2736str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2741 p = str_nth_len(p, e, &nth, enc);
2750str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2752 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2753 if (!pp)
return e - p;
2760 return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2761 STR_ENC_GET(str), single_byte_optimizable(str));
2766str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2769 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2770 const uintptr_t *s, *t;
2771 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2772 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2773 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2774 while (p < (
const char *)s) {
2775 if (is_utf8_lead_byte(*p)) nth--;
2779 nth -= count_utf8_lead_bytes_with_word(s);
2781 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2785 if (is_utf8_lead_byte(*p)) {
2786 if (nth == 0)
break;
2796str_utf8_offset(
const char *p,
const char *e,
long nth)
2798 const char *pp = str_utf8_nth(p, e, &nth);
2807 if (single_byte_optimizable(str) || pos < 0)
2810 char *p = RSTRING_PTR(str);
2811 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2816str_subseq(
VALUE str,
long beg,
long len)
2822 assert(beg+
len <= RSTRING_LEN(str));
2824 const int termlen = TERM_LEN(str);
2825 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
2832 if (str_embed_capa(str2) >=
len + termlen) {
2833 char *ptr2 =
RSTRING(str2)->as.embed.ary;
2834 STR_SET_EMBED(str2);
2835 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
2836 TERM_FILL(ptr2+
len, termlen);
2838 STR_SET_LEN(str2,
len);
2842 str_replace_shared(str2, str);
2843 assert(!STR_EMBED_P(str2));
2845 RSTRING(str2)->as.heap.ptr += beg;
2846 if (RSTRING_LEN(str2) >
len) {
2847 STR_SET_LEN(str2,
len);
2857 VALUE str2 = str_subseq(str, beg,
len);
2858 rb_enc_cr_str_copy_for_substr(str2, str);
2867 long blen = RSTRING_LEN(str);
2869 char *p, *s = RSTRING_PTR(str), *e = s + blen;
2871 if (
len < 0)
return 0;
2875 if (single_byte_optimizable(str)) {
2876 if (beg > blen)
return 0;
2879 if (beg < 0)
return 0;
2881 if (
len > blen - beg)
2883 if (
len < 0)
return 0;
2888 if (
len > -beg)
len = -beg;
2891 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
2894 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
2900 slen = str_strlen(str, enc);
2902 if (beg < 0)
return 0;
2904 if (
len == 0)
goto end;
2907 else if (beg > 0 && beg > RSTRING_LEN(str)) {
2911 if (beg > str_strlen(str, enc))
return 0;
2916 enc == rb_utf8_encoding()) {
2917 p = str_utf8_nth(s, e, &beg);
2918 if (beg > 0)
return 0;
2919 len = str_utf8_offset(p, e,
len);
2925 p = s + beg * char_sz;
2929 else if (
len * char_sz > e - p)
2934 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2935 if (beg > 0)
return 0;
2939 len = str_offset(p, e,
len, enc, 0);
2947static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2952 return str_substr(str, beg,
len, TRUE);
2956str_substr(
VALUE str,
long beg,
long len,
int empty)
2960 if (!p)
return Qnil;
2961 if (!
len && !empty)
return Qnil;
2963 beg = p - RSTRING_PTR(str);
2965 VALUE str2 = str_subseq(str, beg,
len);
2966 rb_enc_cr_str_copy_for_substr(str2, str);
2975 rb_str_resize(str, RSTRING_LEN(str));
2976 return rb_obj_freeze(str);
2992 return rb_str_dup(str);
3021str_uminus(
VALUE str)
3024 str = rb_str_dup(str);
3026 return rb_fstring(str);
3030#define rb_str_dup_frozen rb_str_new_frozen
3035 if (
FL_TEST(str, STR_TMPLOCK)) {
3038 FL_SET(str, STR_TMPLOCK);
3045 if (!
FL_TEST(str, STR_TMPLOCK)) {
3052RUBY_FUNC_EXPORTED
VALUE
3063 const int termlen = TERM_LEN(str);
3065 str_modifiable(str);
3066 if (STR_SHARED_P(str)) {
3069 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3070 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3077 else if (
len > RSTRING_LEN(str)) {
3080 const char *
const prev_end = RSTRING_END(str);
3081 const char *
const new_end = RSTRING_PTR(str) +
len;
3091 else if (
len < RSTRING_LEN(str)) {
3099 STR_SET_LEN(str,
len);
3100 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3107 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3110 int independent = str_independent(str);
3111 long slen = RSTRING_LEN(str);
3119 const int termlen = TERM_LEN(str);
3120 if (STR_EMBED_P(str)) {
3121 if (
len == slen)
return str;
3122 if (str_embed_capa(str) >=
len + termlen) {
3123 STR_SET_LEN(str,
len);
3127 str_make_independent_expand(str, slen,
len - slen, termlen);
3129 else if (str_embed_capa(str) >=
len + termlen) {
3130 char *
ptr = STR_HEAP_PTR(str);
3132 if (slen >
len) slen =
len;
3135 STR_SET_LEN(str,
len);
3136 if (independent) ruby_xfree(
ptr);
3139 else if (!independent) {
3140 if (
len == slen)
return str;
3141 str_make_independent_expand(str, slen,
len - slen, termlen);
3145 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3146 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3149 else if (
len == slen)
return str;
3150 STR_SET_LEN(str,
len);
3157str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3160 str_modify_keep_cr(str);
3165 if (
len == 0)
return 0;
3167 long total, olen,
off = -1;
3169 const int termlen = TERM_LEN(str);
3172 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3176 long capa = str_capacity(str, termlen);
3178 if (olen > LONG_MAX -
len) {
3179 rb_raise(rb_eArgError,
"string sizes too big");
3183 if (total >= LONG_MAX / 2) {
3186 while (total >
capa) {
3189 RESIZE_CAPA_TERM(str,
capa, termlen);
3190 sptr = RSTRING_PTR(str);
3195 memcpy(sptr + olen,
ptr,
len);
3196 STR_SET_LEN(str, total);
3197 TERM_FILL(sptr + total, termlen);
3202#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3203#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3208 if (
len == 0)
return str;
3210 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3212 return str_buf_cat(str,
ptr,
len);
3227rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3228 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3237 if (str_encindex == ptr_encindex) {
3239 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3243 str_enc = rb_enc_from_index(str_encindex);
3244 ptr_enc = rb_enc_from_index(ptr_encindex);
3245 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3248 if (RSTRING_LEN(str) == 0) {
3257 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3266 *ptr_cr_ret = ptr_cr;
3268 if (str_encindex != ptr_encindex &&
3271 str_enc = rb_enc_from_index(str_encindex);
3272 ptr_enc = rb_enc_from_index(ptr_encindex);
3277 res_encindex = str_encindex;
3282 res_encindex = str_encindex;
3286 res_encindex = ptr_encindex;
3291 res_encindex = str_encindex;
3298 res_encindex = str_encindex;
3304 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3306 str_buf_cat(str,
ptr,
len);
3312 rb_enc_name(str_enc), rb_enc_name(ptr_enc));
3319 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3329 if (rb_enc_asciicompat(enc)) {
3330 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3336 unsigned int c = (
unsigned char)*
ptr;
3337 int len = rb_enc_codelen(c, enc);
3338 rb_enc_mbcput(c, buf, enc);
3339 rb_enc_cr_str_buf_cat(str, buf,
len,
3352 if (str_enc_fastpath(str)) {
3356 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3362 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3373 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3385 return rb_str_buf_append(str, str2);
3389rb_str_concat_literals(
size_t num,
const VALUE *strary)
3393 unsigned long len = 1;
3398 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3399 str = rb_str_buf_new(
len);
3400 str_enc_copy_direct(str, strary[0]);
3402 for (i = s; i < num; ++i) {
3403 const VALUE v = strary[i];
3406 rb_str_buf_append(str, v);
3407 if (encidx != ENCINDEX_US_ASCII) {
3409 rb_enc_set_index(str, encidx);
3434rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3436 str_modifiable(str);
3441 else if (argc > 1) {
3444 rb_enc_copy(arg_str, str);
3445 for (i = 0; i < argc; i++) {
3448 rb_str_buf_append(str, arg_str);
3480 if (rb_num_to_uint(str2, &code) == 0) {
3493 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3496 buf[0] = (char)code;
3497 rb_str_cat(str1, buf, 1);
3498 if (encidx != rb_enc_to_index(enc)) {
3499 rb_enc_associate_index(str1, encidx);
3504 long pos = RSTRING_LEN(str1);
3509 switch (
len = rb_enc_codelen(code, enc)) {
3510 case ONIGERR_INVALID_CODE_POINT_VALUE:
3511 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3513 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3519 rb_enc_mbcput(code, buf, enc);
3520 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3521 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3523 rb_str_resize(str1, pos+
len);
3524 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
3533rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3535 int encidx = rb_enc_to_index(enc);
3537 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3542 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3543 return ENCINDEX_ASCII_8BIT;
3566rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3568 str_modifiable(str);
3573 else if (argc > 1) {
3576 rb_enc_copy(arg_str, str);
3577 for (i = 0; i < argc; i++) {
3589 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
3591 if (e && !is_ascii_string(str)) {
3601 const char *ptr1, *ptr2;
3604 return (len1 != len2 ||
3606 memcmp(ptr1, ptr2, len1) != 0);
3620rb_str_hash_m(
VALUE str)
3626#define lesser(a,b) (((a)>(b))?(b):(a))
3634 if (RSTRING_LEN(str1) == 0)
return TRUE;
3635 if (RSTRING_LEN(str2) == 0)
return TRUE;
3638 if (idx1 == idx2)
return TRUE;
3643 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
3647 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
3657 const char *ptr1, *ptr2;
3660 if (str1 == str2)
return 0;
3663 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3672 if (len1 > len2)
return 1;
3675 if (retval > 0)
return 1;
3702 if (str1 == str2)
return Qtrue;
3709 return rb_str_eql_internal(str1, str2);
3733 if (str1 == str2)
return Qtrue;
3735 return rb_str_eql_internal(str1, str2);
3766 return rb_invcmp(str1, str2);
3808 return str_casecmp(str1, s);
3816 const char *p1, *p1end, *p2, *p2end;
3818 enc = rb_enc_compatible(str1, str2);
3823 p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
3824 p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
3825 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3826 while (p1 < p1end && p2 < p2end) {
3828 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3829 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3831 return INT2FIX(c1 < c2 ? -1 : 1);
3838 while (p1 < p1end && p2 < p2end) {
3839 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
3840 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
3842 if (0 <= c1 && 0 <= c2) {
3846 return INT2FIX(c1 < c2 ? -1 : 1);
3850 l1 = rb_enc_mbclen(p1, p1end, enc);
3851 l2 = rb_enc_mbclen(p2, p2end, enc);
3852 len = l1 < l2 ? l1 : l2;
3853 r = memcmp(p1, p2,
len);
3855 return INT2FIX(r < 0 ? -1 : 1);
3857 return INT2FIX(l1 < l2 ? -1 : 1);
3863 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
3864 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
3898 return str_casecmp_p(str1, s);
3905 VALUE folded_str1, folded_str2;
3906 VALUE fold_opt = sym_fold;
3908 enc = rb_enc_compatible(str1, str2);
3913 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3914 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3916 return rb_str_eql(folded_str1, folded_str2);
3920strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3921 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3923 const char *search_start = str_ptr;
3924 long pos, search_len = str_len - offset;
3928 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3929 if (pos < 0)
return pos;
3931 if (t == search_start + pos)
break;
3932 search_len -= t - search_start;
3933 if (search_len <= 0)
return -1;
3934 offset += t - search_start;
3937 return pos + offset;
3941#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3942#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
3945rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3947 const char *str_ptr, *str_ptr_end, *sub_ptr;
3948 long str_len, sub_len;
3951 enc = rb_enc_check(str, sub);
3952 if (is_broken_string(sub))
return -1;
3954 str_ptr = RSTRING_PTR(str);
3955 str_ptr_end = RSTRING_END(str);
3956 str_len = RSTRING_LEN(str);
3957 sub_ptr = RSTRING_PTR(sub);
3958 sub_len = RSTRING_LEN(sub);
3960 if (str_len < sub_len)
return -1;
3963 long str_len_char, sub_len_char;
3964 int single_byte = single_byte_optimizable(str);
3965 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3966 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
3968 offset += str_len_char;
3969 if (offset < 0)
return -1;
3971 if (str_len_char - offset < sub_len_char)
return -1;
3972 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
3975 if (sub_len == 0)
return offset;
3978 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3992rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
3999 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4000 long slen = str_strlen(str, enc);
4002 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4014 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4015 enc, single_byte_optimizable(str));
4026 pos = rb_str_index(str, sub, pos);
4040str_ensure_byte_pos(
VALUE str,
long pos)
4042 const char *s = RSTRING_PTR(str);
4043 const char *e = RSTRING_END(str);
4044 const char *p = s + pos;
4045 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4047 "offset %ld does not land on character boundary", pos);
4093rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4099 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4100 long slen = RSTRING_LEN(str);
4102 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4113 str_ensure_byte_pos(str, pos);
4125 pos = rb_str_byteindex(str, sub, pos);
4126 if (pos >= 0)
return LONG2NUM(pos);
4135 char *hit, *adjusted;
4137 long slen, searchlen;
4140 sbeg = RSTRING_PTR(str);
4141 slen = RSTRING_LEN(sub);
4142 if (slen == 0)
return s - sbeg;
4143 e = RSTRING_END(str);
4144 t = RSTRING_PTR(sub);
4146 searchlen = s - sbeg + 1;
4149 hit = memrchr(sbeg, c, searchlen);
4152 if (hit != adjusted) {
4153 searchlen = adjusted - sbeg;
4156 if (memcmp(hit, t, slen) == 0)
4158 searchlen = adjusted - sbeg;
4159 }
while (searchlen > 0);
4170 sbeg = RSTRING_PTR(str);
4171 e = RSTRING_END(str);
4172 t = RSTRING_PTR(sub);
4173 slen = RSTRING_LEN(sub);
4176 if (memcmp(s, t, slen) == 0) {
4179 if (s <= sbeg)
break;
4180 s = rb_enc_prev_char(sbeg, s, e, enc);
4196 enc = rb_enc_check(str, sub);
4197 if (is_broken_string(sub))
return -1;
4198 singlebyte = single_byte_optimizable(str);
4199 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4200 slen = str_strlen(sub, enc);
4203 if (
len < slen)
return -1;
4204 if (
len - pos < slen) pos =
len - slen;
4205 if (
len == 0)
return pos;
4207 sbeg = RSTRING_PTR(str);
4210 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4216 s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
4217 return str_rindex(str, sub, s, enc);
4278rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4283 long pos,
len = str_strlen(str, enc);
4285 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4287 if (pos < 0 && (pos +=
len) < 0) {
4293 if (pos >
len) pos =
len;
4301 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4302 enc, single_byte_optimizable(str));
4313 pos = rb_str_rindex(str, sub, pos);
4323rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4329 enc = rb_enc_check(str, sub);
4330 if (is_broken_string(sub))
return -1;
4331 len = RSTRING_LEN(str);
4332 slen = RSTRING_LEN(sub);
4335 if (
len < slen)
return -1;
4336 if (
len - pos < slen) pos =
len - slen;
4337 if (
len == 0)
return pos;
4339 sbeg = RSTRING_PTR(str);
4342 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4349 return str_rindex(str, sub, s, enc);
4414rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4418 long pos,
len = RSTRING_LEN(str);
4420 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4422 if (pos < 0 && (pos +=
len) < 0) {
4428 if (pos >
len) pos =
len;
4434 str_ensure_byte_pos(str, pos);
4446 pos = rb_str_byterindex(str, sub, pos);
4447 if (pos >= 0)
return LONG2NUM(pos);
4483 switch (OBJ_BUILTIN_TYPE(y)) {
4535rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4542 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
4574rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4578 re = get_pat(argv[0]);
4579 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4588static enum neighbor_char
4596 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
4598 return NEIGHBOR_NOT_CHAR;
4602 if (!l)
return NEIGHBOR_NOT_CHAR;
4603 if (l !=
len)
return NEIGHBOR_WRAPPED;
4604 rb_enc_mbcput(c, p, enc);
4605 r = rb_enc_precise_mbclen(p, p +
len, enc);
4607 return NEIGHBOR_NOT_CHAR;
4609 return NEIGHBOR_FOUND;
4612 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4615 return NEIGHBOR_WRAPPED;
4616 ++((
unsigned char*)p)[i];
4617 l = rb_enc_precise_mbclen(p, p+
len, enc);
4621 return NEIGHBOR_FOUND;
4624 memset(p+l, 0xff,
len-l);
4630 for (len2 =
len-1; 0 < len2; len2--) {
4631 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4635 memset(p+len2+1, 0xff,
len-(len2+1));
4640static enum neighbor_char
4647 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
4649 return NEIGHBOR_NOT_CHAR;
4652 if (!c)
return NEIGHBOR_NOT_CHAR;
4655 if (!l)
return NEIGHBOR_NOT_CHAR;
4656 if (l !=
len)
return NEIGHBOR_WRAPPED;
4657 rb_enc_mbcput(c, p, enc);
4658 r = rb_enc_precise_mbclen(p, p +
len, enc);
4660 return NEIGHBOR_NOT_CHAR;
4662 return NEIGHBOR_FOUND;
4665 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4668 return NEIGHBOR_WRAPPED;
4669 --((
unsigned char*)p)[i];
4670 l = rb_enc_precise_mbclen(p, p+
len, enc);
4674 return NEIGHBOR_FOUND;
4677 memset(p+l, 0,
len-l);
4683 for (len2 =
len-1; 0 < len2; len2--) {
4684 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4688 memset(p+len2+1, 0,
len-(len2+1));
4702static enum neighbor_char
4703enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4705 enum neighbor_char ret;
4709 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4713 const int max_gaps = 1;
4717 ctype = ONIGENC_CTYPE_DIGIT;
4719 ctype = ONIGENC_CTYPE_ALPHA;
4721 return NEIGHBOR_NOT_CHAR;
4724 for (
try = 0;
try <= max_gaps; ++
try) {
4725 ret = enc_succ_char(p,
len, enc);
4726 if (ret == NEIGHBOR_FOUND) {
4729 return NEIGHBOR_FOUND;
4736 ret = enc_pred_char(p,
len, enc);
4737 if (ret == NEIGHBOR_FOUND) {
4751 return NEIGHBOR_NOT_CHAR;
4754 if (ctype != ONIGENC_CTYPE_DIGIT) {
4756 return NEIGHBOR_WRAPPED;
4760 enc_succ_char(carry,
len, enc);
4761 return NEIGHBOR_WRAPPED;
4829 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
4830 rb_enc_cr_str_copy_for_substr(str, orig);
4831 return str_succ(str);
4838 char *sbeg, *s, *e, *last_alnum = 0;
4839 int found_alnum = 0;
4841 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4842 long carry_pos = 0, carry_len = 1;
4843 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4845 slen = RSTRING_LEN(str);
4846 if (slen == 0)
return str;
4848 enc = STR_ENC_GET(str);
4849 sbeg = RSTRING_PTR(str);
4850 s = e = sbeg + slen;
4852 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4853 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4859 l = rb_enc_precise_mbclen(s, e, enc);
4860 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4861 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4862 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4864 case NEIGHBOR_NOT_CHAR:
4866 case NEIGHBOR_FOUND:
4868 case NEIGHBOR_WRAPPED:
4873 carry_pos = s - sbeg;
4878 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4879 enum neighbor_char neighbor;
4880 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4881 l = rb_enc_precise_mbclen(s, e, enc);
4882 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4883 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4885 neighbor = enc_succ_char(tmp, l, enc);
4887 case NEIGHBOR_FOUND:
4891 case NEIGHBOR_WRAPPED:
4894 case NEIGHBOR_NOT_CHAR:
4897 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
4899 enc_succ_char(s, l, enc);
4901 if (!rb_enc_asciicompat(enc)) {
4902 MEMCPY(carry, s,
char, l);
4905 carry_pos = s - sbeg;
4909 RESIZE_CAPA(str, slen + carry_len);
4910 sbeg = RSTRING_PTR(str);
4911 s = sbeg + carry_pos;
4912 memmove(s + carry_len, s, slen - carry_pos);
4913 memmove(s, carry, carry_len);
4915 STR_SET_LEN(str, slen);
4930rb_str_succ_bang(
VALUE str)
4938all_digits_p(
const char *s,
long len)
4992 VALUE end, exclusive;
4996 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5002 VALUE current, after_end;
5009 enc = rb_enc_check(beg, end);
5010 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5012 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5013 char c = RSTRING_PTR(beg)[0];
5014 char e = RSTRING_PTR(end)[0];
5016 if (c > e || (excl && c == e))
return beg;
5018 if ((*each)(rb_enc_str_new(&c, 1, enc), arg))
break;
5019 if (!excl && c == e)
break;
5021 if (excl && c == e)
break;
5026 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5027 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5028 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5033 b = rb_str_to_inum(beg, 10, FALSE);
5034 e = rb_str_to_inum(end, 10, FALSE);
5041 if (excl && bi == ei)
break;
5042 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5047 ID op = excl ?
'<' : idLE;
5048 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5053 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5054 b = rb_funcallv(b, succ, 0, 0);
5061 if (n > 0 || (excl && n == 0))
return beg;
5063 after_end = rb_funcallv(end, succ, 0, 0);
5068 next = rb_funcallv(current, succ, 0, 0);
5069 if ((*each)(current, arg))
break;
5070 if (
NIL_P(next))
break;
5074 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5089 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5090 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5091 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5093 b = rb_str_to_inum(beg, 10, FALSE);
5099 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5107 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5108 b = rb_funcallv(b, succ, 0, 0);
5114 VALUE next = rb_funcallv(current, succ, 0, 0);
5115 if ((*each)(current, arg))
break;
5118 if (RSTRING_LEN(current) == 0)
5129 if (!
rb_equal(str, *argp))
return 0;
5137 beg = rb_str_new_frozen(beg);
5139 end = rb_str_new_frozen(end);
5143 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5144 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5145 rb_enc_asciicompat(STR_ENC_GET(val))) {
5146 const char *bp = RSTRING_PTR(beg);
5147 const char *ep = RSTRING_PTR(end);
5148 const char *vp = RSTRING_PTR(val);
5149 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5150 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5158 if (b <= v && v < e)
return Qtrue;
5159 return RBOOL(!
RTEST(exclusive) && v == e);
5166 all_digits_p(bp, RSTRING_LEN(beg)) &&
5167 all_digits_p(ep, RSTRING_LEN(end))) {
5172 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5174 return RBOOL(
NIL_P(val));
5196 else if (RB_TYPE_P(indx,
T_REGEXP)) {
5197 return rb_str_subpat(str, indx,
INT2FIX(0));
5199 else if (RB_TYPE_P(indx,
T_STRING)) {
5200 if (rb_str_index(str, indx, 0) != -1)
5206 long beg,
len = str_strlen(str, NULL);
5218 return str_substr(str, idx, 1, FALSE);
5237rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5240 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5241 return rb_str_subpat(str, argv[0], argv[1]);
5250 return rb_str_aref(str, argv[0]);
5256 char *ptr = RSTRING_PTR(str);
5257 long olen = RSTRING_LEN(str), nlen;
5259 str_modifiable(str);
5260 if (
len > olen)
len = olen;
5262 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5264 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5266 ptr =
RSTRING(str)->as.embed.ary;
5267 memmove(ptr, oldptr +
len, nlen);
5268 if (fl == STR_NOEMBED)
xfree(oldptr);
5271 if (!STR_SHARED_P(str)) {
5273 rb_enc_cr_str_exact_copy(shared, str);
5278 STR_SET_LEN(str, nlen);
5280 if (!SHARABLE_MIDDLE_SUBSTRING) {
5281 TERM_FILL(ptr + nlen, TERM_LEN(str));
5288rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5294 if (beg == 0 && vlen == 0) {
5299 str_modify_keep_cr(str);
5303 RESIZE_CAPA(str, slen + vlen -
len);
5304 sptr = RSTRING_PTR(str);
5313 memmove(sptr + beg + vlen,
5315 slen - (beg +
len));
5317 if (vlen < beg &&
len < 0) {
5321 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5324 STR_SET_LEN(str, slen);
5325 TERM_FILL(&sptr[slen], TERM_LEN(str));
5332 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5341 int singlebyte = single_byte_optimizable(str);
5347 enc = rb_enc_check(str, val);
5348 slen = str_strlen(str, enc);
5350 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5357 assert(beg <= slen);
5358 if (
len > slen - beg) {
5361 p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
5362 if (!p) p = RSTRING_END(str);
5363 e = str_nth(p, RSTRING_END(str),
len, enc, singlebyte);
5364 if (!e) e = RSTRING_END(str);
5366 beg = p - RSTRING_PTR(str);
5368 rb_str_update_0(str, beg,
len, val);
5369 rb_enc_associate(str, enc);
5380 long start, end,
len;
5390 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5394 nth += regs->num_regs;
5404 enc = rb_enc_check_str(str, val);
5405 rb_str_update_0(str, start,
len, val);
5406 rb_enc_associate(str, enc);
5414 switch (
TYPE(indx)) {
5416 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5420 beg = rb_str_index(str, indx, 0);
5474rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5477 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5478 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5486 return rb_str_aset(str, argv[0], argv[1]);
5546rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5554 str_modify_keep_cr(str);
5562 if ((nth += regs->num_regs) <= 0)
return Qnil;
5564 else if (nth >= regs->num_regs)
return Qnil;
5566 len = END(nth) - beg;
5569 else if (argc == 2) {
5578 beg = p - RSTRING_PTR(str);
5581 else if (RB_TYPE_P(indx,
T_STRING)) {
5582 beg = rb_str_index(str, indx, 0);
5583 if (beg == -1)
return Qnil;
5584 len = RSTRING_LEN(indx);
5596 beg = p - RSTRING_PTR(str);
5605 beg = p - RSTRING_PTR(str);
5609 rb_enc_cr_str_copy_for_substr(result, str);
5617 char *sptr = RSTRING_PTR(str);
5618 long slen = RSTRING_LEN(str);
5619 if (beg +
len > slen)
5623 slen - (beg +
len));
5625 STR_SET_LEN(str, slen);
5626 TERM_FILL(&sptr[slen], TERM_LEN(str));
5637 switch (OBJ_BUILTIN_TYPE(pat)) {
5656get_pat_quoted(
VALUE pat,
int check)
5660 switch (OBJ_BUILTIN_TYPE(pat)) {
5674 if (check && is_broken_string(pat)) {
5675 rb_exc_raise(rb_reg_check_preprocess(pat));
5681rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5684 pos = rb_str_byteindex(str, pat, pos);
5685 if (set_backref_str) {
5687 str = rb_str_new_frozen_String(str);
5688 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
5697 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5717rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5731 hash = rb_check_hash_type(argv[1]);
5737 pat = get_pat_quoted(argv[0], 1);
5739 str_modifiable(str);
5740 beg = rb_pat_search(pat, str, 0, 1);
5754 end0 = beg0 + RSTRING_LEN(pat);
5763 if (iter || !
NIL_P(hash)) {
5764 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
5767 repl = rb_obj_as_string(
rb_yield(match0));
5770 repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
5771 repl = rb_obj_as_string(repl);
5773 str_mod_check(str, p,
len);
5780 enc = rb_enc_compatible(str, repl);
5783 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
5787 rb_enc_name(str_enc),
5788 rb_enc_name(STR_ENC_GET(repl)));
5790 enc = STR_ENC_GET(repl);
5793 rb_enc_associate(str, enc);
5803 rlen = RSTRING_LEN(repl);
5804 len = RSTRING_LEN(str);
5806 RESIZE_CAPA(str,
len + rlen - plen);
5808 p = RSTRING_PTR(str);
5810 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
5812 rp = RSTRING_PTR(repl);
5813 memmove(p + beg0, rp, rlen);
5815 STR_SET_LEN(str,
len);
5816 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
5845 rb_str_sub_bang(argc, argv, str);
5850str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5853 long beg, beg0, end0;
5854 long offset, blen, slen,
len, last;
5855 enum {STR, ITER, MAP} mode = STR;
5857 int need_backref = -1;
5867 hash = rb_check_hash_type(argv[1]);
5876 rb_error_arity(argc, 1, 2);
5879 pat = get_pat_quoted(argv[0], 1);
5880 beg = rb_pat_search(pat, str, 0, need_backref);
5882 if (bang)
return Qnil;
5887 blen = RSTRING_LEN(str) + 30;
5888 dest = rb_str_buf_new(blen);
5889 sp = RSTRING_PTR(str);
5890 slen = RSTRING_LEN(str);
5892 str_enc = STR_ENC_GET(str);
5893 rb_enc_associate(dest, str_enc);
5901 end0 = beg0 + RSTRING_LEN(pat);
5912 val = rb_obj_as_string(
rb_yield(match0));
5915 val = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
5916 val = rb_obj_as_string(val);
5918 str_mod_check(str, sp, slen);
5923 else if (need_backref) {
5925 if (need_backref < 0) {
5926 need_backref = val != repl;
5933 len = beg0 - offset;
5935 rb_enc_str_buf_cat(dest, cp,
len, str_enc);
5938 rb_str_buf_append(dest, val);
5947 if (RSTRING_LEN(str) <= end0)
break;
5948 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
5949 rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0,
len, str_enc);
5950 offset = end0 +
len;
5952 cp = RSTRING_PTR(str) + offset;
5953 if (offset > RSTRING_LEN(str))
break;
5954 beg = rb_pat_search(pat, str, offset, need_backref);
5958 if (RSTRING_LEN(str) > offset) {
5959 rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
5961 rb_pat_search(pat, str, last, 1);
5963 str_shared_replace(str, dest);
5991rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
5993 str_modify_keep_cr(str);
5994 return str_gsub(argc, argv, str, 1);
6017 return str_gsub(argc, argv, str, 0);
6035 str_modifiable(str);
6036 if (str == str2)
return str;
6040 return str_replace(str, str2);
6055rb_str_clear(
VALUE str)
6059 STR_SET_LEN(str, 0);
6060 RSTRING_PTR(str)[0] = 0;
6061 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6080rb_str_chr(
VALUE str)
6104 pos += RSTRING_LEN(str);
6105 if (pos < 0 || RSTRING_LEN(str) <= pos)
6108 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6127 long len = RSTRING_LEN(str);
6128 char *ptr, *head, *left = 0;
6132 if (pos < -
len ||
len <= pos)
6139 char byte = (char)(
NUM2INT(w) & 0xFF);
6141 if (!str_independent(str))
6142 str_make_independent(str);
6143 enc = STR_ENC_GET(str);
6144 head = RSTRING_PTR(str);
6146 if (!STR_EMBED_P(str)) {
6153 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6161 width = rb_enc_precise_mbclen(left, head+
len, enc);
6163 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6179str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6181 long n = RSTRING_LEN(str);
6183 if (beg > n ||
len < 0)
return Qnil;
6186 if (beg < 0)
return Qnil;
6191 if (!empty)
return Qnil;
6195 VALUE str2 = str_subseq(str, beg,
len);
6197 str_enc_copy_direct(str2, str);
6199 if (RSTRING_LEN(str2) == 0) {
6200 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6228 long beg,
len = RSTRING_LEN(str);
6236 return str_byte_substr(str, beg,
len, TRUE);
6241 return str_byte_substr(str, idx, 1, FALSE);
6288rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6293 return str_byte_substr(str, beg,
len, TRUE);
6296 return str_byte_aref(str, argv[0]);
6300str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6302 long end, slen = RSTRING_LEN(str);
6305 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6312 assert(*beg <= slen);
6313 if (*
len > slen - *beg) {
6317 str_ensure_byte_pos(str, *beg);
6318 str_ensure_byte_pos(str, end);
6343rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6345 long beg,
len, vbeg, vlen;
6351 if (!(argc == 2 || argc == 3 || argc == 5)) {
6352 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6356 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6357 rb_builtin_class_name(argv[0]));
6364 vlen = RSTRING_LEN(val);
6369 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6370 rb_builtin_class_name(argv[2]));
6382 vlen = RSTRING_LEN(val);
6390 str_check_beg_len(str, &beg, &
len);
6391 str_check_beg_len(val, &vbeg, &vlen);
6392 enc = rb_enc_check(str, val);
6393 str_modify_keep_cr(str);
6394 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6395 rb_enc_associate(str, enc);
6413rb_str_reverse(
VALUE str)
6420 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6421 enc = STR_ENC_GET(str);
6423 s = RSTRING_PTR(str); e = RSTRING_END(str);
6424 p = RSTRING_END(rev);
6427 if (RSTRING_LEN(str) > 1) {
6428 if (single_byte_optimizable(str)) {
6435 int clen = rb_enc_fast_mbclen(s, e, enc);
6443 cr = rb_enc_asciicompat(enc) ?
6446 int clen = rb_enc_mbclen(s, e, enc);
6455 STR_SET_LEN(rev, RSTRING_LEN(str));
6456 str_enc_copy_direct(rev, str);
6476rb_str_reverse_bang(
VALUE str)
6478 if (RSTRING_LEN(str) > 1) {
6479 if (single_byte_optimizable(str)) {
6482 str_modify_keep_cr(str);
6483 s = RSTRING_PTR(str);
6484 e = RSTRING_END(str) - 1;
6492 str_shared_replace(str, rb_str_reverse(str));
6496 str_modify_keep_cr(str);
6521 i = rb_str_index(str, arg, 0);
6523 return RBOOL(i != -1);
6565 rb_raise(rb_eArgError,
"invalid radix %d", base);
6567 return rb_str_to_inum(str, base, FALSE);
6591rb_str_to_f(
VALUE str)
6606rb_str_to_s(
VALUE str)
6618 char s[RUBY_MAX_CHAR_LEN];
6619 int n = rb_enc_codelen(c, enc);
6621 rb_enc_mbcput(c, s, enc);
6622 rb_enc_str_buf_cat(str, s, n, enc);
/*
 * Size passed to snprintf() when formatting one escaped character
 * ("\\uXXXX", "\\u{X...}", "\\xXX", "\\x{X...}").  Callers declare their
 * scratch buffers as char buf[CHAR_ESC_LEN + 1].
 * NOTE(review): 13 presumably covers "\x{" + 8 hex digits + "}" + NUL for a
 * 32-bit unsigned int -- confirm if unsigned int can be wider.
 */
6626#define CHAR_ESC_LEN 13
6629rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
6631 char buf[CHAR_ESC_LEN + 1];
6639 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6641 else if (c < 0x10000) {
6642 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6645 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6650 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6653 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
6656 l = (int)strlen(buf);
6662ruby_escaped_char(
int c)
6665 case '\0':
return "\\0";
6666 case '\n':
return "\\n";
6667 case '\r':
return "\\r";
6668 case '\t':
return "\\t";
6669 case '\f':
return "\\f";
6670 case '\013':
return "\\v";
6671 case '\010':
return "\\b";
6672 case '\007':
return "\\a";
6673 case '\033':
return "\\e";
6674 case '\x7f':
return "\\c?";
6680rb_str_escape(
VALUE str)
6684 const char *p = RSTRING_PTR(str);
6685 const char *pend = RSTRING_END(str);
6686 const char *prev = p;
6687 char buf[CHAR_ESC_LEN + 1];
6688 VALUE result = rb_str_buf_new(0);
6689 int unicode_p = rb_enc_unicode_p(enc);
6690 int asciicompat = rb_enc_asciicompat(enc);
6695 int n = rb_enc_precise_mbclen(p, pend, enc);
6697 if (p > prev) str_buf_cat(result, prev, p - prev);
6700 n = (int)(pend - p);
6702 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6703 str_buf_cat(result, buf, strlen(buf));
6711 cc = ruby_escaped_char(c);
6713 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6714 str_buf_cat(result, cc, strlen(cc));
6720 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6721 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6725 if (p > prev) str_buf_cat(result, prev, p - prev);
6749 const char *p, *pend, *prev;
6750 char buf[CHAR_ESC_LEN + 1];
6751 VALUE result = rb_str_buf_new(0);
6752 rb_encoding *resenc = rb_default_internal_encoding();
6753 int unicode_p = rb_enc_unicode_p(enc);
6754 int asciicompat = rb_enc_asciicompat(enc);
6756 if (resenc == NULL) resenc = rb_default_external_encoding();
6757 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
6758 rb_enc_associate(result, resenc);
6759 str_buf_cat2(result,
"\"");
6761 p = RSTRING_PTR(str); pend = RSTRING_END(str);
6767 n = rb_enc_precise_mbclen(p, pend, enc);
6769 if (p > prev) str_buf_cat(result, prev, p - prev);
6772 n = (int)(pend - p);
6774 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6775 str_buf_cat(result, buf, strlen(buf));
6783 if ((asciicompat || unicode_p) &&
6784 (c ==
'"'|| c ==
'\\' ||
6789 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6790 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6791 str_buf_cat2(result,
"\\");
6792 if (asciicompat || enc == resenc) {
6798 case '\n': cc =
'n';
break;
6799 case '\r': cc =
'r';
break;
6800 case '\t': cc =
't';
break;
6801 case '\f': cc =
'f';
break;
6802 case '\013': cc =
'v';
break;
6803 case '\010': cc =
'b';
break;
6804 case '\007': cc =
'a';
break;
6805 case 033: cc =
'e';
break;
6806 default: cc = 0;
break;
6809 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6812 str_buf_cat(result, buf, 2);
6829 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6830 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6835 if (p > prev) str_buf_cat(result, prev, p - prev);
6836 str_buf_cat2(result,
"\"");
/*
 * True when p has not reached e and the byte at p is '$', '@' or '{'.
 * The dump code tests this on the byte following a '#' to decide whether
 * a backslash must be emitted.  Each argument is evaluated more than once,
 * so pass side-effect-free expressions.
 */
6841#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6861 int encidx = rb_enc_get_index(str);
6864 const char *p, *pend;
6867 int u8 = (encidx == rb_utf8_encindex());
6868 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6871 if (!rb_enc_asciicompat(enc)) {
6873 len += strlen(enc->name);
6876 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6879 unsigned char c = *p++;
6882 case '"':
case '\\':
6883 case '\n':
case '\r':
6884 case '\t':
case '\f':
6885 case '\013':
case '\010':
case '\007':
case '\033':
6890 clen = IS_EVSTR(p, pend) ? 2 : 1;
6898 if (u8 && c > 0x7F) {
6899 int n = rb_enc_precise_mbclen(p-1, pend, enc);
6904 else if (cc <= 0xFFFFF)
6917 if (clen > LONG_MAX -
len) {
6924 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6925 q = RSTRING_PTR(result); qend = q +
len + 1;
6929 unsigned char c = *p++;
6931 if (c ==
'"' || c ==
'\\') {
6935 else if (c ==
'#') {
6936 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6939 else if (c ==
'\n') {
6943 else if (c ==
'\r') {
6947 else if (c ==
'\t') {
6951 else if (c ==
'\f') {
6955 else if (c ==
'\013') {
6959 else if (c ==
'\010') {
6963 else if (c ==
'\007') {
6967 else if (c ==
'\033') {
6977 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
6982 snprintf(q, qend-q,
"u%04X", cc);
6984 snprintf(q, qend-q,
"u{%X}", cc);
6989 snprintf(q, qend-q,
"x%02X", c);
6995 if (!rb_enc_asciicompat(enc)) {
6996 snprintf(q, qend-q, nonascii_suffix, enc->name);
6997 encidx = rb_ascii8bit_encindex();
7000 rb_enc_associate_index(result, encidx);
7006unescape_ascii(
unsigned int c)
7030undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7032 const char *s = *ss;
7036 unsigned char buf[6];
7043 rb_str_cat(undumped, s, 1);
7054 *buf = unescape_ascii(*s);
7055 rb_str_cat(undumped, (
char *)buf, 1);
7066 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7067 if (*penc != enc_utf8) {
7069 rb_enc_associate(undumped, enc_utf8);
7086 if (hexlen == 0 || hexlen > 6) {
7092 if (0xd800 <= c && c <= 0xdfff) {
7095 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7096 rb_str_cat(undumped, (
char *)buf, codelen);
7105 if (0xd800 <= c && c <= 0xdfff) {
7108 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7109 rb_str_cat(undumped, (
char *)buf, codelen);
7125 rb_str_cat(undumped, (
char *)buf, 1);
7129 rb_str_cat(undumped, s-1, 2);
7136static VALUE rb_str_is_ascii_only_p(
VALUE str);
7154str_undump(
VALUE str)
7156 const char *s = RSTRING_PTR(str);
7157 const char *s_end = RSTRING_END(str);
7159 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7161 bool binary =
false;
7165 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7168 if (!str_null_check(str, &w)) {
7171 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7172 if (*s !=
'"')
goto invalid_format;
7190 static const char force_encoding_suffix[] =
".force_encoding(\"";
7191 static const char dup_suffix[] =
".dup";
7192 const char *encname;
7197 size =
sizeof(dup_suffix) - 1;
7198 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7200 size =
sizeof(force_encoding_suffix) - 1;
7201 if (s_end - s <= size)
goto invalid_format;
7202 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7206 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7210 s = memchr(s,
'"', s_end-s);
7212 if (!s)
goto invalid_format;
7213 if (s_end - s != 2)
goto invalid_format;
7214 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7216 encidx = rb_enc_find_index2(encname, (
long)size);
7220 rb_enc_associate_index(undumped, encidx);
7230 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7233 rb_str_cat(undumped, s++, 1);
7241 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7247 if (rb_enc_dummy_p(enc)) {
7254str_true_enc(
VALUE str)
7257 rb_str_check_dummy_enc(enc);
7261static OnigCaseFoldType
7262check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7267 rb_raise(rb_eArgError,
"too many options");
7268 if (argv[0]==sym_turkic) {
7269 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7271 if (argv[1]==sym_lithuanian)
7272 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7274 rb_raise(rb_eArgError,
"invalid second option");
7277 else if (argv[0]==sym_lithuanian) {
7278 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7280 if (argv[1]==sym_turkic)
7281 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7283 rb_raise(rb_eArgError,
"invalid second option");
7287 rb_raise(rb_eArgError,
"too many options");
7288 else if (argv[0]==sym_ascii)
7289 flags |= ONIGENC_CASE_ASCII_ONLY;
7290 else if (argv[0]==sym_fold) {
7291 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7292 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7294 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7297 rb_raise(rb_eArgError,
"invalid option");
7304 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/*
 * Constant number of extra bytes added to the capacity of each case-mapping
 * buffer, on top of the part that scales with the remaining source length.
 */
7310#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/*
 * CASEMAP_DEBUG defaults to 0 unless it was already defined.  The
 * case-mapping code checks it before printing diagnostics to stderr.
 */
7311#ifndef CASEMAP_DEBUG
7312# define CASEMAP_DEBUG 0
7320 OnigUChar space[FLEX_ARY_LEN];
7324mapping_buffer_free(
void *p)
7328 while (current_buffer) {
7329 previous_buffer = current_buffer;
7330 current_buffer = current_buffer->next;
7331 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7337 {0, mapping_buffer_free,},
7338 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7346 const OnigUChar *source_current, *source_end;
7347 int target_length = 0;
7348 VALUE buffer_anchor;
7351 size_t buffer_count = 0;
7352 int buffer_length_or_invalid;
7354 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7356 source_current = (OnigUChar*)RSTRING_PTR(source);
7357 source_end = (OnigUChar*)RSTRING_END(source);
7361 while (source_current < source_end) {
7363 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7364 if (CASEMAP_DEBUG) {
7365 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7368 *pre_buffer = current_buffer;
7369 pre_buffer = ¤t_buffer->next;
7370 current_buffer->next = NULL;
7371 current_buffer->capa =
capa;
7372 buffer_length_or_invalid = enc->case_map(flags,
7373 &source_current, source_end,
7374 current_buffer->space,
7375 current_buffer->space+current_buffer->capa,
7377 if (buffer_length_or_invalid < 0) {
7378 current_buffer =
DATA_PTR(buffer_anchor);
7380 mapping_buffer_free(current_buffer);
7381 rb_raise(rb_eArgError,
"input string invalid");
7383 target_length += current_buffer->used = buffer_length_or_invalid;
7385 if (CASEMAP_DEBUG) {
7386 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7389 if (buffer_count==1) {
7390 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7393 char *target_current;
7396 target_current = RSTRING_PTR(target);
7397 current_buffer =
DATA_PTR(buffer_anchor);
7398 while (current_buffer) {
7399 memcpy(target_current, current_buffer->space, current_buffer->used);
7400 target_current += current_buffer->used;
7401 current_buffer = current_buffer->next;
7404 current_buffer =
DATA_PTR(buffer_anchor);
7406 mapping_buffer_free(current_buffer);
7411 str_enc_copy_direct(target, source);
7420 const OnigUChar *source_current, *source_end;
7421 OnigUChar *target_current, *target_end;
7422 long old_length = RSTRING_LEN(source);
7423 int length_or_invalid;
7425 if (old_length == 0)
return Qnil;
7427 source_current = (OnigUChar*)RSTRING_PTR(source);
7428 source_end = (OnigUChar*)RSTRING_END(source);
7429 if (source == target) {
7430 target_current = (OnigUChar*)source_current;
7431 target_end = (OnigUChar*)source_end;
7434 target_current = (OnigUChar*)RSTRING_PTR(target);
7435 target_end = (OnigUChar*)RSTRING_END(target);
7438 length_or_invalid = onigenc_ascii_only_case_map(flags,
7439 &source_current, source_end,
7440 target_current, target_end, enc);
7441 if (length_or_invalid < 0)
7442 rb_raise(rb_eArgError,
"input string invalid");
7443 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7444 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7445 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7446 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7447 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7450 str_enc_copy(target, source);
7456upcase_single(
VALUE str)
7458 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7459 bool modified =
false;
7462 unsigned int c = *(
unsigned char*)s;
7464 if (
'a' <= c && c <=
'z') {
7465 *s =
'A' + (c -
'a');
7493rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7496 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7498 flags = check_case_options(argc, argv, flags);
7499 str_modify_keep_cr(str);
7500 enc = str_true_enc(str);
7501 if (case_option_single_p(flags, enc, str)) {
7502 if (upcase_single(str))
7503 flags |= ONIGENC_CASE_MODIFIED;
7505 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7506 rb_str_ascii_casemap(str, str, &flags, enc);
7508 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7510 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7532rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7535 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7538 flags = check_case_options(argc, argv, flags);
7539 enc = str_true_enc(str);
7540 if (case_option_single_p(flags, enc, str)) {
7541 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7542 str_enc_copy_direct(ret, str);
7545 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7547 rb_str_ascii_casemap(str, ret, &flags, enc);
7550 ret = rb_str_casemap(str, &flags, enc);
7557downcase_single(
VALUE str)
7559 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7560 bool modified =
false;
7563 unsigned int c = *(
unsigned char*)s;
7565 if (
'A' <= c && c <=
'Z') {
7566 *s =
'a' + (c -
'A');
7595rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7598 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7600 flags = check_case_options(argc, argv, flags);
7601 str_modify_keep_cr(str);
7602 enc = str_true_enc(str);
7603 if (case_option_single_p(flags, enc, str)) {
7604 if (downcase_single(str))
7605 flags |= ONIGENC_CASE_MODIFIED;
7607 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7608 rb_str_ascii_casemap(str, str, &flags, enc);
7610 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7612 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7634rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7637 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7640 flags = check_case_options(argc, argv, flags);
7641 enc = str_true_enc(str);
7642 if (case_option_single_p(flags, enc, str)) {
7643 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7644 str_enc_copy_direct(ret, str);
7645 downcase_single(ret);
7647 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7649 rb_str_ascii_casemap(str, ret, &flags, enc);
7652 ret = rb_str_casemap(str, &flags, enc);
7680rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7683 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7685 flags = check_case_options(argc, argv, flags);
7686 str_modify_keep_cr(str);
7687 enc = str_true_enc(str);
7688 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7689 if (flags&ONIGENC_CASE_ASCII_ONLY)
7690 rb_str_ascii_casemap(str, str, &flags, enc);
7692 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7694 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7718rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7721 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7724 flags = check_case_options(argc, argv, flags);
7725 enc = str_true_enc(str);
7726 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
7727 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7729 rb_str_ascii_casemap(str, ret, &flags, enc);
7732 ret = rb_str_casemap(str, &flags, enc);
7759rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7762 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7764 flags = check_case_options(argc, argv, flags);
7765 str_modify_keep_cr(str);
7766 enc = str_true_enc(str);
7767 if (flags&ONIGENC_CASE_ASCII_ONLY)
7768 rb_str_ascii_casemap(str, str, &flags, enc);
7770 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7772 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7796rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7799 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7802 flags = check_case_options(argc, argv, flags);
7803 enc = str_true_enc(str);
7804 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
7805 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7807 rb_str_ascii_casemap(str, ret, &flags, enc);
7810 ret = rb_str_casemap(str, &flags, enc);
7815typedef unsigned char *USTR;
7819 unsigned int now, max;
7831 if (t->p == t->pend)
return -1;
7832 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7835 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7837 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7839 if (t->p < t->pend) {
7840 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7843 if (t->now < 0x80 && c < 0x80) {
7844 rb_raise(rb_eArgError,
7845 "invalid range \"%c-%c\" in string transliteration",
7849 rb_raise(rb_eArgError,
"invalid range in string transliteration");
7853 else if (t->now < c) {
7862 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7863 if (t->now == t->max) {
7868 if (t->now < t->max) {
7884 const unsigned int errc = -1;
7885 unsigned int trans[256];
7887 struct tr trsrc, trrepl;
7889 unsigned int c, c0, last = 0;
7890 int modify = 0, i, l;
7891 unsigned char *s, *send;
7893 int singlebyte = single_byte_optimizable(str);
7897#define CHECK_IF_ASCII(c) \
7898 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7899 (cr = ENC_CODERANGE_VALID) : 0)
7903 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7904 if (RSTRING_LEN(repl) == 0) {
7905 return rb_str_delete_bang(1, &src, str);
7909 e1 = rb_enc_check(str, src);
7910 e2 = rb_enc_check(str, repl);
7915 enc = rb_enc_check(src, repl);
7917 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
7918 if (RSTRING_LEN(src) > 1 &&
7919 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
7920 trsrc.p + l < trsrc.pend) {
7924 trrepl.p = RSTRING_PTR(repl);
7925 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
7926 trsrc.gen = trrepl.gen = 0;
7927 trsrc.now = trrepl.now = 0;
7928 trsrc.max = trrepl.max = 0;
7931 for (i=0; i<256; i++) {
7934 while ((c = trnext(&trsrc, enc)) != errc) {
7939 if (!hash) hash = rb_hash_new();
7943 while ((c = trnext(&trrepl, enc)) != errc)
7946 for (i=0; i<256; i++) {
7947 if (trans[i] != errc) {
7955 for (i=0; i<256; i++) {
7958 while ((c = trnext(&trsrc, enc)) != errc) {
7959 r = trnext(&trrepl, enc);
7960 if (r == errc) r = trrepl.now;
7963 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
7966 if (!hash) hash = rb_hash_new();
7974 str_modify_keep_cr(str);
7975 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)RSTRING_END(str);
7979 long offset, max = RSTRING_LEN(str);
7980 unsigned int save = -1;
7981 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7986 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
7987 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
7996 if (cflag) c = last;
7999 else if (cflag) c = errc;
8005 if (c != (
unsigned int)-1) {
8011 tlen = rb_enc_codelen(c, enc);
8017 if (enc != e1) may_modify = 1;
8019 if ((offset = t - buf) + tlen > max) {
8020 size_t MAYBE_UNUSED(old) = max + termlen;
8021 max = offset + tlen + (send - s);
8022 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8025 rb_enc_mbcput(c, t, enc);
8026 if (may_modify && memcmp(s, t, tlen) != 0) {
8032 if (!STR_EMBED_P(str)) {
8033 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8035 TERM_FILL((
char *)t, termlen);
8036 RSTRING(str)->as.heap.ptr = (
char *)buf;
8037 STR_SET_LEN(str, t - buf);
8038 STR_SET_NOEMBED(str);
8039 RSTRING(str)->as.heap.aux.capa = max;
8043 c = (
unsigned char)*s;
8044 if (trans[c] != errc) {
8061 long offset, max = (long)((send - s) * 1.2);
8062 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8066 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
8067 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8075 if (cflag) c = last;
8078 else if (cflag) c = errc;
8082 c = cflag ? last : errc;
8085 tlen = rb_enc_codelen(c, enc);
8090 if (enc != e1) may_modify = 1;
8092 if ((offset = t - buf) + tlen > max) {
8093 size_t MAYBE_UNUSED(old) = max + termlen;
8094 max = offset + tlen + (long)((send - s) * 1.2);
8095 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8099 rb_enc_mbcput(c, t, enc);
8100 if (may_modify && memcmp(s, t, tlen) != 0) {
8108 if (!STR_EMBED_P(str)) {
8109 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8111 TERM_FILL((
char *)t, termlen);
8112 RSTRING(str)->as.heap.ptr = (
char *)buf;
8113 STR_SET_LEN(str, t - buf);
8114 STR_SET_NOEMBED(str);
8115 RSTRING(str)->as.heap.aux.capa = max;
8121 rb_enc_associate(str, enc);
8140 return tr_trans(str, src, repl, 0);
8187 tr_trans(str, src, repl, 0);
/*
 * TR_TABLE_MAX is the number of distinct unsigned char values; code points
 * below it are looked up directly in the per-byte table.
 */
8191#define TR_TABLE_MAX (UCHAR_MAX+1)
/*
 * TR_TABLE_SIZE adds one trailing slot (index TR_TABLE_MAX) that stores a
 * flag: tr_setup_table() writes it and tr_find() reads it as its fallback
 * result.
 */
8192#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8194tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8197 const unsigned int errc = -1;
8198 char buf[TR_TABLE_MAX];
8201 VALUE table = 0, ptable = 0;
8202 int i, l, cflag = 0;
8204 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8205 tr.gen =
tr.now =
tr.max = 0;
8207 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8212 for (i=0; i<TR_TABLE_MAX; i++) {
8215 stable[TR_TABLE_MAX] = cflag;
8217 else if (stable[TR_TABLE_MAX] && !cflag) {
8218 stable[TR_TABLE_MAX] = 0;
8220 for (i=0; i<TR_TABLE_MAX; i++) {
8224 while ((c = trnext(&
tr, enc)) != errc) {
8225 if (c < TR_TABLE_MAX) {
8226 buf[(
unsigned char)c] = !cflag;
8231 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8234 table = ptable ? ptable : rb_hash_new();
8238 table = rb_hash_new();
8243 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8244 rb_hash_aset(table, key,
Qtrue);
8248 for (i=0; i<TR_TABLE_MAX; i++) {
8249 stable[i] = stable[i] && buf[i];
8251 if (!table && !cflag) {
8258tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8260 if (c < TR_TABLE_MAX) {
8261 return table[c] != 0;
8267 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8268 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8272 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8275 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8289rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8291 char squeez[TR_TABLE_SIZE];
8294 VALUE del = 0, nodel = 0;
8296 int i, ascompat, cr;
8298 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8300 for (i=0; i<argc; i++) {
8304 enc = rb_enc_check(str, s);
8305 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8308 str_modify_keep_cr(str);
8309 ascompat = rb_enc_asciicompat(enc);
8310 s = t = RSTRING_PTR(str);
8311 send = RSTRING_END(str);
8317 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8328 c = rb_enc_codepoint_len(s, send, &clen, enc);
8330 if (tr_find(c, squeez, del, nodel)) {
8334 if (t != s) rb_enc_mbcput(c, t, enc);
8341 TERM_FILL(t, TERM_LEN(str));
8342 STR_SET_LEN(str, t - RSTRING_PTR(str));
8345 if (modify)
return str;
8365rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8368 rb_str_delete_bang(argc, argv, str);
8382rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8384 char squeez[TR_TABLE_SIZE];
8386 VALUE del = 0, nodel = 0;
8387 unsigned char *s, *send, *t;
8389 int ascompat, singlebyte = single_byte_optimizable(str);
8393 enc = STR_ENC_GET(str);
8396 for (i=0; i<argc; i++) {
8400 enc = rb_enc_check(str, s);
8401 if (singlebyte && !single_byte_optimizable(s))
8403 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8407 str_modify_keep_cr(str);
8408 s = t = (
unsigned char *)RSTRING_PTR(str);
8409 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8410 send = (
unsigned char *)RSTRING_END(str);
8412 ascompat = rb_enc_asciicompat(enc);
8416 unsigned int c = *s++;
8417 if (c != save || (argc > 0 && !squeez[c])) {
8427 if (ascompat && (c = *s) < 0x80) {
8428 if (c != save || (argc > 0 && !squeez[c])) {
8434 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8436 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8437 if (t != s) rb_enc_mbcput(c, t, enc);
8446 TERM_FILL((
char *)t, TERM_LEN(str));
8447 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8448 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8452 if (modify)
return str;
8475rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8478 rb_str_squeeze_bang(argc, argv, str);
8496 return tr_trans(str, src, repl, 1);
8519 tr_trans(str, src, repl, 1);
8548rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8550 char table[TR_TABLE_SIZE];
8552 VALUE del = 0, nodel = 0, tstr;
8562 enc = rb_enc_check(str, tstr);
8565 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8566 (ptstr = RSTRING_PTR(tstr),
8567 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8568 !is_broken_string(str)) {
8570 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8572 s = RSTRING_PTR(str);
8573 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8574 send = RSTRING_END(str);
8576 if (*(
unsigned char*)s++ == c) n++;
8582 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8583 for (i=1; i<argc; i++) {
8586 enc = rb_enc_check(str, tstr);
8587 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8590 s = RSTRING_PTR(str);
8591 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8592 send = RSTRING_END(str);
8593 ascompat = rb_enc_asciicompat(enc);
8597 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8605 c = rb_enc_codepoint_len(s, send, &clen, enc);
8606 if (tr_find(c, table, del, nodel)) {
8617rb_fs_check(
VALUE val)
8621 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed whitespace lookup table: the entry is 1 for bytes 9..13
 * (HT, LF, VT, FF, CR) and 32 (space), and 0 for every other value.
 * Each row below covers 16 consecutive byte values.
 */
8626static const char isspacetable[256] = {
8627 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8629 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8641 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Table-driven whitespace test.  The argument is cast to unsigned char
 * before indexing, so a negative char value cannot produce a negative
 * index.  Yields the table entry (0 or 1).
 */
8645#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
8648split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8650 if (empty_count >= 0 &&
len == 0) {
8651 return empty_count + 1;
8653 if (empty_count > 0) {
8657 rb_ary_push(result, str_new_empty_String(str));
8658 }
while (--empty_count > 0);
8662 rb_yield(str_new_empty_String(str));
8663 }
while (--empty_count > 0);
8666 str = rb_str_subseq(str, beg,
len);
8668 rb_ary_push(result, str);
8677 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
8681literal_split_pattern(
VALUE spat, split_type_t default_type)
8689 return SPLIT_TYPE_CHARS;
8691 else if (rb_enc_asciicompat(enc)) {
8692 if (
len == 1 && ptr[0] ==
' ') {
8693 return SPLIT_TYPE_AWK;
8698 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
8699 return SPLIT_TYPE_AWK;
8702 return default_type;
8715rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8720 split_type_t split_type;
8721 long beg, end, i = 0, empty_count = -1;
8726 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8728 if (lim <= 0) limit =
Qnil;
8729 else if (lim == 1) {
8730 if (RSTRING_LEN(str) == 0)
8741 if (
NIL_P(limit) && !lim) empty_count = 0;
8743 enc = STR_ENC_GET(str);
8744 split_type = SPLIT_TYPE_REGEXP;
8746 spat = get_pat_quoted(spat, 0);
8748 else if (
NIL_P(spat = rb_fs)) {
8749 split_type = SPLIT_TYPE_AWK;
8751 else if (!(spat = rb_fs_check(spat))) {
8752 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
8757 if (split_type != SPLIT_TYPE_AWK) {
8762 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8763 if (split_type == SPLIT_TYPE_AWK) {
8765 split_type = SPLIT_TYPE_STRING;
8770 mustnot_broken(spat);
8771 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Hands the byte range [beg, beg+len) of str to split_string() and stores
 * the returned count back into empty_count.  Relies on `result`, `str` and
 * `empty_count` being variables in the enclosing scope.
 */
8779#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8782 char *ptr = RSTRING_PTR(str);
8783 char *eptr = RSTRING_END(str);
8784 if (split_type == SPLIT_TYPE_AWK) {
8789 if (result) result = rb_ary_new();
8791 if (is_ascii_string(str)) {
8792 while (ptr < eptr) {
8793 c = (
unsigned char)*ptr++;
8795 if (ascii_isspace(c)) {
8801 if (!
NIL_P(limit) && lim <= i)
break;
8804 else if (ascii_isspace(c)) {
8805 SPLIT_STR(beg, end-beg);
8808 if (!
NIL_P(limit)) ++i;
8816 while (ptr < eptr) {
8819 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
8828 if (!
NIL_P(limit) && lim <= i)
break;
8832 SPLIT_STR(beg, end-beg);
8835 if (!
NIL_P(limit)) ++i;
8843 else if (split_type == SPLIT_TYPE_STRING) {
8844 char *str_start = ptr;
8845 char *substr_start = ptr;
8846 char *sptr = RSTRING_PTR(spat);
8847 long slen = RSTRING_LEN(spat);
8849 if (result) result = rb_ary_new();
8850 mustnot_broken(str);
8851 enc = rb_enc_check(str, spat);
8852 while (ptr < eptr &&
8853 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8856 if (t != ptr + end) {
8860 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8863 if (!
NIL_P(limit) && lim <= ++i)
break;
8865 beg = ptr - str_start;
8867 else if (split_type == SPLIT_TYPE_CHARS) {
8868 char *str_start = ptr;
8871 if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
8872 mustnot_broken(str);
8873 enc = rb_enc_get(str);
8874 while (ptr < eptr &&
8875 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
8876 SPLIT_STR(ptr - str_start, n);
8878 if (!
NIL_P(limit) && lim <= ++i)
break;
8880 beg = ptr - str_start;
8883 if (result) result = rb_ary_new();
8884 long len = RSTRING_LEN(str);
8892 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
8897 if (start == end && BEG(0) == END(0)) {
8902 else if (last_null == 1) {
8903 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
8910 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
8916 SPLIT_STR(beg, end-beg);
8917 beg = start = END(0);
8921 for (idx=1; idx < regs->num_regs; idx++) {
8922 if (BEG(idx) == -1)
continue;
8923 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8925 if (!
NIL_P(limit) && lim <= ++i)
break;
8927 if (match) rb_match_unbusy(match);
8929 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
8930 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
8933 return result ? result : str;
8943 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a new array with capacity `size` when no block was given,
 * and to 0 when a block was given.  The `m` argument is not used in the
 * expansion.
 */
8946#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8952 rb_ary_push(ary, e);
/* Shorthand that forwards both arguments unchanged to enumerator_element(). */
8961#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8964chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8966 const char *prev = rb_enc_prev_char(p, e, e, enc);
8969 prev = rb_enc_prev_char(p, e, e, enc);
8970 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
8982 RSTRING_LEN(rs) != 1 ||
8983 RSTRING_PTR(rs)[0] !=
'\n')) {
8989#define rb_rs get_rs()
8996 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8997 long pos,
len, rslen;
9003 static ID keywords[1];
9008 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9012 if (!ENUM_ELEM(ary, str)) {
9020 if (!RSTRING_LEN(str))
goto end;
9021 str = rb_str_new_frozen(str);
9022 ptr = subptr = RSTRING_PTR(str);
9023 pend = RSTRING_END(str);
9024 len = RSTRING_LEN(str);
9026 rslen = RSTRING_LEN(rs);
9028 if (rs == rb_default_rs)
9029 enc = rb_enc_get(str);
9031 enc = rb_enc_check(str, rs);
9036 const char *eol = NULL;
9038 while (subend < pend) {
9039 long chomp_rslen = 0;
9041 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9043 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9045 if (eol == subend)
break;
9049 chomp_rslen = -rslen;
9053 if (!subptr) subptr = subend;
9057 }
while (subend < pend);
9059 if (rslen == 0) chomp_rslen = 0;
9060 line = rb_str_subseq(str, subptr - ptr,
9061 subend - subptr + (chomp ? chomp_rslen : rslen));
9062 if (ENUM_ELEM(ary, line)) {
9063 str_mod_check(str, ptr,
len);
9065 subptr = eol = NULL;
9070 rsptr = RSTRING_PTR(rs);
9077 if ((rs == rb_default_rs) && !rb_enc_asciicompat(enc)) {
9080 rsptr = RSTRING_PTR(rs);
9081 rslen = RSTRING_LEN(rs);
9084 while (subptr < pend) {
9085 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9089 if (hit != adjusted) {
9093 subend = hit += rslen;
9096 subend = chomp_newline(subptr, subend, enc);
9102 line = rb_str_subseq(str, subptr - ptr, subend - subptr);
9103 if (ENUM_ELEM(ary, line)) {
9104 str_mod_check(str, ptr,
len);
9109 if (subptr != pend) {
9112 pend = chomp_newline(subptr, pend, enc);
9114 else if (pend - subptr >= rslen &&
9115 memcmp(pend - rslen, rsptr, rslen) == 0) {
9119 line = rb_str_subseq(str, subptr - ptr, pend - subptr);
9120 ENUM_ELEM(ary, line);
9141rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9144 return rb_str_enumerate_lines(argc, argv, str, 0);
9157rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9159 VALUE ary = WANTARRAY(
"lines", 0);
9160 return rb_str_enumerate_lines(argc, argv, str, ary);
9174 for (i=0; i<RSTRING_LEN(str); i++) {
9175 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9193rb_str_each_byte(
VALUE str)
9196 return rb_str_enumerate_bytes(str, 0);
9208rb_str_bytes(
VALUE str)
9210 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9211 return rb_str_enumerate_bytes(str, ary);
9228 str = rb_str_new_frozen(str);
9229 ptr = RSTRING_PTR(str);
9230 len = RSTRING_LEN(str);
9231 enc = rb_enc_get(str);
9234 for (i = 0; i <
len; i += n) {
9235 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9236 ENUM_ELEM(ary, rb_str_subseq(str, i, n));
9240 for (i = 0; i <
len; i += n) {
9241 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9242 ENUM_ELEM(ary, rb_str_subseq(str, i, n));
9262rb_str_each_char(
VALUE str)
9265 return rb_str_enumerate_chars(str, 0);
9277rb_str_chars(
VALUE str)
9280 return rb_str_enumerate_chars(str, ary);
9284rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9289 const char *ptr, *end;
9292 if (single_byte_optimizable(str))
9293 return rb_str_enumerate_bytes(str, ary);
9295 str = rb_str_new_frozen(str);
9296 ptr = RSTRING_PTR(str);
9297 end = RSTRING_END(str);
9298 enc = STR_ENC_GET(str);
9301 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9322rb_str_each_codepoint(
VALUE str)
9325 return rb_str_enumerate_codepoints(str, 0);
9337rb_str_codepoints(
VALUE str)
9340 return rb_str_enumerate_codepoints(str, ary);
9346 int encidx = rb_enc_to_index(enc);
9347 regex_t *reg_grapheme_cluster = NULL;
9348 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9351 if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
9352 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
9354 if (!reg_grapheme_cluster) {
9355 const OnigUChar source_ascii[] =
"\\X";
9357 const OnigUChar *source = source_ascii;
9358 size_t source_len =
sizeof(source_ascii) - 1;
9360#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9361#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9362#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9363#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9364#define CASE_UTF(e) \
9365 case ENCINDEX_UTF_##e: { \
9366 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9367 source = source_UTF_##e; \
9368 source_len = sizeof(source_UTF_##e); \
9371 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9378 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9379 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9381 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9382 onig_error_code_to_str(message, r, &einfo);
9383 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9385 if (encidx == rb_utf8_encindex()) {
9386 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
9389 return reg_grapheme_cluster;
9395 size_t grapheme_cluster_count = 0;
9396 regex_t *reg_grapheme_cluster = NULL;
9398 const char *ptr, *end;
9400 if (!rb_enc_unicode_p(enc)) {
9404 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9405 ptr = RSTRING_PTR(str);
9406 end = RSTRING_END(str);
9409 OnigPosition
len = onig_match(reg_grapheme_cluster,
9410 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9411 (
const OnigUChar *)ptr, NULL, 0);
9412 if (
len <= 0)
break;
9413 grapheme_cluster_count++;
9417 return SIZET2NUM(grapheme_cluster_count);
9421rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9424 regex_t *reg_grapheme_cluster = NULL;
9426 const char *ptr0, *ptr, *end;
9428 if (!rb_enc_unicode_p(enc)) {
9429 return rb_str_enumerate_chars(str, ary);
9432 if (!ary) str = rb_str_new_frozen(str);
9433 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9434 ptr0 = ptr = RSTRING_PTR(str);
9435 end = RSTRING_END(str);
9438 OnigPosition
len = onig_match(reg_grapheme_cluster,
9439 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9440 (
const OnigUChar *)ptr, NULL, 0);
9441 if (
len <= 0)
break;
9442 ENUM_ELEM(ary, rb_str_subseq(str, ptr-ptr0,
len));
9462rb_str_each_grapheme_cluster(
VALUE str)
9465 return rb_str_enumerate_grapheme_clusters(str, 0);
9477rb_str_grapheme_clusters(
VALUE str)
9480 return rb_str_enumerate_grapheme_clusters(str, ary);
9484chopped_length(
VALUE str)
9487 const char *p, *p2, *beg, *end;
9489 beg = RSTRING_PTR(str);
9490 end = beg + RSTRING_LEN(str);
9491 if (beg >= end)
return 0;
9492 p = rb_enc_prev_char(beg, end, end, enc);
9494 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9495 p2 = rb_enc_prev_char(beg, p, end, enc);
9496 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9512rb_str_chop_bang(
VALUE str)
9514 str_modify_keep_cr(str);
9515 if (RSTRING_LEN(str) > 0) {
9517 len = chopped_length(str);
9518 STR_SET_LEN(str,
len);
9519 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
9538rb_str_chop(
VALUE str)
9540 return rb_str_subseq(str, 0, chopped_length(str));
9544smart_chomp(
VALUE str,
const char *e,
const char *p)
9555 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9563 if (--e > p && *(e-1) ==
'\r') {
9580 char *pp, *e, *rsptr;
9582 char *
const p = RSTRING_PTR(str);
9583 long len = RSTRING_LEN(str);
9585 if (
len == 0)
return 0;
9587 if (rs == rb_default_rs) {
9588 return smart_chomp(str, e, p);
9591 enc = rb_enc_get(str);
9602 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9609 while (e > p && *(e-1) ==
'\n') {
9611 if (e > p && *(e-1) ==
'\r')
9617 if (rslen >
len)
return len;
9619 enc = rb_enc_get(rs);
9620 newline = rsptr[rslen-1];
9623 if (newline ==
'\n')
9624 return smart_chomp(str, e, p);
9628 return smart_chomp(str, e, p);
9632 enc = rb_enc_check(str, rs);
9633 if (is_broken_string(rs)) {
9637 if (p[
len-1] == newline &&
9639 memcmp(rsptr, pp, rslen) == 0)) {
9640 if (at_char_boundary(p, pp, e, enc))
9653chomp_rs(
int argc,
const VALUE *argv)
9669 long olen = RSTRING_LEN(str);
9670 long len = chompped_length(str, rs);
9672 str_modify_keep_cr(str);
9673 STR_SET_LEN(str,
len);
9674 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
9691rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9694 str_modifiable(str);
9695 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
9696 rs = chomp_rs(argc, argv);
9698 return rb_str_chomp_string(str, rs);
9711rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9713 VALUE rs = chomp_rs(argc, argv);
9715 return rb_str_subseq(str, 0, chompped_length(str, rs));
9721 const char *
const start = s;
9723 if (!s || s >= e)
return 0;
9726 if (single_byte_optimizable(str)) {
9727 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9732 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
9752rb_str_lstrip_bang(
VALUE str)
9758 str_modify_keep_cr(str);
9759 enc = STR_ENC_GET(str);
9761 loffset = lstrip_offset(str, start, start+olen, enc);
9763 long len = olen-loffset;
9764 s = start + loffset;
9765 memmove(start, s,
len);
9766 STR_SET_LEN(str,
len);
9790rb_str_lstrip(
VALUE str)
9795 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
9796 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9797 return rb_str_subseq(str, loffset,
len - loffset);
9805 rb_str_check_dummy_enc(enc);
9809 if (!s || s >= e)
return 0;
9813 if (single_byte_optimizable(str)) {
9815 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9820 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
9840rb_str_rstrip_bang(
VALUE str)
9846 str_modify_keep_cr(str);
9847 enc = STR_ENC_GET(str);
9849 roffset = rstrip_offset(str, start, start+olen, enc);
9851 long len = olen - roffset;
9853 STR_SET_LEN(str,
len);
9877rb_str_rstrip(
VALUE str)
9883 enc = STR_ENC_GET(str);
9885 roffset = rstrip_offset(str, start, start+olen, enc);
9887 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9888 return rb_str_subseq(str, 0, olen-roffset);
9903rb_str_strip_bang(
VALUE str)
9906 long olen, loffset, roffset;
9909 str_modify_keep_cr(str);
9910 enc = STR_ENC_GET(str);
9912 loffset = lstrip_offset(str, start, start+olen, enc);
9913 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9915 if (loffset > 0 || roffset > 0) {
9916 long len = olen-roffset;
9919 memmove(start, start + loffset,
len);
9921 STR_SET_LEN(str,
len);
9945rb_str_strip(
VALUE str)
9948 long olen, loffset, roffset;
9952 loffset = lstrip_offset(str, start, start+olen, enc);
9953 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9955 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
9956 return rb_str_subseq(str, loffset, olen-loffset-roffset);
9960scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9963 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
9969 end = pos + RSTRING_LEN(pat);
9983 if (RSTRING_LEN(str) > end)
9984 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
9985 RSTRING_END(str), enc);
9993 if (!regs || regs->num_regs == 1) {
9994 result = rb_str_subseq(str, pos, end - pos);
9999 for (
int i = 1; i < regs->num_regs; i++) {
10002 s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
10005 rb_ary_push(result, s);
10060 long last = -1, prev = 0;
10061 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10063 pat = get_pat_quoted(pat, 1);
10064 mustnot_broken(str);
10066 VALUE ary = rb_ary_new();
10068 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10071 rb_ary_push(ary, result);
10073 if (last >= 0) rb_pat_search(pat, str, last, 1);
10078 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10082 str_mod_check(str, p,
len);
10084 if (last >= 0) rb_pat_search(pat, str, last, 1);
10108rb_str_hex(
VALUE str)
10110 return rb_str_to_inum(str, 16, FALSE);
10135rb_str_oct(
VALUE str)
10137 return rb_str_to_inum(str, -8, FALSE);
10140#ifndef HAVE_CRYPT_R
10145 rb_nativethread_lock_t lock;
10146} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10149crypt_mutex_initialize(
void)
10220# define CRYPT_END() ALLOCV_END(databuf)
10222 extern char *crypt(
const char *,
const char *);
10223# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10226 const char *s, *saltp;
10229 char salt_8bit_clean[3];
10233 mustnot_wchar(str);
10234 mustnot_wchar(salt);
10236 saltp = RSTRING_PTR(salt);
10237 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10238 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10242 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10243 salt_8bit_clean[0] = saltp[0] & 0x7f;
10244 salt_8bit_clean[1] = saltp[1] & 0x7f;
10245 salt_8bit_clean[2] =
'\0';
10246 saltp = salt_8bit_clean;
10251# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10252 data->initialized = 0;
10254 res = crypt_r(s, saltp, data);
10256 crypt_mutex_initialize();
10258 res = crypt(s, saltp);
10299 char *ptr, *p, *pend;
10302 unsigned long sum0 = 0;
10307 ptr = p = RSTRING_PTR(str);
10308 len = RSTRING_LEN(str);
10314 str_mod_check(str, ptr,
len);
10317 sum0 += (
unsigned char)*p;
10328 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10329 sum0 &= (((
unsigned long)1)<<bits)-1;
10349rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10353 long width,
len, flen = 1, fclen = 1;
10356 const char *f =
" ";
10357 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10359 int singlebyte = 1, cr;
10363 enc = STR_ENC_GET(str);
10368 enc = rb_enc_check(str, pad);
10369 f = RSTRING_PTR(pad);
10370 flen = RSTRING_LEN(pad);
10371 fclen = str_strlen(pad, enc);
10372 singlebyte = single_byte_optimizable(pad);
10373 if (flen == 0 || fclen == 0) {
10374 rb_raise(rb_eArgError,
"zero width padding");
10377 len = str_strlen(str, enc);
10378 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10380 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10384 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10385 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10387 size = RSTRING_LEN(str);
10388 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10389 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10390 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10391 rb_raise(rb_eArgError,
"argument too big");
10395 p = RSTRING_PTR(res);
10397 memset(p, *f, llen);
10401 while (llen >= fclen) {
10407 memcpy(p, f, llen2);
10411 memcpy(p, RSTRING_PTR(str), size);
10414 memset(p, *f, rlen);
10418 while (rlen >= fclen) {
10424 memcpy(p, f, rlen2);
10428 TERM_FILL(p, termlen);
10429 STR_SET_LEN(res, p-RSTRING_PTR(res));
10430 rb_enc_associate(res, enc);
10452rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10454 return rb_str_justify(argc, argv, str,
'l');
10468rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10470 return rb_str_justify(argc, argv, str,
'r');
10485rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10487 return rb_str_justify(argc, argv, str,
'c');
10503 sep = get_pat_quoted(sep, 0);
10512 sep = rb_str_subseq(str, pos, END(0) - pos);
10515 pos = rb_str_index(str, sep, 0);
10516 if (pos < 0)
goto failed;
10518 return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
10520 rb_str_subseq(str, pos+RSTRING_LEN(sep),
10521 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10524 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10538 long pos = RSTRING_LEN(str);
10540 sep = get_pat_quoted(sep, 0);
10549 sep = rb_str_subseq(str, pos, END(0) - pos);
10553 pos = rb_str_rindex(str, sep, pos);
10559 return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
10561 rb_str_subseq(str, pos+RSTRING_LEN(sep),
10562 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10564 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10576rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10580 for (i=0; i<argc; i++) {
10581 VALUE tmp = argv[i];
10583 if (rb_reg_start_with_p(tmp, str))
10587 const char *p, *s, *e;
10592 enc = rb_enc_check(str, tmp);
10593 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10594 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10595 p = RSTRING_PTR(str);
10598 if (!at_char_right_boundary(p, s, e, enc))
10600 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
10616rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10620 for (i=0; i<argc; i++) {
10621 VALUE tmp = argv[i];
10622 const char *p, *s, *e;
10627 enc = rb_enc_check(str, tmp);
10628 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10629 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10630 p = RSTRING_PTR(str);
10633 if (!at_char_boundary(p, s, e, enc))
10635 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
10651deleted_prefix_length(
VALUE str,
VALUE prefix)
10653 const char *strptr, *prefixptr;
10654 long olen, prefixlen;
10659 if (!is_broken_string(prefix) ||
10660 !rb_enc_asciicompat(enc) ||
10661 !rb_enc_asciicompat(rb_enc_get(prefix))) {
10662 enc = rb_enc_check(str, prefix);
10666 prefixlen = RSTRING_LEN(prefix);
10667 if (prefixlen <= 0)
return 0;
10668 olen = RSTRING_LEN(str);
10669 if (olen < prefixlen)
return 0;
10670 strptr = RSTRING_PTR(str);
10671 prefixptr = RSTRING_PTR(prefix);
10672 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10673 if (is_broken_string(prefix)) {
10674 if (!is_broken_string(str)) {
10678 const char *strend = strptr + olen;
10679 const char *after_prefix = strptr + prefixlen;
10680 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
10700rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10703 str_modify_keep_cr(str);
10705 prefixlen = deleted_prefix_length(str, prefix);
10706 if (prefixlen <= 0)
return Qnil;
10720rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10724 prefixlen = deleted_prefix_length(str, prefix);
10725 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10727 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
10740deleted_suffix_length(
VALUE str,
VALUE suffix)
10742 const char *strptr, *suffixptr;
10743 long olen, suffixlen;
10747 if (is_broken_string(suffix))
return 0;
10748 enc = rb_enc_check(str, suffix);
10751 suffixlen = RSTRING_LEN(suffix);
10752 if (suffixlen <= 0)
return 0;
10753 olen = RSTRING_LEN(str);
10754 if (olen < suffixlen)
return 0;
10755 strptr = RSTRING_PTR(str);
10756 suffixptr = RSTRING_PTR(suffix);
10757 const char *strend = strptr + olen;
10758 const char *before_suffix = strend - suffixlen;
10759 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
10760 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
10775rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10777 long olen, suffixlen,
len;
10778 str_modifiable(str);
10780 suffixlen = deleted_suffix_length(str, suffix);
10781 if (suffixlen <= 0)
return Qnil;
10783 olen = RSTRING_LEN(str);
10784 str_modify_keep_cr(str);
10785 len = olen - suffixlen;
10786 STR_SET_LEN(str,
len);
10787 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10803rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10807 suffixlen = deleted_suffix_length(str, suffix);
10808 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10810 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
10817 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
10825 val = rb_fs_check(val);
10828 "value of %"PRIsVALUE
" must be String or Regexp",
10832 rb_warn_deprecated(
"`$;'", NULL);
10849 str_modifiable(str);
10852 int idx = rb_enc_to_index(encoding);
10859 rb_enc_associate_index(str, idx);
10883 if (STR_EMBED_P(str)) {
10884 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
10889 str_replace_shared_without_enc(str2, str);
10891 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
10924rb_str_valid_encoding_p(
VALUE str)
10944rb_str_is_ascii_only_p(
VALUE str)
10954 static const char ellipsis[] =
"...";
10955 const long ellipsislen =
sizeof(ellipsis) - 1;
10957 const long blen = RSTRING_LEN(str);
10958 const char *
const p = RSTRING_PTR(str), *e = p + blen;
10959 VALUE estr, ret = 0;
10966 else if (
len <= ellipsislen ||
10968 if (rb_enc_asciicompat(enc)) {
10970 rb_enc_associate(ret, enc);
10977 else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
10978 rb_str_cat(ret, ellipsis, ellipsislen);
10982 rb_enc_from_encoding(enc), 0,
Qnil);
10995 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11001 rb_enc_name(enc), rb_enc_name(e));
11020 if (enc == STR_ENC_GET(str)) {
11025 return enc_str_scrub(enc, str, repl, cr);
11033 const char *rep, *p, *e, *p1, *sp;
11039 rb_raise(rb_eArgError,
"both of block and replacement given");
11046 if (!
NIL_P(repl)) {
11047 repl = str_compat_and_valid(repl, enc);
11050 if (rb_enc_dummy_p(enc)) {
11053 encidx = rb_enc_to_index(enc);
11055#define DEFAULT_REPLACE_CHAR(str) do { \
11056 static const char replace[sizeof(str)-1] = str; \
11057 rep = replace; replen = (int)sizeof(replace); \
11060 slen = RSTRING_LEN(str);
11061 p = RSTRING_PTR(str);
11062 e = RSTRING_END(str);
11066 if (rb_enc_asciicompat(enc)) {
11072 else if (!
NIL_P(repl)) {
11073 rep = RSTRING_PTR(repl);
11074 replen = RSTRING_LEN(repl);
11077 else if (encidx == rb_utf8_encindex()) {
11078 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11082 DEFAULT_REPLACE_CHAR(
"?");
11087 p = search_nonascii(p, e);
11092 int ret = rb_enc_precise_mbclen(p, e, enc);
11106 if (
NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
11111 if (e - p < clen) clen = e - p;
11118 for (; clen > 1; clen--) {
11119 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11130 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11131 str_mod_check(str, sp, slen);
11132 repl = str_compat_and_valid(repl, enc);
11139 p = search_nonascii(p, e);
11154 buf = rb_str_buf_new(RSTRING_LEN(str));
11165 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11166 str_mod_check(str, sp, slen);
11167 repl = str_compat_and_valid(repl, enc);
11180 else if (!
NIL_P(repl)) {
11181 rep = RSTRING_PTR(repl);
11182 replen = RSTRING_LEN(repl);
11184 else if (encidx == ENCINDEX_UTF_16BE) {
11185 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11187 else if (encidx == ENCINDEX_UTF_16LE) {
11188 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11190 else if (encidx == ENCINDEX_UTF_32BE) {
11191 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11193 else if (encidx == ENCINDEX_UTF_32LE) {
11194 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11197 DEFAULT_REPLACE_CHAR(
"?");
11201 int ret = rb_enc_precise_mbclen(p, e, enc);
11211 if (
NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
11214 if (e - p < clen) clen = e - p;
11215 if (clen <= mbminlen * 2) {
11220 for (; clen > mbminlen; clen-=mbminlen) {
11221 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11231 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11232 str_mod_check(str, sp, slen);
11233 repl = str_compat_and_valid(repl, enc);
11248 buf = rb_str_buf_new(RSTRING_LEN(str));
11258 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11259 str_mod_check(str, sp, slen);
11260 repl = str_compat_and_valid(repl, enc);
11296str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11304static ID id_normalize;
11305static ID id_normalized_p;
11306static VALUE mUnicodeNormalize;
11309unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11311 static int UnicodeNormalizeRequired = 0;
11314 if (!UnicodeNormalizeRequired) {
11315 rb_require(
"unicode_normalize/normalize.rb");
11316 UnicodeNormalizeRequired = 1;
11320 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11357rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11359 return unicode_normalize_common(argc, argv, str, id_normalize);
11373rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11375 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11402rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11404 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11536#define sym_equal rb_obj_equal
11539sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11543 int c = rb_enc_precise_mbclen(s, send, enc);
11555rb_str_symname_p(
VALUE sym)
11560 rb_encoding *resenc = rb_default_internal_encoding();
11562 if (resenc == NULL) resenc = rb_default_external_encoding();
11563 enc = STR_ENC_GET(sym);
11564 ptr = RSTRING_PTR(sym);
11565 len = RSTRING_LEN(sym);
11566 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
11574rb_str_quote_unprintable(
VALUE str)
11582 resenc = rb_default_internal_encoding();
11583 if (resenc == NULL) resenc = rb_default_external_encoding();
11584 enc = STR_ENC_GET(str);
11585 ptr = RSTRING_PTR(str);
11586 len = RSTRING_LEN(str);
11587 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11588 !sym_printable(ptr, ptr +
len, enc)) {
11589 return rb_str_escape(str);
11595rb_id_quote_unprintable(
ID id)
11597 VALUE str = rb_id2str(
id);
11598 if (!rb_str_symname_p(str)) {
11599 return rb_str_escape(str);
11617sym_inspect(
VALUE sym)
11624 if (!rb_str_symname_p(str)) {
11626 len = RSTRING_LEN(str);
11627 rb_str_resize(str,
len + 1);
11628 dest = RSTRING_PTR(str);
11629 memmove(dest + 1, dest,
len);
11634 VALUE orig_str = str;
11637 str = rb_enc_str_new(0,
len + 1, enc);
11638 dest = RSTRING_PTR(str);
11639 memcpy(dest + 1, ptr,
len);
11665rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11670 rb_raise(rb_eArgError,
"no receiver given");
11767 return rb_str_match(
rb_sym2str(sym), other);
11782sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11784 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11797sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11799 return rb_str_match_m_p(argc, argv, sym);
11817 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11828sym_length(
VALUE sym)
11842sym_empty(
VALUE sym)
11860 return rb_str_intern(rb_str_upcase(argc, argv,
rb_sym2str(sym)));
11876sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11878 return rb_str_intern(rb_str_downcase(argc, argv,
rb_sym2str(sym)));
11892sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11894 return rb_str_intern(rb_str_capitalize(argc, argv,
rb_sym2str(sym)));
11908sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11910 return rb_str_intern(rb_str_swapcase(argc, argv,
rb_sym2str(sym)));
11922sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11924 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11937sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
11939 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
11951sym_encoding(
VALUE sym)
11957string_for_symbol(
VALUE name)
11976 name = string_for_symbol(name);
11977 return rb_intern_str(name);
11986 name = string_for_symbol(name);
11987 return rb_str_intern(name);
12010 return rb_fstring(str);
12017 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
12029 if (UNLIKELY(rb_enc_autoload_p(enc))) {
12030 rb_enc_autoload(enc);
12034 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
12047 assert(rb_vm_fstring_table());
12048 st_foreach(rb_vm_fstring_table(), fstring_set_class_i,
rb_cString);
12213 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that is always checked, regardless of RUBY_DEBUG.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
@ RUBY_FL_FREEZE
This flag marks an object as frozen, i.e. immutable.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references, according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
Return value of rb_econv_convert().
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because the destination buffer is full.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby string instead of a C string.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs in the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs in the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.