Ruby 3.3.0p0 (2023-12-25 revision 5124f9ac7513eb590c37717337c430cb93caa151)
symbol.c
1/**********************************************************************
2
3 symbol.c -
4
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
7
8 Copyright (C) 2014 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "internal.h"
13#include "internal/error.h"
14#include "internal/gc.h"
15#include "internal/hash.h"
16#include "internal/object.h"
17#include "internal/symbol.h"
18#include "internal/vm.h"
19#include "probes.h"
20#include "ruby/encoding.h"
21#include "ruby/st.h"
22#include "symbol.h"
23#include "vm_sync.h"
24#include "builtin.h"
25
26#if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0)
27# undef USE_SYMBOL_GC
28# define USE_SYMBOL_GC 0
29#else
30# undef USE_SYMBOL_GC
31# define USE_SYMBOL_GC 1
32#endif
33#if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0)
34# undef SYMBOL_DEBUG
35# define SYMBOL_DEBUG 1
36#else
37# undef SYMBOL_DEBUG
38# define SYMBOL_DEBUG 0
39#endif
40#ifndef CHECK_ID_SERIAL
41# define CHECK_ID_SERIAL SYMBOL_DEBUG
42#endif
43
44#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
45
46#define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
47
48static ID register_static_symid(ID, const char *, long, rb_encoding *);
49static ID register_static_symid_str(ID, VALUE);
50#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
51#include "id.c"
52
53#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
54
55#define op_tbl_count numberof(op_tbl)
56STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
57#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
58
59static void
60Init_op_tbl(void)
61{
62 int i;
63 rb_encoding *const enc = rb_usascii_encoding();
64
65 for (i = '!'; i <= '~'; ++i) {
66 if (!ISALNUM(i) && i != '_') {
67 char c = (char)i;
68 register_static_symid(i, &c, 1, enc);
69 }
70 }
71 for (i = 0; i < op_tbl_count; ++i) {
72 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
73 }
74}
75
/* Number of IDs whose entries are stored together in one chunk array. */
static const int ID_ENTRY_UNIT = 512;

/*
 * Slots kept per ID inside a chunk array.  ID_ENTRY_SIZE is the number
 * of slots and is used as the stride between consecutive IDs.
 */
enum id_entry_type {
    ID_ENTRY_STR = 0,
    ID_ENTRY_SYM = 1,
    ID_ENTRY_SIZE = 2
};
83
84rb_symbols_t ruby_global_symbols = {tNEXT_ID-1};
85
86static const struct st_hash_type symhash = {
89};
90
91void
92Init_sym(void)
93{
94 rb_symbols_t *symbols = &ruby_global_symbols;
95
96 VALUE dsym_fstrs = rb_ident_hash_new();
97 symbols->dsymbol_fstr_hash = dsym_fstrs;
98 rb_gc_register_mark_object(dsym_fstrs);
99 rb_obj_hide(dsym_fstrs);
100
101 symbols->str_sym = st_init_table_with_size(&symhash, 1000);
102 symbols->ids = rb_ary_hidden_new(0);
103 rb_gc_register_mark_object(symbols->ids);
104
105 Init_op_tbl();
106 Init_id();
107}
108
109WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
110WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym));
111WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
112WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str));
113WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
114WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
115WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
116
117#define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
118#define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
119
120ID
121rb_id_attrset(ID id)
122{
123 VALUE str, sym;
124 int scope;
125
126 if (!is_notop_id(id)) {
127 switch (id) {
128 case tAREF: case tASET:
129 return tASET; /* only exception */
130 }
131 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
132 rb_id2str(id));
133 }
134 else {
135 scope = id_type(id);
136 switch (scope) {
137 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
138 case ID_CONST: case ID_CLASS: case ID_JUNK:
139 break;
140 case ID_ATTRSET:
141 return id;
142 default:
143 {
144 if ((str = lookup_id_str(id)) != 0) {
145 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
146 scope, str);
147 }
148 else {
149 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
150 scope, (VALUE)id);
151 }
152 }
153 }
154 }
155
156 /* make new symbol and ID */
157 if (!(str = lookup_id_str(id))) {
158 static const char id_types[][8] = {
159 "local",
160 "instance",
161 "invalid",
162 "global",
163 "attrset",
164 "const",
165 "class",
166 "junk",
167 };
168 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
169 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
170 }
171 str = rb_str_dup(str);
172 rb_str_cat(str, "=", 1);
173 sym = lookup_str_sym(str);
174 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
175 return id;
176}
177
178static int
179is_special_global_name(const char *m, const char *e, rb_encoding *enc)
180{
181 int mb = 0;
182
183 if (m >= e) return 0;
184 if (is_global_name_punct(*m)) {
185 ++m;
186 }
187 else if (*m == '-') {
188 if (++m >= e) return 0;
189 if (is_identchar(m, e, enc)) {
190 if (!ISASCII(*m)) mb = 1;
191 m += rb_enc_mbclen(m, e, enc);
192 }
193 }
194 else {
195 if (!ISDIGIT(*m)) return 0;
196 do {
197 if (!ISASCII(*m)) mb = 1;
198 ++m;
199 } while (m < e && ISDIGIT(*m));
200 }
201 return m == e ? mb + 1 : 0;
202}
203
204int
205rb_symname_p(const char *name)
206{
207 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
208}
209
210int
211rb_enc_symname_p(const char *name, rb_encoding *enc)
212{
213 return rb_enc_symname2_p(name, strlen(name), enc);
214}
215
216static int
217rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
218{
219 int c, len;
220 const char *end = name + nlen;
221
222 if (nlen < 1) return FALSE;
223 if (ISASCII(*name)) return ISUPPER(*name);
224 c = rb_enc_precise_mbclen(name, end, enc);
225 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
227 c = rb_enc_mbc_to_codepoint(name, end, enc);
228 if (rb_enc_isupper(c, enc)) return TRUE;
229 if (rb_enc_islower(c, enc)) return FALSE;
230 if (ONIGENC_IS_UNICODE(enc)) {
231 static int ctype_titlecase = 0;
232 if (!ctype_titlecase) {
233 static const UChar cname[] = "titlecaseletter";
234 static const UChar *const end = cname + sizeof(cname) - 1;
235 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
236 }
237 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
238 }
239 else {
240 /* fallback to case-folding */
241 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
242 const OnigUChar *beg = (const OnigUChar *)name;
243 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
244 &beg, (const OnigUChar *)end,
245 fold, enc);
246 if (r > 0 && (r != len || memcmp(fold, name, r)))
247 return TRUE;
248 }
249 return FALSE;
250}
251
252#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
253#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
254
256 const enum { invalid, stophere, needmore, } kind;
257 const enum ruby_id_types type;
258 const long nread;
259};
260
261#define t struct enc_synmane_type_leading_chars_tag
262
264enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
265{
266 const char *m = name;
267 const char *e = m + len;
268
269 if (! rb_enc_asciicompat(enc)) {
270 return (t) { invalid, 0, 0, };
271 }
272 else if (! m) {
273 return (t) { invalid, 0, 0, };
274 }
275 else if ( len <= 0 ) {
276 return (t) { invalid, 0, 0, };
277 }
278 switch (*m) {
279 case '\0':
280 return (t) { invalid, 0, 0, };
281
282 case '$':
283 if (is_special_global_name(++m, e, enc)) {
284 return (t) { stophere, ID_GLOBAL, len, };
285 }
286 else {
287 return (t) { needmore, ID_GLOBAL, 1, };
288 }
289
290 case '@':
291 switch (*++m) {
292 default: return (t) { needmore, ID_INSTANCE, 1, };
293 case '@': return (t) { needmore, ID_CLASS, 2, };
294 }
295
296 case '<':
297 switch (*++m) {
298 default: return (t) { stophere, ID_JUNK, 1, };
299 case '<': return (t) { stophere, ID_JUNK, 2, };
300 case '=':
301 switch (*++m) {
302 default: return (t) { stophere, ID_JUNK, 2, };
303 case '>': return (t) { stophere, ID_JUNK, 3, };
304 }
305 }
306
307 case '>':
308 switch (*++m) {
309 default: return (t) { stophere, ID_JUNK, 1, };
310 case '>': case '=': return (t) { stophere, ID_JUNK, 2, };
311 }
312
313 case '=':
314 switch (*++m) {
315 default: return (t) { invalid, 0, 1, };
316 case '~': return (t) { stophere, ID_JUNK, 2, };
317 case '=':
318 switch (*++m) {
319 default: return (t) { stophere, ID_JUNK, 2, };
320 case '=': return (t) { stophere, ID_JUNK, 3, };
321 }
322 }
323
324 case '*':
325 switch (*++m) {
326 default: return (t) { stophere, ID_JUNK, 1, };
327 case '*': return (t) { stophere, ID_JUNK, 2, };
328 }
329
330 case '+': case '-':
331 switch (*++m) {
332 default: return (t) { stophere, ID_JUNK, 1, };
333 case '@': return (t) { stophere, ID_JUNK, 2, };
334 }
335
336 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
337 return (t) { stophere, ID_JUNK, 1, };
338
339 case '[':
340 switch (*++m) {
341 default: return (t) { needmore, ID_JUNK, 0, };
342 case ']':
343 switch (*++m) {
344 default: return (t) { stophere, ID_JUNK, 2, };
345 case '=': return (t) { stophere, ID_JUNK, 3, };
346 }
347 }
348
349 case '!':
350 switch (*++m) {
351 case '=': case '~': return (t) { stophere, ID_JUNK, 2, };
352 default:
353 if (allowed_attrset & (1U << ID_JUNK)) {
354 return (t) { needmore, ID_JUNK, 1, };
355 }
356 else {
357 return (t) { stophere, ID_JUNK, 1, };
358 }
359 }
360
361 default:
362 if (rb_sym_constant_char_p(name, len, enc)) {
363 return (t) { needmore, ID_CONST, 0, };
364 }
365 else {
366 return (t) { needmore, ID_LOCAL, 0, };
367 }
368 }
369}
370#undef t
371
372int
373rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
374{
376 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
377 const char *m = name + f.nread;
378 const char *e = name + len;
379 int type = (int)f.type;
380
381 switch (f.kind) {
382 case invalid: return -1;
383 case stophere: break;
384 case needmore:
385
386 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
387 if (len > 1 && *(e-1) == '=') {
388 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
389 if (allowed_attrset & (1U << type)) return ID_ATTRSET;
390 }
391 return -1;
392 }
393 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
394 if (m >= e) break;
395 switch (*m) {
396 case '!': case '?':
397 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
398 type = ID_JUNK;
399 ++m;
400 if (m + 1 < e || *m != '=') break;
401 /* fall through */
402 case '=':
403 if (!(allowed_attrset & (1U << type))) return -1;
404 type = ID_ATTRSET;
405 ++m;
406 break;
407 }
408 }
409
410 return m == e ? type : -1;
411}
412
413int
414rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
415{
416 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
417}
418
419static int
420rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
421{
422 const char *ptr = StringValuePtr(name);
423 long len = RSTRING_LEN(name);
424 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
425 RB_GC_GUARD(name);
426 return type;
427}
428
429static void
430set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
431{
432 ASSERT_vm_locking();
433 size_t idx = num / ID_ENTRY_UNIT;
434
435 VALUE ary, ids = symbols->ids;
436 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
437 ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
438 rb_ary_store(ids, (long)idx, ary);
439 }
440 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
441 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
442 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
443}
444
445static VALUE
446get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
447{
448 VALUE result = 0;
449
450 GLOBAL_SYMBOLS_ENTER(symbols);
451 {
452 if (num && num <= symbols->last_id) {
453 size_t idx = num / ID_ENTRY_UNIT;
454 VALUE ids = symbols->ids;
455 VALUE ary;
456 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
457 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
458 result = rb_ary_entry(ary, pos + t);
459
460 if (NIL_P(result)) {
461 result = 0;
462 }
463 else if (CHECK_ID_SERIAL) {
464 if (id) {
465 VALUE sym = result;
466 if (t != ID_ENTRY_SYM)
467 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
468 if (STATIC_SYM_P(sym)) {
469 if (STATIC_SYM2ID(sym) != id) result = 0;
470 }
471 else {
472 if (RSYMBOL(sym)->id != id) result = 0;
473 }
474 }
475 }
476 }
477 }
478 }
479 GLOBAL_SYMBOLS_LEAVE();
480
481 return result;
482}
483
484static VALUE
485get_id_entry(ID id, const enum id_entry_type t)
486{
487 return get_id_serial_entry(rb_id_to_serial(id), id, t);
488}
489
490int
491rb_static_id_valid_p(ID id)
492{
493 return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM);
494}
495
496static inline ID
497rb_id_serial_to_id(rb_id_serial_t num)
498{
499 if (is_notop_id((ID)num)) {
500 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
501 if (sym) return SYM2ID(sym);
502 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
503 }
504 else {
505 return (ID)num;
506 }
507}
508
509static int
510register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
511{
512 if (existing) {
513 rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
514 (VALUE)*key, (VALUE)*value);
515 }
516 *value = arg;
517 return ST_CONTINUE;
518}
519
520static void
521register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
522{
523 ASSERT_vm_locking();
524
525 if (SYMBOL_DEBUG) {
526 st_update(symbols->str_sym, (st_data_t)str,
527 register_sym_update_callback, (st_data_t)sym);
528 }
529 else {
530 st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym);
531 }
532}
533
534void
535rb_free_static_symid_str(void)
536{
537 GLOBAL_SYMBOLS_ENTER(symbols)
538 {
539 st_free_table(symbols->str_sym);
540 }
541 GLOBAL_SYMBOLS_LEAVE();
542}
543
544static void
545unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
546{
547 ASSERT_vm_locking();
548
549 st_data_t str_data = (st_data_t)str;
550 if (!st_delete(symbols->str_sym, &str_data, NULL)) {
551 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
552 }
553}
554
555static ID
556register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
557{
558 VALUE str = rb_enc_str_new(name, len, enc);
559 return register_static_symid_str(id, str);
560}
561
562static ID
563register_static_symid_str(ID id, VALUE str)
564{
565 rb_id_serial_t num = rb_id_to_serial(id);
566 VALUE sym = STATIC_ID2SYM(id);
567
568 OBJ_FREEZE(str);
569 str = rb_fstring(str);
570
571 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
572
573 GLOBAL_SYMBOLS_ENTER(symbols)
574 {
575 register_sym(symbols, str, sym);
576 set_id_entry(symbols, num, str, sym);
577 }
578 GLOBAL_SYMBOLS_LEAVE();
579
580 return id;
581}
582
583static int
584sym_check_asciionly(VALUE str)
585{
586 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
587 switch (rb_enc_str_coderange(str)) {
589 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
590 rb_enc_name(rb_enc_get(str)), str);
592 return TRUE;
593 }
594 return FALSE;
595}
596
#if 0
/*
 * _str_ itself will be registered at the global symbol table.  _str_
 * can be modified before the registration, since the encoding will be
 * set to ASCII-8BIT if it is a special global name.
 */

/*
 * Disabled helper: reports an interpreter bug unless +x+ is a dynamic
 * symbol.
 */
static inline void
must_be_dynamic_symbol(VALUE x)
{
    VALUE str;

    if (DYNAMIC_SYM_P(x)) return;

    if (!STATIC_SYM_P(x)) {
        rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
    }

    str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));
    if (str) {
        rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
    }
    else {
        rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
    }
}
#endif
624
625static VALUE
626dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
627{
628 ASSERT_vm_locking();
629
630 const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
631 long hashval;
632
633 rb_enc_set_index(dsym, rb_enc_to_index(enc));
634 OBJ_FREEZE(dsym);
635 RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
636 RSYMBOL(dsym)->id = type;
637
638 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
639 hashval = (long)rb_str_hash(str);
640 RSYMBOL(dsym)->hashval = RSHIFT((long)hashval, 1);
641 register_sym(symbols, str, dsym);
642 rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue);
643 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(RSYMBOL(dsym)->fstr));
644
645 return dsym;
646}
647
648static inline VALUE
649dsymbol_check(rb_symbols_t *symbols, const VALUE sym)
650{
651 ASSERT_vm_locking();
652
653 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
654 const VALUE fstr = RSYMBOL(sym)->fstr;
655 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
656 RSYMBOL(sym)->fstr = 0;
657 unregister_sym(symbols, fstr, sym);
658 return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type);
659 }
660 else {
661 return sym;
662 }
663}
664
665static ID
666lookup_str_id(VALUE str)
667{
668 st_data_t sym_data;
669 int found;
670
671 GLOBAL_SYMBOLS_ENTER(symbols);
672 {
673 found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data);
674 }
675 GLOBAL_SYMBOLS_LEAVE();
676
677 if (found) {
678 const VALUE sym = (VALUE)sym_data;
679
680 if (STATIC_SYM_P(sym)) {
681 return STATIC_SYM2ID(sym);
682 }
683 else if (DYNAMIC_SYM_P(sym)) {
684 ID id = RSYMBOL(sym)->id;
685 if (id & ~ID_SCOPE_MASK) return id;
686 }
687 else {
688 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
689 rb_builtin_class_name(sym), sym, str);
690 }
691 }
692 return (ID)0;
693}
694
695static VALUE
696lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)
697{
698 st_data_t sym_data;
699 if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) {
700 VALUE sym = (VALUE)sym_data;
701 if (DYNAMIC_SYM_P(sym)) {
702 sym = dsymbol_check(symbols, sym);
703 }
704 return sym;
705 }
706 else {
707 return Qfalse;
708 }
709}
710
711static VALUE
712lookup_str_sym(const VALUE str)
713{
714 VALUE sym;
715
716 GLOBAL_SYMBOLS_ENTER(symbols);
717 {
718 sym = lookup_str_sym_with_lock(symbols, str);
719 }
720 GLOBAL_SYMBOLS_LEAVE();
721
722 return sym;
723}
724
725static VALUE
726lookup_id_str(ID id)
727{
728 return get_id_entry(id, ID_ENTRY_STR);
729}
730
731ID
732rb_intern3(const char *name, long len, rb_encoding *enc)
733{
734 VALUE sym;
735 struct RString fake_str;
736 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
737 OBJ_FREEZE(str);
738 sym = lookup_str_sym(str);
739 if (sym) return rb_sym2id(sym);
740 str = rb_enc_str_new(name, len, enc); /* make true string */
741 return intern_str(str, 1);
742}
743
744static ID
745next_id_base_with_lock(rb_symbols_t *symbols)
746{
747 ID id;
748 rb_id_serial_t next_serial = symbols->last_id + 1;
749
750 if (next_serial == 0) {
751 id = (ID)-1;
752 }
753 else {
754 const size_t num = ++symbols->last_id;
755 id = num << ID_SCOPE_SHIFT;
756 }
757
758 return id;
759}
760
761static ID
762next_id_base(void)
763{
764 ID id;
765 GLOBAL_SYMBOLS_ENTER(symbols);
766 {
767 id = next_id_base_with_lock(symbols);
768 }
769 GLOBAL_SYMBOLS_LEAVE();
770 return id;
771}
772
773static ID
774intern_str(VALUE str, int mutable)
775{
776 ID id;
777 ID nid;
778
779 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
780 if (id == (ID)-1) id = ID_JUNK;
781 if (sym_check_asciionly(str)) {
782 if (!mutable) str = rb_str_dup(str);
783 rb_enc_associate(str, rb_usascii_encoding());
784 }
785 if ((nid = next_id_base()) == (ID)-1) {
786 str = rb_str_ellipsize(str, 20);
787 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
788 str);
789 }
790 id |= nid;
791 id |= ID_STATIC_SYM;
792 return register_static_symid_str(id, str);
793}
794
795ID
796rb_intern2(const char *name, long len)
797{
798 return rb_intern3(name, len, rb_usascii_encoding());
799}
800
801#undef rb_intern
802ID
803rb_intern(const char *name)
804{
805 return rb_intern2(name, strlen(name));
806}
807
808ID
809rb_intern_str(VALUE str)
810{
811 VALUE sym = lookup_str_sym(str);
812
813 if (sym) {
814 return SYM2ID(sym);
815 }
816
817 return intern_str(str, 0);
818}
819
820void
821rb_gc_free_dsymbol(VALUE sym)
822{
823 VALUE str = RSYMBOL(sym)->fstr;
824
825 if (str) {
826 RSYMBOL(sym)->fstr = 0;
827
828 GLOBAL_SYMBOLS_ENTER(symbols);
829 {
830 unregister_sym(symbols, str, sym);
831 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str);
832 }
833 GLOBAL_SYMBOLS_LEAVE();
834 }
835}
836
837/*
838 * call-seq:
839 * str.intern -> symbol
840 * str.to_sym -> symbol
841 *
842 * Returns the Symbol corresponding to <i>str</i>, creating the
843 * symbol if it did not previously exist. See Symbol#id2name.
844 *
845 * "Koala".intern #=> :Koala
846 * s = 'cat'.to_sym #=> :cat
847 * s == :cat #=> true
848 * s = '@cat'.to_sym #=> :@cat
849 * s == :@cat #=> true
850 *
851 * This can also be used to create symbols that cannot be represented using the
852 * <code>:xxx</code> notation.
853 *
854 * 'cat and dog'.to_sym #=> :"cat and dog"
855 */
856
857VALUE
858rb_str_intern(VALUE str)
859{
860 VALUE sym;
861
862 GLOBAL_SYMBOLS_ENTER(symbols);
863 {
864 sym = lookup_str_sym_with_lock(symbols, str);
865
866 if (sym) {
867 // ok
868 }
869 else if (USE_SYMBOL_GC) {
870 rb_encoding *enc = rb_enc_get(str);
871 rb_encoding *ascii = rb_usascii_encoding();
872 if (enc != ascii && sym_check_asciionly(str)) {
873 str = rb_str_dup(str);
874 rb_enc_associate(str, ascii);
875 OBJ_FREEZE(str);
876 enc = ascii;
877 }
878 else {
879 str = rb_str_dup(str);
880 OBJ_FREEZE(str);
881 }
882 str = rb_fstring(str);
883 int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
884 if (type < 0) type = ID_JUNK;
885 sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
886 }
887 else {
888 ID id = intern_str(str, 0);
889 sym = ID2SYM(id);
890 }
891 }
892 GLOBAL_SYMBOLS_LEAVE();
893 return sym;
894}
895
896ID
898{
899 ID id;
900 if (STATIC_SYM_P(sym)) {
901 id = STATIC_SYM2ID(sym);
902 }
903 else if (DYNAMIC_SYM_P(sym)) {
904 GLOBAL_SYMBOLS_ENTER(symbols);
905 {
906 sym = dsymbol_check(symbols, sym);
907 id = RSYMBOL(sym)->id;
908
909 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
910 VALUE fstr = RSYMBOL(sym)->fstr;
911 ID num = next_id_base_with_lock(symbols);
912
913 RSYMBOL(sym)->id = id |= num;
914 /* make it permanent object */
915
916 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
917 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr);
918 }
919 }
920 GLOBAL_SYMBOLS_LEAVE();
921 }
922 else {
923 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
924 rb_builtin_class_name(sym));
925 }
926 return id;
927}
928
929#undef rb_id2sym
930VALUE
931rb_id2sym(ID x)
932{
933 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
934 return get_id_entry(x, ID_ENTRY_SYM);
935}
936
937/*
938 * call-seq:
939 * name -> string
940 *
941 * Returns a frozen string representation of +self+ (not including the leading colon):
942 *
943 * :foo.name # => "foo"
944 * :foo.name.frozen? # => true
945 *
946 * Related: Symbol#to_s, Symbol#inspect.
947 */
948
949VALUE
951{
952 if (DYNAMIC_SYM_P(sym)) {
953 return RSYMBOL(sym)->fstr;
954 }
955 else {
956 return rb_id2str(STATIC_SYM2ID(sym));
957 }
958}
959
960VALUE
961rb_id2str(ID id)
962{
963 return lookup_id_str(id);
964}
965
966const char *
967rb_id2name(ID id)
968{
969 VALUE str = rb_id2str(id);
970
971 if (!str) return 0;
972 return RSTRING_PTR(str);
973}
974
975ID
976rb_make_internal_id(void)
977{
978 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
979}
980
981ID
982rb_make_temporary_id(size_t n)
983{
984 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
985 const ID id = max_id - (ID)n;
986 if (id <= ruby_global_symbols.last_id) {
987 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
988 }
989 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
990}
991
992static int
993symbols_i(st_data_t key, st_data_t value, st_data_t arg)
994{
995 VALUE ary = (VALUE)arg;
996 VALUE sym = (VALUE)value;
997
998 if (STATIC_SYM_P(sym)) {
999 rb_ary_push(ary, sym);
1000 return ST_CONTINUE;
1001 }
1002 else if (!DYNAMIC_SYM_P(sym)) {
1003 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
1004 }
1005 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
1006 RSYMBOL(sym)->fstr = 0;
1007 return ST_DELETE;
1008 }
1009 else {
1010 rb_ary_push(ary, sym);
1011 return ST_CONTINUE;
1012 }
1013
1014}
1015
1016VALUE
1018{
1019 VALUE ary;
1020
1021 GLOBAL_SYMBOLS_ENTER(symbols);
1022 {
1023 ary = rb_ary_new2(symbols->str_sym->num_entries);
1024 st_foreach(symbols->str_sym, symbols_i, ary);
1025 }
1026 GLOBAL_SYMBOLS_LEAVE();
1027
1028 return ary;
1029}
1030
1031size_t
1032rb_sym_immortal_count(void)
1033{
1034 return (size_t)ruby_global_symbols.last_id;
1035}
1036
1037int
1039{
1040 return is_const_id(id);
1041}
1042
1043int
1045{
1046 return is_class_id(id);
1047}
1048
1049int
1051{
1052 return is_global_id(id);
1053}
1054
1055int
1057{
1058 return is_instance_id(id);
1059}
1060
1061int
1063{
1064 return is_attrset_id(id);
1065}
1066
1067int
1069{
1070 return is_local_id(id);
1071}
1072
1073int
1075{
1076 return is_junk_id(id);
1077}
1078
1079int
1080rb_is_const_sym(VALUE sym)
1081{
1082 return is_const_sym(sym);
1083}
1084
1085int
1086rb_is_attrset_sym(VALUE sym)
1087{
1088 return is_attrset_sym(sym);
1089}
1090
1091ID
1092rb_check_id(volatile VALUE *namep)
1093{
1094 VALUE tmp;
1095 VALUE name = *namep;
1096
1097 if (STATIC_SYM_P(name)) {
1098 return STATIC_SYM2ID(name);
1099 }
1100 else if (DYNAMIC_SYM_P(name)) {
1101 if (SYMBOL_PINNED_P(name)) {
1102 return RSYMBOL(name)->id;
1103 }
1104 else {
1105 *namep = RSYMBOL(name)->fstr;
1106 return 0;
1107 }
1108 }
1109 else if (!RB_TYPE_P(name, T_STRING)) {
1110 tmp = rb_check_string_type(name);
1111 if (NIL_P(tmp)) {
1112 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1113 name);
1114 }
1115 name = tmp;
1116 *namep = name;
1117 }
1118
1119 sym_check_asciionly(name);
1120
1121 return lookup_str_id(name);
1122}
1123
1124// Used by yjit for handling .send without throwing exceptions
1125ID
1126rb_get_symbol_id(VALUE name)
1127{
1128 if (STATIC_SYM_P(name)) {
1129 return STATIC_SYM2ID(name);
1130 }
1131 else if (DYNAMIC_SYM_P(name)) {
1132 if (SYMBOL_PINNED_P(name)) {
1133 return RSYMBOL(name)->id;
1134 }
1135 else {
1136 return 0;
1137 }
1138 }
1139 else {
1140 RUBY_ASSERT_ALWAYS(RB_TYPE_P(name, T_STRING));
1141 return lookup_str_id(name);
1142 }
1143}
1144
1145
1146VALUE
1147rb_check_symbol(volatile VALUE *namep)
1148{
1149 VALUE sym;
1150 VALUE tmp;
1151 VALUE name = *namep;
1152
1153 if (STATIC_SYM_P(name)) {
1154 return name;
1155 }
1156 else if (DYNAMIC_SYM_P(name)) {
1157 if (!SYMBOL_PINNED_P(name)) {
1158 GLOBAL_SYMBOLS_ENTER(symbols);
1159 {
1160 name = dsymbol_check(symbols, name);
1161 }
1162 GLOBAL_SYMBOLS_LEAVE();
1163
1164 *namep = name;
1165 }
1166 return name;
1167 }
1168 else if (!RB_TYPE_P(name, T_STRING)) {
1169 tmp = rb_check_string_type(name);
1170 if (NIL_P(tmp)) {
1171 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1172 name);
1173 }
1174 name = tmp;
1175 *namep = name;
1176 }
1177
1178 sym_check_asciionly(name);
1179
1180 if ((sym = lookup_str_sym(name)) != 0) {
1181 return sym;
1182 }
1183
1184 return Qnil;
1185}
1186
1187ID
1188rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1189{
1190 struct RString fake_str;
1191 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1192
1193 sym_check_asciionly(name);
1194
1195 return lookup_str_id(name);
1196}
1197
1198VALUE
1199rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1200{
1201 VALUE sym;
1202 struct RString fake_str;
1203 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1204
1205 sym_check_asciionly(name);
1206
1207 if ((sym = lookup_str_sym(name)) != 0) {
1208 return sym;
1209 }
1210
1211 return Qnil;
1212}
1213
1214#undef rb_sym_intern_ascii_cstr
1215#ifdef __clang__
1216NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1217#else
1218FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1219FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1220FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1221#endif
1222
1223VALUE
1224rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1225{
1226 struct RString fake_str;
1227 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1228 return rb_str_intern(name);
1229}
1230
1231VALUE
1232rb_sym_intern_ascii(const char *ptr, long len)
1233{
1234 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1235}
1236
1237VALUE
1238rb_sym_intern_ascii_cstr(const char *ptr)
1239{
1240 return rb_sym_intern_ascii(ptr, strlen(ptr));
1241}
1242
1243VALUE
1244rb_to_symbol_type(VALUE obj)
1245{
1246 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1247}
1248
1249int
1250rb_is_const_name(VALUE name)
1251{
1252 return rb_str_symname_type(name, 0) == ID_CONST;
1253}
1254
1255int
1256rb_is_class_name(VALUE name)
1257{
1258 return rb_str_symname_type(name, 0) == ID_CLASS;
1259}
1260
1261int
1262rb_is_instance_name(VALUE name)
1263{
1264 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1265}
1266
1267int
1268rb_is_local_name(VALUE name)
1269{
1270 return rb_str_symname_type(name, 0) == ID_LOCAL;
1271}
1272
1273#include "id_table.c"
1274#include "symbol.rbinc"
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
Definition assert.h:167
static bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isupper(), except it additionally takes an encoding.
Definition ctype.h:124
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
Definition ctype.h:63
static bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
Identical to rb_islower(), except it additionally takes an encoding.
Definition ctype.h:110
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ISUPPER
Old name of rb_isupper.
Definition ctype.h:89
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
Definition fl_type.h:135
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define STATIC_SYM_P
Old name of RB_STATIC_SYM_P.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition encoding.h:516
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qtrue
Old name of RUBY_Qtrue.
#define DYNAMIC_SYM_P
Old name of RB_DYNAMIC_SYM_P.
Definition value_type.h:86
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition coderange.h:182
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition encoding.h:515
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
Definition fl_type.h:59
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:651
#define ISALNUM
Old name of rb_isalnum.
Definition ctype.h:91
void rb_name_error(ID id, const char *fmt,...)
Raises an instance of rb_eNameError.
Definition error.c:2037
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1344
void rb_name_error_str(VALUE str, const char *fmt,...)
Identical to rb_name_error(), except it takes a VALUE instead of ID.
Definition error.c:2052
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1342
VALUE rb_eEncodingError
EncodingError exception.
Definition error.c:1350
VALUE rb_cSymbol
Symbol class.
Definition string.c:79
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:619
Encoding-related APIs.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
Definition encoding.h:590
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
Definition string.c:769
int rb_enc_symname_p(const char *str, rb_encoding *enc)
Identical to rb_symname_p(), except it additionally takes an encoding.
Definition symbol.c:211
VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id_cstr(), except for the return type.
Definition symbol.c:1199
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
Definition symbol.c:414
ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id(), except it takes a pointer to a memory region instead of a Ruby string.
Definition symbol.c:1188
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
Definition symbol.c:1017
int rb_is_global_id(ID id)
Classifies the given ID, then sees if it is a global variable.
Definition symbol.c:1050
int rb_is_instance_id(ID id)
Classifies the given ID, then sees if it is an instance variable.
Definition symbol.c:1056
int rb_is_const_id(ID id)
Classifies the given ID, then sees if it is a constant.
Definition symbol.c:1038
int rb_is_junk_id(ID)
Classifies the given ID, then sees if it is a junk ID.
Definition symbol.c:1074
int rb_symname_p(const char *str)
Sees if the passed C string constructs a valid syntactic symbol.
Definition symbol.c:205
int rb_is_class_id(ID id)
Classifies the given ID, then sees if it is a class variable.
Definition symbol.c:1044
int rb_is_attrset_id(ID id)
Classifies the given ID, then sees if it is an attribute writer.
Definition symbol.c:1062
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1068
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
Definition string.c:3598
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition string.c:10952
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition string.c:3587
VALUE rb_check_string_type(VALUE obj)
Tries converting an object to its stringised representation using its to_str method, if any.
Definition string.c:2654
VALUE rb_check_symbol(volatile VALUE *namep)
Identical to rb_check_id(), except it returns an instance of rb_cSymbol instead.
Definition symbol.c:1147
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1092
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
Definition symbol.c:950
ID rb_sym2id(VALUE obj)
Converts an instance of rb_cSymbol into an ID.
Definition symbol.c:897
int len
Length of the buffer.
Definition io.h:8
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:161
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:76
@ RUBY_SPECIAL_SHIFT
Least significant 8 bits are reserved.
Ruby's String.
Definition rstring.h:196
char * ptr
Pointer to the contents of the string.
Definition rstring.h:222
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40