Ruby 3.3.6p108 (2024-11-05 revision 75015d4c1f6965b5e85e96fb309f1f2129f933c0)
symbol.c
1/**********************************************************************
2
3 symbol.c -
4
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
7
8 Copyright (C) 2014 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "internal.h"
13#include "internal/error.h"
14#include "internal/gc.h"
15#include "internal/hash.h"
16#include "internal/object.h"
17#include "internal/symbol.h"
18#include "internal/vm.h"
19#include "probes.h"
20#include "ruby/encoding.h"
21#include "ruby/st.h"
22#include "symbol.h"
23#include "vm_sync.h"
24#include "builtin.h"
25
26#if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0)
27# undef USE_SYMBOL_GC
28# define USE_SYMBOL_GC 0
29#else
30# undef USE_SYMBOL_GC
31# define USE_SYMBOL_GC 1
32#endif
33#if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0)
34# undef SYMBOL_DEBUG
35# define SYMBOL_DEBUG 1
36#else
37# undef SYMBOL_DEBUG
38# define SYMBOL_DEBUG 0
39#endif
40#ifndef CHECK_ID_SERIAL
41# define CHECK_ID_SERIAL SYMBOL_DEBUG
42#endif
43
44#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
45
46#define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
47
48static ID register_static_symid(ID, const char *, long, rb_encoding *);
49static ID register_static_symid_str(ID, VALUE);
50#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
51#include "id.c"
52
53#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
54
55#define op_tbl_count numberof(op_tbl)
56STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
57#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
58
59static void
60Init_op_tbl(void)
61{
62 int i;
63 rb_encoding *const enc = rb_usascii_encoding();
64
65 for (i = '!'; i <= '~'; ++i) {
66 if (!ISALNUM(i) && i != '_') {
67 char c = (char)i;
68 register_static_symid(i, &c, 1, enc);
69 }
70 }
71 for (i = 0; i < op_tbl_count; ++i) {
72 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
73 }
74}
75
76static const int ID_ENTRY_UNIT = 512;
77
/*
 * Layout of one ID entry inside a chunk of the `ids` table: each serial
 * owns ID_ENTRY_SIZE consecutive slots.
 */
enum id_entry_type {
    ID_ENTRY_STR,   /* slot holding the name string */
    ID_ENTRY_SYM,   /* slot holding the symbol */
    ID_ENTRY_SIZE   /* number of slots per entry */
};
83
84rb_symbols_t ruby_global_symbols = {tNEXT_ID-1};
85
86static const struct st_hash_type symhash = {
89};
90
91void
92Init_sym(void)
93{
94 rb_symbols_t *symbols = &ruby_global_symbols;
95
96 VALUE dsym_fstrs = rb_ident_hash_new();
97 symbols->dsymbol_fstr_hash = dsym_fstrs;
98 rb_gc_register_mark_object(dsym_fstrs);
99 rb_obj_hide(dsym_fstrs);
100
101 symbols->str_sym = st_init_table_with_size(&symhash, 1000);
102 symbols->ids = rb_ary_hidden_new(0);
103 rb_gc_register_mark_object(symbols->ids);
104
105 Init_op_tbl();
106 Init_id();
107}
108
109WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
110WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym));
111WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
112WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str));
113WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
114WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
115WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
116
117#define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
118#define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
119
120ID
121rb_id_attrset(ID id)
122{
123 VALUE str, sym;
124 int scope;
125
126 if (!is_notop_id(id)) {
127 switch (id) {
128 case tAREF: case tASET:
129 return tASET; /* only exception */
130 }
131 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
132 rb_id2str(id));
133 }
134 else {
135 scope = id_type(id);
136 switch (scope) {
137 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
138 case ID_CONST: case ID_CLASS: case ID_JUNK:
139 break;
140 case ID_ATTRSET:
141 return id;
142 default:
143 {
144 if ((str = lookup_id_str(id)) != 0) {
145 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
146 scope, str);
147 }
148 else {
149 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
150 scope, (VALUE)id);
151 }
152 }
153 }
154 }
155
156 /* make new symbol and ID */
157 if (!(str = lookup_id_str(id))) {
158 static const char id_types[][8] = {
159 "local",
160 "instance",
161 "invalid",
162 "global",
163 "attrset",
164 "const",
165 "class",
166 "junk",
167 };
168 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
169 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
170 }
171 str = rb_str_dup(str);
172 rb_str_cat(str, "=", 1);
173 sym = lookup_str_sym(str);
174 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
175 return id;
176}
177
178static int
179is_special_global_name(const char *m, const char *e, rb_encoding *enc)
180{
181 int mb = 0;
182
183 if (m >= e) return 0;
184 if (is_global_name_punct(*m)) {
185 ++m;
186 }
187 else if (*m == '-') {
188 if (++m >= e) return 0;
189 if (is_identchar(m, e, enc)) {
190 if (!ISASCII(*m)) mb = 1;
191 m += rb_enc_mbclen(m, e, enc);
192 }
193 }
194 else {
195 if (!ISDIGIT(*m)) return 0;
196 do {
197 if (!ISASCII(*m)) mb = 1;
198 ++m;
199 } while (m < e && ISDIGIT(*m));
200 }
201 return m == e ? mb + 1 : 0;
202}
203
204int
205rb_symname_p(const char *name)
206{
207 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
208}
209
210int
211rb_enc_symname_p(const char *name, rb_encoding *enc)
212{
213 return rb_enc_symname2_p(name, strlen(name), enc);
214}
215
216static int
217rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
218{
219 int c, len;
220 const char *end = name + nlen;
221
222 if (nlen < 1) return FALSE;
223 if (ISASCII(*name)) return ISUPPER(*name);
224 c = rb_enc_precise_mbclen(name, end, enc);
225 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
227 c = rb_enc_mbc_to_codepoint(name, end, enc);
228 if (rb_enc_isupper(c, enc)) return TRUE;
229 if (rb_enc_islower(c, enc)) return FALSE;
230 if (ONIGENC_IS_UNICODE(enc)) {
231 static int ctype_titlecase = 0;
232 if (!ctype_titlecase) {
233 static const UChar cname[] = "titlecaseletter";
234 static const UChar *const end = cname + sizeof(cname) - 1;
235 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
236 }
237 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
238 }
239 else {
240 /* fallback to case-folding */
241 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
242 const OnigUChar *beg = (const OnigUChar *)name;
243 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
244 &beg, (const OnigUChar *)end,
245 fold, enc);
246 if (r > 0 && (r != len || memcmp(fold, name, r)))
247 return TRUE;
248 }
249 return FALSE;
250}
251
252#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
253#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
254
256 const enum { invalid, stophere, needmore, } kind;
257 const enum ruby_id_types type;
258 const long nread;
259};
260
261#define t struct enc_synmane_type_leading_chars_tag
262
264enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
265{
266 const char *m = name;
267 const char *e = m + len;
268
269 if (! rb_enc_asciicompat(enc)) {
270 return (t) { invalid, 0, 0, };
271 }
272 else if (! m) {
273 return (t) { invalid, 0, 0, };
274 }
275 else if ( len <= 0 ) {
276 return (t) { invalid, 0, 0, };
277 }
278 switch (*m) {
279 case '\0':
280 return (t) { invalid, 0, 0, };
281
282 case '$':
283 if (is_special_global_name(++m, e, enc)) {
284 return (t) { stophere, ID_GLOBAL, len, };
285 }
286 else {
287 return (t) { needmore, ID_GLOBAL, 1, };
288 }
289
290 case '@':
291 switch (*++m) {
292 default: return (t) { needmore, ID_INSTANCE, 1, };
293 case '@': return (t) { needmore, ID_CLASS, 2, };
294 }
295
296 case '<':
297 switch (*++m) {
298 default: return (t) { stophere, ID_JUNK, 1, };
299 case '<': return (t) { stophere, ID_JUNK, 2, };
300 case '=':
301 switch (*++m) {
302 default: return (t) { stophere, ID_JUNK, 2, };
303 case '>': return (t) { stophere, ID_JUNK, 3, };
304 }
305 }
306
307 case '>':
308 switch (*++m) {
309 default: return (t) { stophere, ID_JUNK, 1, };
310 case '>': case '=': return (t) { stophere, ID_JUNK, 2, };
311 }
312
313 case '=':
314 switch (*++m) {
315 default: return (t) { invalid, 0, 1, };
316 case '~': return (t) { stophere, ID_JUNK, 2, };
317 case '=':
318 switch (*++m) {
319 default: return (t) { stophere, ID_JUNK, 2, };
320 case '=': return (t) { stophere, ID_JUNK, 3, };
321 }
322 }
323
324 case '*':
325 switch (*++m) {
326 default: return (t) { stophere, ID_JUNK, 1, };
327 case '*': return (t) { stophere, ID_JUNK, 2, };
328 }
329
330 case '+': case '-':
331 switch (*++m) {
332 default: return (t) { stophere, ID_JUNK, 1, };
333 case '@': return (t) { stophere, ID_JUNK, 2, };
334 }
335
336 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
337 return (t) { stophere, ID_JUNK, 1, };
338
339 case '[':
340 switch (*++m) {
341 default: return (t) { needmore, ID_JUNK, 0, };
342 case ']':
343 switch (*++m) {
344 default: return (t) { stophere, ID_JUNK, 2, };
345 case '=': return (t) { stophere, ID_JUNK, 3, };
346 }
347 }
348
349 case '!':
350 switch (*++m) {
351 case '=': case '~': return (t) { stophere, ID_JUNK, 2, };
352 default:
353 if (allowed_attrset & (1U << ID_JUNK)) {
354 return (t) { needmore, ID_JUNK, 1, };
355 }
356 else {
357 return (t) { stophere, ID_JUNK, 1, };
358 }
359 }
360
361 default:
362 if (rb_sym_constant_char_p(name, len, enc)) {
363 return (t) { needmore, ID_CONST, 0, };
364 }
365 else {
366 return (t) { needmore, ID_LOCAL, 0, };
367 }
368 }
369}
370#undef t
371
372int
373rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
374{
376 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
377 const char *m = name + f.nread;
378 const char *e = name + len;
379 int type = (int)f.type;
380
381 switch (f.kind) {
382 case invalid: return -1;
383 case stophere: break;
384 case needmore:
385
386 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
387 if (len > 1 && *(e-1) == '=') {
388 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
389 if (allowed_attrset & (1U << type)) return ID_ATTRSET;
390 }
391 return -1;
392 }
393 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
394 if (m >= e) break;
395 switch (*m) {
396 case '!': case '?':
397 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
398 type = ID_JUNK;
399 ++m;
400 if (m + 1 < e || *m != '=') break;
401 /* fall through */
402 case '=':
403 if (!(allowed_attrset & (1U << type))) return -1;
404 type = ID_ATTRSET;
405 ++m;
406 break;
407 }
408 }
409
410 return m == e ? type : -1;
411}
412
413int
414rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
415{
416 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
417}
418
419static int
420rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
421{
422 const char *ptr = StringValuePtr(name);
423 long len = RSTRING_LEN(name);
424 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
425 RB_GC_GUARD(name);
426 return type;
427}
428
429static void
430set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
431{
432 ASSERT_vm_locking();
433 size_t idx = num / ID_ENTRY_UNIT;
434
435 VALUE ary, ids = symbols->ids;
436 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
437 ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
438 rb_ary_store(ids, (long)idx, ary);
439 }
440 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
441 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
442 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
443}
444
445static VALUE
446get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
447{
448 VALUE result = 0;
449
450 GLOBAL_SYMBOLS_ENTER(symbols);
451 {
452 if (num && num <= symbols->last_id) {
453 size_t idx = num / ID_ENTRY_UNIT;
454 VALUE ids = symbols->ids;
455 VALUE ary;
456 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
457 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
458 result = rb_ary_entry(ary, pos + t);
459
460 if (NIL_P(result)) {
461 result = 0;
462 }
463 else if (CHECK_ID_SERIAL) {
464 if (id) {
465 VALUE sym = result;
466 if (t != ID_ENTRY_SYM)
467 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
468 if (STATIC_SYM_P(sym)) {
469 if (STATIC_SYM2ID(sym) != id) result = 0;
470 }
471 else {
472 if (RSYMBOL(sym)->id != id) result = 0;
473 }
474 }
475 }
476 }
477 }
478 }
479 GLOBAL_SYMBOLS_LEAVE();
480
481 return result;
482}
483
484static VALUE
485get_id_entry(ID id, const enum id_entry_type t)
486{
487 return get_id_serial_entry(rb_id_to_serial(id), id, t);
488}
489
490int
491rb_static_id_valid_p(ID id)
492{
493 return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM);
494}
495
496static inline ID
497rb_id_serial_to_id(rb_id_serial_t num)
498{
499 if (is_notop_id((ID)num)) {
500 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
501 if (sym) return SYM2ID(sym);
502 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
503 }
504 else {
505 return (ID)num;
506 }
507}
508
509static int
510register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
511{
512 if (existing) {
513 rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
514 (VALUE)*key, (VALUE)*value);
515 }
516 *value = arg;
517 return ST_CONTINUE;
518}
519
520static void
521register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
522{
523 ASSERT_vm_locking();
524
525 if (SYMBOL_DEBUG) {
526 st_update(symbols->str_sym, (st_data_t)str,
527 register_sym_update_callback, (st_data_t)sym);
528 }
529 else {
530 st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym);
531 }
532}
533
534void
535rb_free_static_symid_str(void)
536{
537 GLOBAL_SYMBOLS_ENTER(symbols)
538 {
539 st_free_table(symbols->str_sym);
540 }
541 GLOBAL_SYMBOLS_LEAVE();
542}
543
544static void
545unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
546{
547 ASSERT_vm_locking();
548
549 st_data_t str_data = (st_data_t)str;
550 if (!st_delete(symbols->str_sym, &str_data, NULL)) {
551 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
552 }
553}
554
555static ID
556register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
557{
558 VALUE str = rb_enc_str_new(name, len, enc);
559 return register_static_symid_str(id, str);
560}
561
562static ID
563register_static_symid_str(ID id, VALUE str)
564{
565 rb_id_serial_t num = rb_id_to_serial(id);
566 VALUE sym = STATIC_ID2SYM(id);
567
568 OBJ_FREEZE(str);
569 str = rb_fstring(str);
570
571 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
572
573 GLOBAL_SYMBOLS_ENTER(symbols)
574 {
575 register_sym(symbols, str, sym);
576 set_id_entry(symbols, num, str, sym);
577 }
578 GLOBAL_SYMBOLS_LEAVE();
579
580 return id;
581}
582
583static int
584sym_check_asciionly(VALUE str, bool fake_str)
585{
586 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
587 switch (rb_enc_str_coderange(str)) {
589 if (fake_str) {
590 str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
591 }
592 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
593 rb_enc_name(rb_enc_get(str)), str);
595 return TRUE;
596 }
597 return FALSE;
598}
599
#if 0
/*
 * _str_ itself will be registered at the global symbol table.  _str_
 * can be modified before the registration, since the encoding will be
 * set to ASCII-8BIT if it is a special global name.
 */

/*
 * Disabled debugging helper: reports an interpreter bug unless +x+ is a
 * dynamic symbol, distinguishing static symbols (with or without a
 * registered name) from non-symbol objects in the message.
 */
static inline void
must_be_dynamic_symbol(VALUE x)
{
    if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
        if (!STATIC_SYM_P(x)) {
            rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
        }
        else {
            VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));

            if (!str) {
                rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
            }
            else {
                rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
            }
        }
    }
}
#endif
627
628static VALUE
629dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
630{
631 ASSERT_vm_locking();
632
633 const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
634 long hashval;
635
636 rb_enc_set_index(dsym, rb_enc_to_index(enc));
637 OBJ_FREEZE(dsym);
638 RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
639 RSYMBOL(dsym)->id = type;
640
641 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
642 hashval = (long)rb_str_hash(str);
643 RSYMBOL(dsym)->hashval = RSHIFT((long)hashval, 1);
644 register_sym(symbols, str, dsym);
645 rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue);
646 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(RSYMBOL(dsym)->fstr));
647
648 return dsym;
649}
650
651static inline VALUE
652dsymbol_check(rb_symbols_t *symbols, const VALUE sym)
653{
654 ASSERT_vm_locking();
655
656 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
657 const VALUE fstr = RSYMBOL(sym)->fstr;
658 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
659 RSYMBOL(sym)->fstr = 0;
660 unregister_sym(symbols, fstr, sym);
661 return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type);
662 }
663 else {
664 return sym;
665 }
666}
667
668static ID
669lookup_str_id(VALUE str)
670{
671 st_data_t sym_data;
672 int found;
673
674 GLOBAL_SYMBOLS_ENTER(symbols);
675 {
676 found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data);
677 }
678 GLOBAL_SYMBOLS_LEAVE();
679
680 if (found) {
681 const VALUE sym = (VALUE)sym_data;
682
683 if (STATIC_SYM_P(sym)) {
684 return STATIC_SYM2ID(sym);
685 }
686 else if (DYNAMIC_SYM_P(sym)) {
687 ID id = RSYMBOL(sym)->id;
688 if (id & ~ID_SCOPE_MASK) return id;
689 }
690 else {
691 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
692 rb_builtin_class_name(sym), sym, str);
693 }
694 }
695 return (ID)0;
696}
697
698static VALUE
699lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)
700{
701 st_data_t sym_data;
702 if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) {
703 VALUE sym = (VALUE)sym_data;
704 if (DYNAMIC_SYM_P(sym)) {
705 sym = dsymbol_check(symbols, sym);
706 }
707 return sym;
708 }
709 else {
710 return Qfalse;
711 }
712}
713
714static VALUE
715lookup_str_sym(const VALUE str)
716{
717 VALUE sym;
718
719 GLOBAL_SYMBOLS_ENTER(symbols);
720 {
721 sym = lookup_str_sym_with_lock(symbols, str);
722 }
723 GLOBAL_SYMBOLS_LEAVE();
724
725 return sym;
726}
727
728static VALUE
729lookup_id_str(ID id)
730{
731 return get_id_entry(id, ID_ENTRY_STR);
732}
733
734ID
735rb_intern3(const char *name, long len, rb_encoding *enc)
736{
737 VALUE sym;
738 struct RString fake_str;
739 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
740 OBJ_FREEZE(str);
741 sym = lookup_str_sym(str);
742 if (sym) return rb_sym2id(sym);
743 str = rb_enc_str_new(name, len, enc); /* make true string */
744 return intern_str(str, 1);
745}
746
747static ID
748next_id_base_with_lock(rb_symbols_t *symbols)
749{
750 ID id;
751 rb_id_serial_t next_serial = symbols->last_id + 1;
752
753 if (next_serial == 0) {
754 id = (ID)-1;
755 }
756 else {
757 const size_t num = ++symbols->last_id;
758 id = num << ID_SCOPE_SHIFT;
759 }
760
761 return id;
762}
763
764static ID
765next_id_base(void)
766{
767 ID id;
768 GLOBAL_SYMBOLS_ENTER(symbols);
769 {
770 id = next_id_base_with_lock(symbols);
771 }
772 GLOBAL_SYMBOLS_LEAVE();
773 return id;
774}
775
776static ID
777intern_str(VALUE str, int mutable)
778{
779 ID id;
780 ID nid;
781
782 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
783 if (id == (ID)-1) id = ID_JUNK;
784 if (sym_check_asciionly(str, false)) {
785 if (!mutable) str = rb_str_dup(str);
786 rb_enc_associate(str, rb_usascii_encoding());
787 }
788 if ((nid = next_id_base()) == (ID)-1) {
789 str = rb_str_ellipsize(str, 20);
790 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
791 str);
792 }
793 id |= nid;
794 id |= ID_STATIC_SYM;
795 return register_static_symid_str(id, str);
796}
797
798ID
799rb_intern2(const char *name, long len)
800{
801 return rb_intern3(name, len, rb_usascii_encoding());
802}
803
804#undef rb_intern
805ID
806rb_intern(const char *name)
807{
808 return rb_intern2(name, strlen(name));
809}
810
811ID
812rb_intern_str(VALUE str)
813{
814 VALUE sym = lookup_str_sym(str);
815
816 if (sym) {
817 return SYM2ID(sym);
818 }
819
820 return intern_str(str, 0);
821}
822
823void
824rb_gc_free_dsymbol(VALUE sym)
825{
826 VALUE str = RSYMBOL(sym)->fstr;
827
828 if (str) {
829 RSYMBOL(sym)->fstr = 0;
830
831 GLOBAL_SYMBOLS_ENTER(symbols);
832 {
833 unregister_sym(symbols, str, sym);
834 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str);
835 }
836 GLOBAL_SYMBOLS_LEAVE();
837 }
838}
839
840/*
841 * call-seq:
842 * str.intern -> symbol
843 * str.to_sym -> symbol
844 *
845 * Returns the Symbol corresponding to <i>str</i>, creating the
846 * symbol if it did not previously exist. See Symbol#id2name.
847 *
848 * "Koala".intern #=> :Koala
849 * s = 'cat'.to_sym #=> :cat
850 * s == :cat #=> true
851 * s = '@cat'.to_sym #=> :@cat
852 * s == :@cat #=> true
853 *
854 * This can also be used to create symbols that cannot be represented using the
855 * <code>:xxx</code> notation.
856 *
857 * 'cat and dog'.to_sym #=> :"cat and dog"
858 */
859
860VALUE
861rb_str_intern(VALUE str)
862{
863 VALUE sym;
864
865 GLOBAL_SYMBOLS_ENTER(symbols);
866 {
867 sym = lookup_str_sym_with_lock(symbols, str);
868
869 if (sym) {
870 // ok
871 }
872 else if (USE_SYMBOL_GC) {
873 rb_encoding *enc = rb_enc_get(str);
874 rb_encoding *ascii = rb_usascii_encoding();
875 if (enc != ascii && sym_check_asciionly(str, false)) {
876 str = rb_str_dup(str);
877 rb_enc_associate(str, ascii);
878 OBJ_FREEZE(str);
879 enc = ascii;
880 }
881 else {
882 str = rb_str_dup(str);
883 OBJ_FREEZE(str);
884 }
885 str = rb_fstring(str);
886 int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
887 if (type < 0) type = ID_JUNK;
888 sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
889 }
890 else {
891 ID id = intern_str(str, 0);
892 sym = ID2SYM(id);
893 }
894 }
895 GLOBAL_SYMBOLS_LEAVE();
896 return sym;
897}
898
899ID
901{
902 ID id;
903 if (STATIC_SYM_P(sym)) {
904 id = STATIC_SYM2ID(sym);
905 }
906 else if (DYNAMIC_SYM_P(sym)) {
907 GLOBAL_SYMBOLS_ENTER(symbols);
908 {
909 sym = dsymbol_check(symbols, sym);
910 id = RSYMBOL(sym)->id;
911
912 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
913 VALUE fstr = RSYMBOL(sym)->fstr;
914 ID num = next_id_base_with_lock(symbols);
915
916 RSYMBOL(sym)->id = id |= num;
917 /* make it permanent object */
918
919 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
920 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr);
921 }
922 }
923 GLOBAL_SYMBOLS_LEAVE();
924 }
925 else {
926 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
927 rb_builtin_class_name(sym));
928 }
929 return id;
930}
931
932#undef rb_id2sym
933VALUE
934rb_id2sym(ID x)
935{
936 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
937 return get_id_entry(x, ID_ENTRY_SYM);
938}
939
940/*
941 * call-seq:
942 * name -> string
943 *
944 * Returns a frozen string representation of +self+ (not including the leading colon):
945 *
946 * :foo.name # => "foo"
947 * :foo.name.frozen? # => true
948 *
949 * Related: Symbol#to_s, Symbol#inspect.
950 */
951
952VALUE
954{
955 if (DYNAMIC_SYM_P(sym)) {
956 return RSYMBOL(sym)->fstr;
957 }
958 else {
959 return rb_id2str(STATIC_SYM2ID(sym));
960 }
961}
962
963VALUE
964rb_id2str(ID id)
965{
966 return lookup_id_str(id);
967}
968
969const char *
970rb_id2name(ID id)
971{
972 VALUE str = rb_id2str(id);
973
974 if (!str) return 0;
975 return RSTRING_PTR(str);
976}
977
978ID
979rb_make_internal_id(void)
980{
981 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
982}
983
984ID
985rb_make_temporary_id(size_t n)
986{
987 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
988 const ID id = max_id - (ID)n;
989 if (id <= ruby_global_symbols.last_id) {
990 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
991 }
992 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
993}
994
995static int
996symbols_i(st_data_t key, st_data_t value, st_data_t arg)
997{
998 VALUE ary = (VALUE)arg;
999 VALUE sym = (VALUE)value;
1000
1001 if (STATIC_SYM_P(sym)) {
1002 rb_ary_push(ary, sym);
1003 return ST_CONTINUE;
1004 }
1005 else if (!DYNAMIC_SYM_P(sym)) {
1006 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
1007 }
1008 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
1009 RSYMBOL(sym)->fstr = 0;
1010 return ST_DELETE;
1011 }
1012 else {
1013 rb_ary_push(ary, sym);
1014 return ST_CONTINUE;
1015 }
1016
1017}
1018
1019VALUE
1021{
1022 VALUE ary;
1023
1024 GLOBAL_SYMBOLS_ENTER(symbols);
1025 {
1026 ary = rb_ary_new2(symbols->str_sym->num_entries);
1027 st_foreach(symbols->str_sym, symbols_i, ary);
1028 }
1029 GLOBAL_SYMBOLS_LEAVE();
1030
1031 return ary;
1032}
1033
1034size_t
1035rb_sym_immortal_count(void)
1036{
1037 return (size_t)ruby_global_symbols.last_id;
1038}
1039
1040int
1042{
1043 return is_const_id(id);
1044}
1045
1046int
1048{
1049 return is_class_id(id);
1050}
1051
1052int
1054{
1055 return is_global_id(id);
1056}
1057
1058int
1060{
1061 return is_instance_id(id);
1062}
1063
1064int
1066{
1067 return is_attrset_id(id);
1068}
1069
1070int
1072{
1073 return is_local_id(id);
1074}
1075
1076int
1078{
1079 return is_junk_id(id);
1080}
1081
1082int
1083rb_is_const_sym(VALUE sym)
1084{
1085 return is_const_sym(sym);
1086}
1087
1088int
1089rb_is_attrset_sym(VALUE sym)
1090{
1091 return is_attrset_sym(sym);
1092}
1093
1094ID
1095rb_check_id(volatile VALUE *namep)
1096{
1097 VALUE tmp;
1098 VALUE name = *namep;
1099
1100 if (STATIC_SYM_P(name)) {
1101 return STATIC_SYM2ID(name);
1102 }
1103 else if (DYNAMIC_SYM_P(name)) {
1104 if (SYMBOL_PINNED_P(name)) {
1105 return RSYMBOL(name)->id;
1106 }
1107 else {
1108 *namep = RSYMBOL(name)->fstr;
1109 return 0;
1110 }
1111 }
1112 else if (!RB_TYPE_P(name, T_STRING)) {
1113 tmp = rb_check_string_type(name);
1114 if (NIL_P(tmp)) {
1115 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1116 name);
1117 }
1118 name = tmp;
1119 *namep = name;
1120 }
1121
1122 sym_check_asciionly(name, false);
1123
1124 return lookup_str_id(name);
1125}
1126
1127// Used by yjit for handling .send without throwing exceptions
1128ID
1129rb_get_symbol_id(VALUE name)
1130{
1131 if (STATIC_SYM_P(name)) {
1132 return STATIC_SYM2ID(name);
1133 }
1134 else if (DYNAMIC_SYM_P(name)) {
1135 if (SYMBOL_PINNED_P(name)) {
1136 return RSYMBOL(name)->id;
1137 }
1138 else {
1139 return 0;
1140 }
1141 }
1142 else {
1143 RUBY_ASSERT_ALWAYS(RB_TYPE_P(name, T_STRING));
1144 return lookup_str_id(name);
1145 }
1146}
1147
1148
1149VALUE
1150rb_check_symbol(volatile VALUE *namep)
1151{
1152 VALUE sym;
1153 VALUE tmp;
1154 VALUE name = *namep;
1155
1156 if (STATIC_SYM_P(name)) {
1157 return name;
1158 }
1159 else if (DYNAMIC_SYM_P(name)) {
1160 if (!SYMBOL_PINNED_P(name)) {
1161 GLOBAL_SYMBOLS_ENTER(symbols);
1162 {
1163 name = dsymbol_check(symbols, name);
1164 }
1165 GLOBAL_SYMBOLS_LEAVE();
1166
1167 *namep = name;
1168 }
1169 return name;
1170 }
1171 else if (!RB_TYPE_P(name, T_STRING)) {
1172 tmp = rb_check_string_type(name);
1173 if (NIL_P(tmp)) {
1174 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1175 name);
1176 }
1177 name = tmp;
1178 *namep = name;
1179 }
1180
1181 sym_check_asciionly(name, false);
1182
1183 if ((sym = lookup_str_sym(name)) != 0) {
1184 return sym;
1185 }
1186
1187 return Qnil;
1188}
1189
1190ID
1191rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1192{
1193 struct RString fake_str;
1194 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1195
1196 sym_check_asciionly(name, true);
1197
1198 return lookup_str_id(name);
1199}
1200
1201VALUE
1202rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1203{
1204 VALUE sym;
1205 struct RString fake_str;
1206 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1207
1208 sym_check_asciionly(name, true);
1209
1210 if ((sym = lookup_str_sym(name)) != 0) {
1211 return sym;
1212 }
1213
1214 return Qnil;
1215}
1216
1217#undef rb_sym_intern_ascii_cstr
1218#ifdef __clang__
1219NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1220#else
1221FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1222FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1223FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1224#endif
1225
1226VALUE
1227rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1228{
1229 struct RString fake_str;
1230 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1231 return rb_str_intern(name);
1232}
1233
1234VALUE
1235rb_sym_intern_ascii(const char *ptr, long len)
1236{
1237 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1238}
1239
1240VALUE
1241rb_sym_intern_ascii_cstr(const char *ptr)
1242{
1243 return rb_sym_intern_ascii(ptr, strlen(ptr));
1244}
1245
1246VALUE
1247rb_to_symbol_type(VALUE obj)
1248{
1249 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1250}
1251
1252int
1253rb_is_const_name(VALUE name)
1254{
1255 return rb_str_symname_type(name, 0) == ID_CONST;
1256}
1257
1258int
1259rb_is_class_name(VALUE name)
1260{
1261 return rb_str_symname_type(name, 0) == ID_CLASS;
1262}
1263
1264int
1265rb_is_instance_name(VALUE name)
1266{
1267 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1268}
1269
1270int
1271rb_is_local_name(VALUE name)
1272{
1273 return rb_str_symname_type(name, 0) == ID_LOCAL;
1274}
1275
1276#include "id_table.c"
1277#include "symbol.rbinc"
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
Definition assert.h:167
static bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isupper(), except it additionally takes an encoding.
Definition ctype.h:124
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
Definition ctype.h:63
static bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
Identical to rb_islower(), except it additionally takes an encoding.
Definition ctype.h:110
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ISUPPER
Old name of rb_isupper.
Definition ctype.h:89
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
Definition fl_type.h:135
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define STATIC_SYM_P
Old name of RB_STATIC_SYM_P.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition encoding.h:516
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qtrue
Old name of RUBY_Qtrue.
#define DYNAMIC_SYM_P
Old name of RB_DYNAMIC_SYM_P.
Definition value_type.h:86
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition coderange.h:182
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition encoding.h:515
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
Definition fl_type.h:59
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:651
#define ISALNUM
Old name of rb_isalnum.
Definition ctype.h:91
void rb_name_error(ID id, const char *fmt,...)
Raises an instance of rb_eNameError.
Definition error.c:2037
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1344
void rb_name_error_str(VALUE str, const char *fmt,...)
Identical to rb_name_error(), except it takes a VALUE instead of ID.
Definition error.c:2052
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1342
VALUE rb_eEncodingError
EncodingError exception.
Definition error.c:1350
VALUE rb_cSymbol
Symbol class.
Definition string.c:79
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:619
Encoding-related APIs.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
Definition encoding.h:590
int rb_enc_symname_p(const char *str, rb_encoding *enc)
Identical to rb_symname_p(), except it additionally takes an encoding.
Definition symbol.c:211
VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id_cstr(), except for the return type.
Definition symbol.c:1202
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
Definition symbol.c:414
ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id(), except it takes a pointer to a memory region instead of Ruby's string.
Definition symbol.c:1191
VALUE rb_sym_all_symbols(void)
Collects every symbol that has ever been interned in the entire history of the current process.
Definition symbol.c:1020
int rb_is_global_id(ID id)
Classifies the given ID, then sees if it is a global variable.
Definition symbol.c:1053
int rb_is_instance_id(ID id)
Classifies the given ID, then sees if it is an instance variable.
Definition symbol.c:1059
int rb_is_const_id(ID id)
Classifies the given ID, then sees if it is a constant.
Definition symbol.c:1041
int rb_is_junk_id(ID)
Classifies the given ID, then sees if it is a junk ID.
Definition symbol.c:1077
int rb_symname_p(const char *str)
Sees if the passed C string constructs a valid syntactic symbol.
Definition symbol.c:205
int rb_is_class_id(ID id)
Classifies the given ID, then sees if it is a class variable.
Definition symbol.c:1047
int rb_is_attrset_id(ID id)
Classifies the given ID, then sees if it is an attribute writer.
Definition symbol.c:1065
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1071
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
Definition string.c:3629
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition string.c:11013
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition string.c:3618
VALUE rb_check_string_type(VALUE obj)
Tries converting an object to its stringised representation using its to_str method, if any.
Definition string.c:2681
VALUE rb_check_symbol(volatile VALUE *namep)
Identical to rb_check_id(), except it returns an instance of rb_cSymbol instead.
Definition symbol.c:1150
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1095
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
Definition symbol.c:953
ID rb_sym2id(VALUE obj)
Converts an instance of rb_cSymbol into an ID.
Definition symbol.c:900
int len
Length of the buffer.
Definition io.h:8
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:161
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:76
@ RUBY_SPECIAL_SHIFT
Least significant 8 bits are reserved.
Ruby's String.
Definition rstring.h:196
char * ptr
Pointer to the contents of the string.
Definition rstring.h:222
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40