Ruby 3.3.6p108 (2024-11-05 revision 75015d4c1f6965b5e85e96fb309f1f2129f933c0)
st.c
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17     bins:
18    -------
19   |       |                  entries array:
20   |-------|            --------------------------------
21   | index |           |      | entry:  |        |      |
22   |-------|           |      |         |        |      |
23   | ...   |           | ...  | hash    |  ...   | ...  |
24   |-------|           |      | key     |        |      |
25   | empty |           |      | record  |        |      |
26   |-------|            --------------------------------
27   | ...   |                   ^                  ^
28   |-------|                   |_ entries start   |_ entries bound
29   |deleted|
30    -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always a power of two, which makes mapping
48 very fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash,
50 but the alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers is extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger for rebuilding the table is always the case when we
76 cannot insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion, but then we would
78 need special code to count bins corresponding to deleted entries
79 and to reset the bin values when there are too many bins
80 corresponding to deleted entries.
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory, very small tables have no allocated bins
88 array. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks
99 on average by more than 40% on an Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#elif defined RUBY_EXPORT
107#include "internal.h"
108#include "internal/bits.h"
109#include "internal/hash.h"
110#include "internal/sanitizers.h"
111#endif
112
113#include <stdio.h>
114#ifdef HAVE_STDLIB_H
115#include <stdlib.h>
116#endif
117#include <string.h>
118#include <assert.h>
119
120#ifdef __GNUC__
121#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
122#define EXPECT(expr, val) __builtin_expect(expr, val)
123#define ATTRIBUTE_UNUSED __attribute__((unused))
124#else
125#define PREFETCH(addr, write_p)
126#define EXPECT(expr, val) (expr)
127#define ATTRIBUTE_UNUSED
128#endif
129
130/* The type of hashes. */
131typedef st_index_t st_hash_t;
132
134 st_hash_t hash;
135 st_data_t key;
136 st_data_t record;
137};
138
139#define type_numhash st_hashtype_num
140static const struct st_hash_type st_hashtype_num = {
141 st_numcmp,
142 st_numhash,
143};
144
145static int st_strcmp(st_data_t, st_data_t);
146static st_index_t strhash(st_data_t);
147static const struct st_hash_type type_strhash = {
148 st_strcmp,
149 strhash,
150};
151
152static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
153static st_index_t strcasehash(st_data_t);
154static const struct st_hash_type type_strcasehash = {
155 st_locale_insensitive_strcasecmp_i,
156 strcasehash,
157};
158
/* Value used to catch uninitialized entries/bins during debugging.
   There is a possibility for a false alarm, but its probability is
   extremely small.  ST_INIT_VAL_BYTE is the single byte that
   ST_INIT_VAL repeats; it must fit in an unsigned char so that filling
   memory with it byte by byte reproduces ST_INIT_VAL.  */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
164
165#ifdef RUBY
166#undef malloc
167#undef realloc
168#undef calloc
169#undef free
170#define malloc ruby_xmalloc
171#define calloc ruby_xcalloc
172#define realloc ruby_xrealloc
173#define free ruby_xfree
174#endif
175
176#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
177#define PTR_EQUAL(tab, ptr, hash_val, key_) \
178 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
179
180/* Like PTR_EQUAL, but the comparison result is stored in RES. REBUILT_P
181 is set to TRUE if the table was rebuilt during the comparison. */
182#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
183 do { \
184 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
185 res = PTR_EQUAL(tab, ptr, hash_val, key); \
186 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
187 } while (FALSE)
188
189/* Features of a table. */
191 /* Power of 2 used for number of allocated entries. */
192 unsigned char entry_power;
193 /* Power of 2 used for number of allocated bins. Depending on the
194 table size, the number of bins is 2-4 times more than the
195 number of entries. */
196 unsigned char bin_power;
197 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
198 unsigned char size_ind;
199 /* Bins are packed in words of type st_index_t. The following is
200 a size of bins counted by words. */
201 st_index_t bins_words;
202};
203
204/* Features of all possible size tables, indexed by the entry power. Each row is {entry_power, bin_power, size_ind, bins_words}. */
205#if SIZEOF_ST_INDEX_T == 8
206#define MAX_POWER2 62
207static const struct st_features features[] = {
208 {0, 1, 0, 0x0},
209 {1, 2, 0, 0x1},
210 {2, 3, 0, 0x1},
211 {3, 4, 0, 0x2},
212 {4, 5, 0, 0x4},
213 {5, 6, 0, 0x8},
214 {6, 7, 0, 0x10},
215 {7, 8, 0, 0x20},
216 {8, 9, 1, 0x80},
217 {9, 10, 1, 0x100},
218 {10, 11, 1, 0x200},
219 {11, 12, 1, 0x400},
220 {12, 13, 1, 0x800},
221 {13, 14, 1, 0x1000},
222 {14, 15, 1, 0x2000},
223 {15, 16, 1, 0x4000},
224 {16, 17, 2, 0x10000},
225 {17, 18, 2, 0x20000},
226 {18, 19, 2, 0x40000},
227 {19, 20, 2, 0x80000},
228 {20, 21, 2, 0x100000},
229 {21, 22, 2, 0x200000},
230 {22, 23, 2, 0x400000},
231 {23, 24, 2, 0x800000},
232 {24, 25, 2, 0x1000000},
233 {25, 26, 2, 0x2000000},
234 {26, 27, 2, 0x4000000},
235 {27, 28, 2, 0x8000000},
236 {28, 29, 2, 0x10000000},
237 {29, 30, 2, 0x20000000},
238 {30, 31, 2, 0x40000000},
239 {31, 32, 2, 0x80000000},
240 {32, 33, 3, 0x200000000},
241 {33, 34, 3, 0x400000000},
242 {34, 35, 3, 0x800000000},
243 {35, 36, 3, 0x1000000000},
244 {36, 37, 3, 0x2000000000},
245 {37, 38, 3, 0x4000000000},
246 {38, 39, 3, 0x8000000000},
247 {39, 40, 3, 0x10000000000},
248 {40, 41, 3, 0x20000000000},
249 {41, 42, 3, 0x40000000000},
250 {42, 43, 3, 0x80000000000},
251 {43, 44, 3, 0x100000000000},
252 {44, 45, 3, 0x200000000000},
253 {45, 46, 3, 0x400000000000},
254 {46, 47, 3, 0x800000000000},
255 {47, 48, 3, 0x1000000000000},
256 {48, 49, 3, 0x2000000000000},
257 {49, 50, 3, 0x4000000000000},
258 {50, 51, 3, 0x8000000000000},
259 {51, 52, 3, 0x10000000000000},
260 {52, 53, 3, 0x20000000000000},
261 {53, 54, 3, 0x40000000000000},
262 {54, 55, 3, 0x80000000000000},
263 {55, 56, 3, 0x100000000000000},
264 {56, 57, 3, 0x200000000000000},
265 {57, 58, 3, 0x400000000000000},
266 {58, 59, 3, 0x800000000000000},
267 {59, 60, 3, 0x1000000000000000},
268 {60, 61, 3, 0x2000000000000000},
269 {61, 62, 3, 0x4000000000000000},
270 {62, 63, 3, 0x8000000000000000},
271};
272
273#else
274#define MAX_POWER2 30
275
276static const struct st_features features[] = {
277 {0, 1, 0, 0x1},
278 {1, 2, 0, 0x1},
279 {2, 3, 0, 0x2},
280 {3, 4, 0, 0x4},
281 {4, 5, 0, 0x8},
282 {5, 6, 0, 0x10},
283 {6, 7, 0, 0x20},
284 {7, 8, 0, 0x40},
285 {8, 9, 1, 0x100},
286 {9, 10, 1, 0x200},
287 {10, 11, 1, 0x400},
288 {11, 12, 1, 0x800},
289 {12, 13, 1, 0x1000},
290 {13, 14, 1, 0x2000},
291 {14, 15, 1, 0x4000},
292 {15, 16, 1, 0x8000},
293 {16, 17, 2, 0x20000},
294 {17, 18, 2, 0x40000},
295 {18, 19, 2, 0x80000},
296 {19, 20, 2, 0x100000},
297 {20, 21, 2, 0x200000},
298 {21, 22, 2, 0x400000},
299 {22, 23, 2, 0x800000},
300 {23, 24, 2, 0x1000000},
301 {24, 25, 2, 0x2000000},
302 {25, 26, 2, 0x4000000},
303 {26, 27, 2, 0x8000000},
304 {27, 28, 2, 0x10000000},
305 {28, 29, 2, 0x20000000},
306 {29, 30, 2, 0x40000000},
307 {30, 31, 2, 0x80000000},
308};
309
310#endif
311
312/* The reserved hash value and its substitution. */
313#define RESERVED_HASH_VAL (~(st_hash_t) 0)
314#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
315
316/* Return hash value of KEY for table TAB. */
317static inline st_hash_t
318do_hash(st_data_t key, st_table *tab)
319{
320 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
321
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Power of 2 defining the minimal number of allocated entries. */
328#define MINIMAL_POWER2 2
329
330#if MINIMAL_POWER2 < 2
331#error "MINIMAL_POWER2 should be >= 2"
332#endif
333
334/* If the power2 of the allocated `entries` is less than the following
335 value, don't allocate bins and use a linear search. */
336#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
337
338/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
339static int
340get_power2(st_index_t size)
341{
342 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
343 if (n <= MAX_POWER2)
344 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
345#ifdef RUBY
346 /* Ran out of the table entries */
347 rb_raise(rb_eRuntimeError, "st_table too big");
348#endif
349 /* should raise exception */
350 return -1;
351}
352
353/* Return value of N-th bin in array BINS of table with bins size
354 index S. */
355static inline st_index_t
356get_bin(st_index_t *bins, int s, st_index_t n)
357{
358 return (s == 0 ? ((unsigned char *) bins)[n]
359 : s == 1 ? ((unsigned short *) bins)[n]
360 : s == 2 ? ((unsigned int *) bins)[n]
361 : ((st_index_t *) bins)[n]);
362}
363
364/* Set up N-th bin in array BINS of table with bins size index S to
365 value V. */
366static inline void
367set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
368{
369 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
370 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
371 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
372 else ((st_index_t *) bins)[n] = v;
373}
374
375/* These macros define reserved values for empty table bin and table
376 bin which contains a deleted entry. We will never use such values
377 for an entry index in bins. */
378#define EMPTY_BIN 0
379#define DELETED_BIN 1
380/* Base of a real entry index in the bins. */
381#define ENTRY_BASE 2
382
383/* Mark I-th bin of table TAB as empty, in other words not
384 corresponding to any entry. */
385#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
386
387/* Values used for not found entry and bin with given
388 characteristics. */
389#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
390#define UNDEFINED_BIN_IND (~(st_index_t) 0)
391
392/* Entry and bin values returned when we found a table rebuild during
393 the search. */
394#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
395#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
396
397/* Mark I-th bin of table TAB as corresponding to a deleted table
398 entry. Only the bin value is changed; no entry or bin counters
399 are updated here. */
400#define MARK_BIN_DELETED(tab, i) \
401 do { \
402 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
403 } while (0)
404
405/* Macros to check whether bin value B denotes an empty bin or a bin
406 corresponding to a deleted entry. */
407#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
408#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
409#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
410
411/* Macros to check empty bins and bins corresponding to deleted
412 entries. Bins are given by their index I in table TAB. */
413#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
414#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
416
417/* Macros for marking and checking deleted entries given by their
418 pointer E_PTR. */
419#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
420#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
421
422/* Return bin size index of table TAB. */
423static inline unsigned int
424get_size_ind(const st_table *tab)
425{
426 return tab->size_ind;
427}
428
429/* Return the number of allocated bins of table TAB. */
430static inline st_index_t
431get_bins_num(const st_table *tab)
432{
433 return ((st_index_t) 1)<<tab->bin_power;
434}
435
436/* Return mask for a bin index in table TAB. */
437static inline st_index_t
438bins_mask(const st_table *tab)
439{
440 return get_bins_num(tab) - 1;
441}
442
443/* Return the index of table TAB bin corresponding to
444 HASH_VALUE. */
445static inline st_index_t
446hash_bin(st_hash_t hash_value, st_table *tab)
447{
448 return hash_value & bins_mask(tab);
449}
450
451/* Return the number of allocated entries of table TAB. */
452static inline st_index_t
453get_allocated_entries(const st_table *tab)
454{
455 return ((st_index_t) 1)<<tab->entry_power;
456}
457
458/* Return size of the allocated bins of table TAB. */
459static inline st_index_t
460bins_size(const st_table *tab)
461{
462 return features[tab->entry_power].bins_words * sizeof (st_index_t);
463}
464
465/* Mark all bins of table TAB as empty. */
466static void
467initialize_bins(st_table *tab)
468{
469 memset(tab->bins, 0, bins_size(tab));
470}
471
472/* Make table TAB empty. */
473static void
474make_tab_empty(st_table *tab)
475{
476 tab->num_entries = 0;
477 tab->entries_start = tab->entries_bound = 0;
478 if (tab->bins != NULL)
479 initialize_bins(tab);
480}
481
482#ifdef HASH_LOG
483#ifdef HAVE_UNISTD_H
484#include <unistd.h>
485#endif
486static struct {
487 int all, total, num, str, strcase;
488} collision;
489
490/* Flag switching off output of package statistics at the end of
491 program. */
492static int init_st = 0;
493
494/* Output overall number of table searches and collisions into a
495 temporary file. */
496static void
497stat_col(void)
498{
499 char fname[10+sizeof(long)*3];
500 FILE *f;
501 if (!collision.total) return;
502 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
503 if (f == NULL)
504 return;
505 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
506 ((double)collision.all / (collision.total)) * 100);
507 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
508 fclose(f);
509}
510#endif
511
512st_table *
513st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
514{
515 int n;
516
517#ifdef HASH_LOG
518#if HASH_LOG+0 < 0
519 {
520 const char *e = getenv("ST_HASH_LOG");
521 if (!e || !*e) init_st = 1;
522 }
523#endif
524 if (init_st == 0) {
525 init_st = 1;
526 atexit(stat_col);
527 }
528#endif
529
530 n = get_power2(size);
531#ifndef RUBY
532 if (n < 0)
533 return NULL;
534#endif
535
536 tab->type = type;
537 tab->entry_power = n;
538 tab->bin_power = features[n].bin_power;
539 tab->size_ind = features[n].size_ind;
540 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
541 tab->bins = NULL;
542 else {
543 tab->bins = (st_index_t *) malloc(bins_size(tab));
544#ifndef RUBY
545 if (tab->bins == NULL) {
546 free(tab);
547 return NULL;
548 }
549#endif
550 }
551 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
552 * sizeof(st_table_entry));
553#ifndef RUBY
554 if (tab->entries == NULL) {
555 st_free_table(tab);
556 return NULL;
557 }
558#endif
559 make_tab_empty(tab);
560 tab->rebuilds_num = 0;
561 return tab;
562}
563
564/* Create and return table with TYPE which can hold at least SIZE
565 entries. The real number of entries which the table can hold is
566 the nearest power of two for SIZE. */
567st_table *
568st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
569{
570 st_table *tab = malloc(sizeof(st_table));
571#ifndef RUBY
572 if (tab == NULL)
573 return NULL;
574#endif
575
576#ifdef RUBY
577 st_init_existing_table_with_size(tab, type, size);
578#else
579 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
580 free(tab);
581 return NULL;
582 }
583#endif
584
585 return tab;
586}
587
588size_t
589st_table_size(const struct st_table *tbl)
590{
591 return tbl->num_entries;
592}
593
594/* Create and return table with TYPE which can hold a minimal number
595 of entries (see comments for get_power2). */
596st_table *
597st_init_table(const struct st_hash_type *type)
598{
599 return st_init_table_with_size(type, 0);
600}
601
602/* Create and return table which can hold a minimal number of
603 numbers. */
604st_table *
605st_init_numtable(void)
606{
607 return st_init_table(&type_numhash);
608}
609
610/* Create and return table which can hold SIZE numbers. */
611st_table *
612st_init_numtable_with_size(st_index_t size)
613{
614 return st_init_table_with_size(&type_numhash, size);
615}
616
617/* Create and return table which can hold a minimal number of
618 strings. */
619st_table *
620st_init_strtable(void)
621{
622 return st_init_table(&type_strhash);
623}
624
625/* Create and return table which can hold SIZE strings. */
626st_table *
627st_init_strtable_with_size(st_index_t size)
628{
629 return st_init_table_with_size(&type_strhash, size);
630}
631
632/* Create and return table which can hold a minimal number of strings
633 whose character case is ignored. */
634st_table *
635st_init_strcasetable(void)
636{
637 return st_init_table(&type_strcasehash);
638}
639
640/* Create and return table which can hold SIZE strings whose character
641 case is ignored. */
642st_table *
643st_init_strcasetable_with_size(st_index_t size)
644{
645 return st_init_table_with_size(&type_strcasehash, size);
646}
647
648/* Make table TAB empty. */
649void
650st_clear(st_table *tab)
651{
652 make_tab_empty(tab);
653 tab->rebuilds_num++;
654}
655
656/* Free table TAB space. */
657void
658st_free_table(st_table *tab)
659{
660 free(tab->bins);
661 free(tab->entries);
662 free(tab);
663}
664
665/* Return byte size of memory allocated for table TAB. */
666size_t
667st_memsize(const st_table *tab)
668{
669 return(sizeof(st_table)
670 + (tab->bins == NULL ? 0 : bins_size(tab))
671 + get_allocated_entries(tab) * sizeof(st_table_entry));
672}
673
674static st_index_t
675find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
676
677static st_index_t
678find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
679
680static st_index_t
681find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
682
683static st_index_t
684find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
685 st_data_t key, st_index_t *bin_ind);
686
687#ifdef HASH_LOG
688static void
689count_collision(const struct st_hash_type *type)
690{
691 collision.all++;
692 if (type == &type_numhash) {
693 collision.num++;
694 }
695 else if (type == &type_strhash) {
696 collision.strcase++;
697 }
698 else if (type == &type_strcasehash) {
699 collision.str++;
700 }
701}
702
703#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
704#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
705#define collision_check 0
706#else
707#define COLLISION
708#define FOUND_BIN
709#endif
710
711/* If the number of entries in the table is at least REBUILD_THRESHOLD
712 times less than the entry array length, decrease the table
713 size. */
714#define REBUILD_THRESHOLD 4
715
716#if REBUILD_THRESHOLD < 2
717#error "REBUILD_THRESHOLD should be >= 2"
718#endif
719
720static void rebuild_table_with(st_table *new_tab, st_table *tab);
721
722/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
723 and can change size of the table entries and bins arrays.
724 Rebuilding is implemented by creation of a new table or by
725 compaction of the existing one. */
726static void
727rebuild_table(st_table *tab)
728{
729 if ((2 * tab->num_entries <= get_allocated_entries(tab)
730 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
731 || tab->num_entries < (1 << MINIMAL_POWER2)) {
732 /* Compaction: */
733 tab->num_entries = 0;
734 if (tab->bins != NULL)
735 initialize_bins(tab);
736 rebuild_table_with(tab, tab);
737 }
738 else {
739 st_table *new_tab;
740 /* This allocation could trigger GC and compaction. If tab is the
741 * gen_iv_tbl, then tab could have changed in size due to objects being
742 * freed and/or moved. Do not store attributes of tab before this line. */
743 new_tab = st_init_table_with_size(tab->type,
744 2 * tab->num_entries - 1);
745 rebuild_table_with(new_tab, tab);
746 }
747}
748
749static void
750rebuild_table_with(st_table *new_tab, st_table *tab)
751{
752 st_index_t i, ni;
753 unsigned int size_ind;
754 st_table_entry *new_entries;
755 st_table_entry *curr_entry_ptr;
756 st_index_t *bins;
757 st_index_t bin_ind;
758
759 new_entries = new_tab->entries;
760
761 ni = 0;
762 bins = new_tab->bins;
763 size_ind = get_size_ind(new_tab);
764 st_index_t bound = tab->entries_bound;
765 st_table_entry *entries = tab->entries;
766
767 for (i = tab->entries_start; i < bound; i++) {
768 curr_entry_ptr = &entries[i];
769 PREFETCH(entries + i + 1, 0);
770 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
771 continue;
772 if (&new_entries[ni] != curr_entry_ptr)
773 new_entries[ni] = *curr_entry_ptr;
774 if (EXPECT(bins != NULL, 1)) {
775 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
776 curr_entry_ptr->key);
777 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
778 }
779 new_tab->num_entries++;
780 ni++;
781 }
782 if (new_tab != tab) {
783 tab->entry_power = new_tab->entry_power;
784 tab->bin_power = new_tab->bin_power;
785 tab->size_ind = new_tab->size_ind;
786 free(tab->bins);
787 tab->bins = new_tab->bins;
788 free(tab->entries);
789 tab->entries = new_tab->entries;
790 free(new_tab);
791 }
792 tab->entries_start = 0;
793 tab->entries_bound = tab->num_entries;
794 tab->rebuilds_num++;
795}
796
797/* Return the next secondary hash index for table TAB using previous
798 index IND and PERTURB. Finally modulo of the function becomes a
799 full *cycle linear congruential generator*, in other words it
800 guarantees traversing all table bins in extreme case.
801
802 According the Hull-Dobell theorem a generator
803 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
804 o m and c are relatively prime
805 o a-1 is divisible by all prime factors of m
806 o a-1 is divisible by 4 if m is divisible by 4.
807
808 For our case a is 5, c is 1, and m is a power of two. */
809static inline st_index_t
810secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
811{
812 *perturb >>= 11;
813 ind = (ind << 2) + ind + *perturb + 1;
814 return hash_bin(ind, tab);
815}
816
817/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
818 search. Return the index of the found entry in array `entries`.
819 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
820 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
821static inline st_index_t
822find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
823{
824 int eq_p, rebuilt_p;
825 st_index_t i, bound;
826 st_table_entry *entries;
827
828 bound = tab->entries_bound;
829 entries = tab->entries;
830 for (i = tab->entries_start; i < bound; i++) {
831 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
832 if (EXPECT(rebuilt_p, 0))
833 return REBUILT_TABLE_ENTRY_IND;
834 if (eq_p)
835 return i;
836 }
837 return UNDEFINED_ENTRY_IND;
838}
839
840/* Use the quadratic probing. The method has a better data locality
841 but more collisions than the current approach. In average it
842 results in a bit slower search. */
843/*#define QUADRATIC_PROBE*/
844
845/* Return index of entry with HASH_VALUE and KEY in table TAB. If
846 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
847 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
848static st_index_t
849find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
850{
851 int eq_p, rebuilt_p;
852 st_index_t ind;
853#ifdef QUADRATIC_PROBE
854 st_index_t d;
855#else
856 st_index_t perturb;
857#endif
858 st_index_t bin;
859 st_table_entry *entries = tab->entries;
860
861 ind = hash_bin(hash_value, tab);
862#ifdef QUADRATIC_PROBE
863 d = 1;
864#else
865 perturb = hash_value;
866#endif
867 FOUND_BIN;
868 for (;;) {
869 bin = get_bin(tab->bins, get_size_ind(tab), ind);
870 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
871 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
872 if (EXPECT(rebuilt_p, 0))
873 return REBUILT_TABLE_ENTRY_IND;
874 if (eq_p)
875 break;
876 }
877 else if (EMPTY_BIN_P(bin))
878 return UNDEFINED_ENTRY_IND;
879#ifdef QUADRATIC_PROBE
880 ind = hash_bin(ind + d, tab);
881 d++;
882#else
883 ind = secondary_hash(ind, tab, &perturb);
884#endif
885 COLLISION;
886 }
887 return bin;
888}
889
890/* Find and return index of table TAB bin corresponding to an entry
891 with HASH_VALUE and KEY. If there is no such bin, return
892 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
893 return REBUILT_TABLE_BIN_IND. */
894static st_index_t
895find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
896{
897 int eq_p, rebuilt_p;
898 st_index_t ind;
899#ifdef QUADRATIC_PROBE
900 st_index_t d;
901#else
902 st_index_t perturb;
903#endif
904 st_index_t bin;
905 st_table_entry *entries = tab->entries;
906
907 ind = hash_bin(hash_value, tab);
908#ifdef QUADRATIC_PROBE
909 d = 1;
910#else
911 perturb = hash_value;
912#endif
913 FOUND_BIN;
914 for (;;) {
915 bin = get_bin(tab->bins, get_size_ind(tab), ind);
916 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
917 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
918 if (EXPECT(rebuilt_p, 0))
919 return REBUILT_TABLE_BIN_IND;
920 if (eq_p)
921 break;
922 }
923 else if (EMPTY_BIN_P(bin))
924 return UNDEFINED_BIN_IND;
925#ifdef QUADRATIC_PROBE
926 ind = hash_bin(ind + d, tab);
927 d++;
928#else
929 ind = secondary_hash(ind, tab, &perturb);
930#endif
931 COLLISION;
932 }
933 return ind;
934}
935
936/* Find and return index of table TAB bin corresponding to an entry
937 with HASH_VALUE and KEY. The entry should be in the table
938 already. */
939static st_index_t
940find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
941{
942 st_index_t ind;
943#ifdef QUADRATIC_PROBE
944 st_index_t d;
945#else
946 st_index_t perturb;
947#endif
948 st_index_t bin;
949
950 ind = hash_bin(hash_value, tab);
951#ifdef QUADRATIC_PROBE
952 d = 1;
953#else
954 perturb = hash_value;
955#endif
956 FOUND_BIN;
957 for (;;) {
958 bin = get_bin(tab->bins, get_size_ind(tab), ind);
959 if (EMPTY_OR_DELETED_BIN_P(bin))
960 return ind;
961#ifdef QUADRATIC_PROBE
962 ind = hash_bin(ind + d, tab);
963 d++;
964#else
965 ind = secondary_hash(ind, tab, &perturb);
966#endif
967 COLLISION;
968 }
969}
970
971/* Return index of table TAB bin for HASH_VALUE and KEY through
972 BIN_IND and the pointed value as the function result. Reserve the
973 bin for inclusion of the corresponding entry into the table if it
974 is not there yet. We always find such bin as bins array length is
975 bigger entries array. Although we can reuse a deleted bin, the
976 result bin value is always empty if the table has no entry with
977 KEY. Return the entries array index of the found entry or
978 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
979 during the search, return REBUILT_TABLE_ENTRY_IND. */
980static st_index_t
981find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
982 st_data_t key, st_index_t *bin_ind)
983{
984 int eq_p, rebuilt_p;
985 st_index_t ind;
986 st_hash_t curr_hash_value = *hash_value;
987#ifdef QUADRATIC_PROBE
988 st_index_t d;
989#else
990 st_index_t perturb;
991#endif
992 st_index_t entry_index;
993 st_index_t first_deleted_bin_ind;
994 st_table_entry *entries;
995
996 ind = hash_bin(curr_hash_value, tab);
997#ifdef QUADRATIC_PROBE
998 d = 1;
999#else
1000 perturb = curr_hash_value;
1001#endif
1002 FOUND_BIN;
1003 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1004 entries = tab->entries;
1005 for (;;) {
1006 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1007 if (EMPTY_BIN_P(entry_index)) {
1008 tab->num_entries++;
1009 entry_index = UNDEFINED_ENTRY_IND;
1010 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1011 /* We can reuse bin of a deleted entry. */
1012 ind = first_deleted_bin_ind;
1013 MARK_BIN_EMPTY(tab, ind);
1014 }
1015 break;
1016 }
1017 else if (! DELETED_BIN_P(entry_index)) {
1018 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1019 if (EXPECT(rebuilt_p, 0))
1020 return REBUILT_TABLE_ENTRY_IND;
1021 if (eq_p)
1022 break;
1023 }
1024 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1025 first_deleted_bin_ind = ind;
1026#ifdef QUADRATIC_PROBE
1027 ind = hash_bin(ind + d, tab);
1028 d++;
1029#else
1030 ind = secondary_hash(ind, tab, &perturb);
1031#endif
1032 COLLISION;
1033 }
1034 *bin_ind = ind;
1035 return entry_index;
1036}
1037
1038/* Find an entry with KEY in table TAB. Return non-zero if we found
1039 it. Set up *RECORD to the found entry record. */
1040int
1041st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1042{
1043 st_index_t bin;
1044 st_hash_t hash = do_hash(key, tab);
1045
1046 retry:
1047 if (tab->bins == NULL) {
1048 bin = find_entry(tab, hash, key);
1049 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1050 goto retry;
1051 if (bin == UNDEFINED_ENTRY_IND)
1052 return 0;
1053 }
1054 else {
1055 bin = find_table_entry_ind(tab, hash, key);
1056 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1057 goto retry;
1058 if (bin == UNDEFINED_ENTRY_IND)
1059 return 0;
1060 bin -= ENTRY_BASE;
1061 }
1062 if (value != 0)
1063 *value = tab->entries[bin].record;
1064 return 1;
1065}
1066
1067/* Find an entry with KEY in table TAB. Return non-zero if we found
1068 it. Set up *RESULT to the found table entry key. */
1069int
1070st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1071{
1072 st_index_t bin;
1073 st_hash_t hash = do_hash(key, tab);
1074
1075 retry:
1076 if (tab->bins == NULL) {
1077 bin = find_entry(tab, hash, key);
1078 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1079 goto retry;
1080 if (bin == UNDEFINED_ENTRY_IND)
1081 return 0;
1082 }
1083 else {
1084 bin = find_table_entry_ind(tab, hash, key);
1085 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1086 goto retry;
1087 if (bin == UNDEFINED_ENTRY_IND)
1088 return 0;
1089 bin -= ENTRY_BASE;
1090 }
1091 if (result != 0)
1092 *result = tab->entries[bin].key;
1093 return 1;
1094}
1095
1096/* Check the table and rebuild it if it is necessary. */
1097static inline void
1098rebuild_table_if_necessary (st_table *tab)
1099{
1100 st_index_t bound = tab->entries_bound;
1101
1102 if (bound == get_allocated_entries(tab))
1103 rebuild_table(tab);
1104}
1105
1106/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1107 already entry with KEY in the table, return nonzero and update
1108 the value of the found entry. */
1109int
1110st_insert(st_table *tab, st_data_t key, st_data_t value)
1111{
1112 st_table_entry *entry;
1113 st_index_t bin;
1114 st_index_t ind;
1115 st_hash_t hash_value;
1116 st_index_t bin_ind;
1117 int new_p;
1118
1119 hash_value = do_hash(key, tab);
1120 retry:
1121 rebuild_table_if_necessary(tab);
1122 if (tab->bins == NULL) {
1123 bin = find_entry(tab, hash_value, key);
1124 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1125 goto retry;
1126 new_p = bin == UNDEFINED_ENTRY_IND;
1127 if (new_p)
1128 tab->num_entries++;
1129 bin_ind = UNDEFINED_BIN_IND;
1130 }
1131 else {
1132 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1133 key, &bin_ind);
1134 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1135 goto retry;
1136 new_p = bin == UNDEFINED_ENTRY_IND;
1137 bin -= ENTRY_BASE;
1138 }
1139 if (new_p) {
1140 ind = tab->entries_bound++;
1141 entry = &tab->entries[ind];
1142 entry->hash = hash_value;
1143 entry->key = key;
1144 entry->record = value;
1145 if (bin_ind != UNDEFINED_BIN_IND)
1146 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1147 return 0;
1148 }
1149 tab->entries[bin].record = value;
1150 return 1;
1151}
1152
1153/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1154 entry with KEY before the insertion. */
1155static inline void
1156st_add_direct_with_hash(st_table *tab,
1157 st_data_t key, st_data_t value, st_hash_t hash)
1158{
1159 st_table_entry *entry;
1160 st_index_t ind;
1161 st_index_t bin_ind;
1162
1163 rebuild_table_if_necessary(tab);
1164 ind = tab->entries_bound++;
1165 entry = &tab->entries[ind];
1166 entry->hash = hash;
1167 entry->key = key;
1168 entry->record = value;
1169 tab->num_entries++;
1170 if (tab->bins != NULL) {
1171 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1172 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1173 }
1174}
1175
1176void
1177rb_st_add_direct_with_hash(st_table *tab,
1178 st_data_t key, st_data_t value, st_hash_t hash)
1179{
1180 st_add_direct_with_hash(tab, key, value, hash);
1181}
1182
1183/* Insert (KEY, VALUE) into table TAB. The table should not have
1184 entry with KEY before the insertion. */
1185void
1186st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1187{
1188 st_hash_t hash_value;
1189
1190 hash_value = do_hash(key, tab);
1191 st_add_direct_with_hash(tab, key, value, hash_value);
1192}
1193
1194/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1195 there is already entry with KEY in the table, return nonzero and
1196 update the value of the found entry. */
1197int
1198st_insert2(st_table *tab, st_data_t key, st_data_t value,
1199 st_data_t (*func)(st_data_t))
1200{
1201 st_table_entry *entry;
1202 st_index_t bin;
1203 st_index_t ind;
1204 st_hash_t hash_value;
1205 st_index_t bin_ind;
1206 int new_p;
1207
1208 hash_value = do_hash(key, tab);
1209 retry:
1210 rebuild_table_if_necessary (tab);
1211 if (tab->bins == NULL) {
1212 bin = find_entry(tab, hash_value, key);
1213 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1214 goto retry;
1215 new_p = bin == UNDEFINED_ENTRY_IND;
1216 if (new_p)
1217 tab->num_entries++;
1218 bin_ind = UNDEFINED_BIN_IND;
1219 }
1220 else {
1221 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1222 key, &bin_ind);
1223 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1224 goto retry;
1225 new_p = bin == UNDEFINED_ENTRY_IND;
1226 bin -= ENTRY_BASE;
1227 }
1228 if (new_p) {
1229 key = (*func)(key);
1230 ind = tab->entries_bound++;
1231 entry = &tab->entries[ind];
1232 entry->hash = hash_value;
1233 entry->key = key;
1234 entry->record = value;
1235 if (bin_ind != UNDEFINED_BIN_IND)
1236 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1237 return 0;
1238 }
1239 tab->entries[bin].record = value;
1240 return 1;
1241}
1242
1243/* Create a copy of old_tab into new_tab. */
1244st_table *
1245st_replace(st_table *new_tab, st_table *old_tab)
1246{
1247 *new_tab = *old_tab;
1248 if (old_tab->bins == NULL)
1249 new_tab->bins = NULL;
1250 else {
1251 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1252#ifndef RUBY
1253 if (new_tab->bins == NULL) {
1254 return NULL;
1255 }
1256#endif
1257 }
1258 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1259 * sizeof(st_table_entry));
1260#ifndef RUBY
1261 if (new_tab->entries == NULL) {
1262 return NULL;
1263 }
1264#endif
1265 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1266 get_allocated_entries(old_tab));
1267 if (old_tab->bins != NULL)
1268 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1269
1270 return new_tab;
1271}
1272
1273/* Create and return a copy of table OLD_TAB. */
1274st_table *
1275st_copy(st_table *old_tab)
1276{
1277 st_table *new_tab;
1278
1279 new_tab = (st_table *) malloc(sizeof(st_table));
1280#ifndef RUBY
1281 if (new_tab == NULL)
1282 return NULL;
1283#endif
1284
1285 if (st_replace(new_tab, old_tab) == NULL) {
1286 st_free_table(new_tab);
1287 return NULL;
1288 }
1289
1290 return new_tab;
1291}
1292
1293/* Update the entries start of table TAB after removing an entry
1294 with index N in the array entries. */
1295static inline void
1296update_range_for_deleted(st_table *tab, st_index_t n)
1297{
1298 /* Do not update entries_bound here. Otherwise, we can fill all
1299 bins by deleted entry value before rebuilding the table. */
1300 if (tab->entries_start == n) {
1301 st_index_t start = n + 1;
1302 st_index_t bound = tab->entries_bound;
1303 st_table_entry *entries = tab->entries;
1304 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1305 tab->entries_start = start;
1306 }
1307}
1308
1309/* Delete entry with KEY from table TAB, set up *VALUE (unless
1310 VALUE is zero) from deleted table entry, and return non-zero. If
1311 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1312 is zero), and return zero. */
1313static int
1314st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1315{
1316 st_table_entry *entry;
1317 st_index_t bin;
1318 st_index_t bin_ind;
1319 st_hash_t hash;
1320
1321 hash = do_hash(*key, tab);
1322 retry:
1323 if (tab->bins == NULL) {
1324 bin = find_entry(tab, hash, *key);
1325 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1326 goto retry;
1327 if (bin == UNDEFINED_ENTRY_IND) {
1328 if (value != 0) *value = 0;
1329 return 0;
1330 }
1331 }
1332 else {
1333 bin_ind = find_table_bin_ind(tab, hash, *key);
1334 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1335 goto retry;
1336 if (bin_ind == UNDEFINED_BIN_IND) {
1337 if (value != 0) *value = 0;
1338 return 0;
1339 }
1340 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1341 MARK_BIN_DELETED(tab, bin_ind);
1342 }
1343 entry = &tab->entries[bin];
1344 *key = entry->key;
1345 if (value != 0) *value = entry->record;
1346 MARK_ENTRY_DELETED(entry);
1347 tab->num_entries--;
1348 update_range_for_deleted(tab, bin);
1349 return 1;
1350}
1351
1352int
1353st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1354{
1355 return st_general_delete(tab, key, value);
1356}
1357
1358/* The function and other functions with suffix '_safe' or '_check'
1359 are originated from the previous implementation of the hash tables.
1360 It was necessary for correct deleting entries during traversing
1361 tables. The current implementation permits deletion during
1362 traversing without a specific way to do this. */
1363int
1364st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1365 st_data_t never ATTRIBUTE_UNUSED)
1366{
1367 return st_general_delete(tab, key, value);
1368}
1369
1370/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1371 return zero. Otherwise, remove the first entry in the table.
1372 Return its key through KEY and its record through VALUE (unless
1373 VALUE is zero). */
1374int
1375st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1376{
1377 st_index_t i, bound;
1378 st_index_t bin;
1379 st_table_entry *entries, *curr_entry_ptr;
1380 st_index_t bin_ind;
1381
1382 entries = tab->entries;
1383 bound = tab->entries_bound;
1384 for (i = tab->entries_start; i < bound; i++) {
1385 curr_entry_ptr = &entries[i];
1386 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1387 st_hash_t entry_hash = curr_entry_ptr->hash;
1388 st_data_t entry_key = curr_entry_ptr->key;
1389
1390 if (value != 0) *value = curr_entry_ptr->record;
1391 *key = entry_key;
1392 retry:
1393 if (tab->bins == NULL) {
1394 bin = find_entry(tab, entry_hash, entry_key);
1395 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1396 entries = tab->entries;
1397 goto retry;
1398 }
1399 curr_entry_ptr = &entries[bin];
1400 }
1401 else {
1402 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1403 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1404 entries = tab->entries;
1405 goto retry;
1406 }
1407 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1408 - ENTRY_BASE];
1409 MARK_BIN_DELETED(tab, bin_ind);
1410 }
1411 MARK_ENTRY_DELETED(curr_entry_ptr);
1412 tab->num_entries--;
1413 update_range_for_deleted(tab, i);
1414 return 1;
1415 }
1416 }
1417 if (value != 0) *value = 0;
1418 return 0;
1419}
1420
1421/* See comments for function st_delete_safe. */
1422void
1423st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1424 st_data_t never ATTRIBUTE_UNUSED)
1425{
1426}
1427
1428/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1429 of the key and the value of the found entry, and non-zero as the
1430 3rd argument. If the entry is not found, call FUNC with a pointer
1431 to KEY, a pointer to zero, and a zero argument. If the call
1432 returns ST_CONTINUE, the table will have an entry with key and
1433 value returned by FUNC through the 1st and 2nd parameters. If the
1434 call of FUNC returns ST_DELETE, the table will not have entry with
1435 KEY. The function returns flag of that the entry with KEY was in
1436 the table before the call. */
1437int
1438st_update(st_table *tab, st_data_t key,
1439 st_update_callback_func *func, st_data_t arg)
1440{
1441 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1442 st_index_t bin = 0; /* Ditto */
1443 st_table_entry *entries;
1444 st_index_t bin_ind;
1445 st_data_t value = 0, old_key;
1446 int retval, existing;
1447 st_hash_t hash = do_hash(key, tab);
1448
1449 retry:
1450 entries = tab->entries;
1451 if (tab->bins == NULL) {
1452 bin = find_entry(tab, hash, key);
1453 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1454 goto retry;
1455 existing = bin != UNDEFINED_ENTRY_IND;
1456 entry = &entries[bin];
1457 bin_ind = UNDEFINED_BIN_IND;
1458 }
1459 else {
1460 bin_ind = find_table_bin_ind(tab, hash, key);
1461 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1462 goto retry;
1463 existing = bin_ind != UNDEFINED_BIN_IND;
1464 if (existing) {
1465 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1466 entry = &entries[bin];
1467 }
1468 }
1469 if (existing) {
1470 key = entry->key;
1471 value = entry->record;
1472 }
1473 old_key = key;
1474 retval = (*func)(&key, &value, arg, existing);
1475 switch (retval) {
1476 case ST_CONTINUE:
1477 if (! existing) {
1478 st_add_direct_with_hash(tab, key, value, hash);
1479 break;
1480 }
1481 if (old_key != key) {
1482 entry->key = key;
1483 }
1484 entry->record = value;
1485 break;
1486 case ST_DELETE:
1487 if (existing) {
1488 if (bin_ind != UNDEFINED_BIN_IND)
1489 MARK_BIN_DELETED(tab, bin_ind);
1490 MARK_ENTRY_DELETED(entry);
1491 tab->num_entries--;
1492 update_range_for_deleted(tab, bin);
1493 }
1494 break;
1495 }
1496 return existing;
1497}
1498
1499/* Traverse all entries in table TAB calling FUNC with current entry
1500 key and value and zero. If the call returns ST_STOP, stop
1501 traversing. If the call returns ST_DELETE, delete the current
1502 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1503 traversing. The function returns zero unless an error is found.
1504 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1505 different for ST_CHECK and when the current element is removed
1506 during traversing. */
1507static inline int
1508st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1509 int check_p)
1510{
1511 st_index_t bin;
1512 st_index_t bin_ind;
1513 st_table_entry *entries, *curr_entry_ptr;
1514 enum st_retval retval;
1515 st_index_t i, rebuilds_num;
1516 st_hash_t hash;
1517 st_data_t key;
1518 int error_p, packed_p = tab->bins == NULL;
1519
1520 entries = tab->entries;
1521 /* The bound can change inside the loop even without rebuilding
1522 the table, e.g. by an entry insertion. */
1523 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1524 curr_entry_ptr = &entries[i];
1525 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1526 continue;
1527 key = curr_entry_ptr->key;
1528 rebuilds_num = tab->rebuilds_num;
1529 hash = curr_entry_ptr->hash;
1530 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1531
1532 if (retval == ST_REPLACE && replace) {
1533 st_data_t value;
1534 value = curr_entry_ptr->record;
1535 retval = (*replace)(&key, &value, arg, TRUE);
1536 curr_entry_ptr->key = key;
1537 curr_entry_ptr->record = value;
1538 }
1539
1540 if (rebuilds_num != tab->rebuilds_num) {
1541 retry:
1542 entries = tab->entries;
1543 packed_p = tab->bins == NULL;
1544 if (packed_p) {
1545 i = find_entry(tab, hash, key);
1546 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1547 goto retry;
1548 error_p = i == UNDEFINED_ENTRY_IND;
1549 }
1550 else {
1551 i = find_table_entry_ind(tab, hash, key);
1552 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1553 goto retry;
1554 error_p = i == UNDEFINED_ENTRY_IND;
1555 i -= ENTRY_BASE;
1556 }
1557 if (error_p && check_p) {
1558 /* call func with error notice */
1559 retval = (*func)(0, 0, arg, 1);
1560 return 1;
1561 }
1562 curr_entry_ptr = &entries[i];
1563 }
1564 switch (retval) {
1565 case ST_REPLACE:
1566 break;
1567 case ST_CONTINUE:
1568 break;
1569 case ST_CHECK:
1570 if (check_p)
1571 break;
1572 case ST_STOP:
1573 return 0;
1574 case ST_DELETE: {
1575 st_data_t key = curr_entry_ptr->key;
1576
1577 again:
1578 if (packed_p) {
1579 bin = find_entry(tab, hash, key);
1580 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1581 goto again;
1582 if (bin == UNDEFINED_ENTRY_IND)
1583 break;
1584 }
1585 else {
1586 bin_ind = find_table_bin_ind(tab, hash, key);
1587 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1588 goto again;
1589 if (bin_ind == UNDEFINED_BIN_IND)
1590 break;
1591 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1592 MARK_BIN_DELETED(tab, bin_ind);
1593 }
1594 curr_entry_ptr = &entries[bin];
1595 MARK_ENTRY_DELETED(curr_entry_ptr);
1596 tab->num_entries--;
1597 update_range_for_deleted(tab, bin);
1598 break;
1599 }
1600 }
1601 }
1602 return 0;
1603}
1604
1605int
1606st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1607{
1608 return st_general_foreach(tab, func, replace, arg, TRUE);
1609}
1610
1611struct functor {
1612 st_foreach_callback_func *func;
1613 st_data_t arg;
1614};
1615
1616static int
1617apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1618{
1619 const struct functor *f = (void *)d;
1620 return f->func(k, v, f->arg);
1621}
1622
1623int
1624st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1625{
1626 const struct functor f = { func, arg };
1627 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1628}
1629
1630/* See comments for function st_delete_safe. */
1631int
1632st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1633 st_data_t never ATTRIBUTE_UNUSED)
1634{
1635 return st_general_foreach(tab, func, 0, arg, TRUE);
1636}
1637
1638/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1639 Return the number of keys set up in array KEYS. */
1640static inline st_index_t
1641st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1642{
1643 st_index_t i, bound;
1644 st_data_t key, *keys_start, *keys_end;
1645 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1646
1647 bound = tab->entries_bound;
1648 keys_start = keys;
1649 keys_end = keys + size;
1650 for (i = tab->entries_start; i < bound; i++) {
1651 if (keys == keys_end)
1652 break;
1653 curr_entry_ptr = &entries[i];
1654 key = curr_entry_ptr->key;
1655 if (! DELETED_ENTRY_P(curr_entry_ptr))
1656 *keys++ = key;
1657 }
1658
1659 return keys - keys_start;
1660}
1661
1662st_index_t
1663st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1664{
1665 return st_general_keys(tab, keys, size);
1666}
1667
1668/* See comments for function st_delete_safe. */
1669st_index_t
1670st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1671 st_data_t never ATTRIBUTE_UNUSED)
1672{
1673 return st_general_keys(tab, keys, size);
1674}
1675
1676/* Set up array VALUES by at most SIZE values of head table TAB
1677 entries. Return the number of values set up in array VALUES. */
1678static inline st_index_t
1679st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1680{
1681 st_index_t i, bound;
1682 st_data_t *values_start, *values_end;
1683 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1684
1685 values_start = values;
1686 values_end = values + size;
1687 bound = tab->entries_bound;
1688 for (i = tab->entries_start; i < bound; i++) {
1689 if (values == values_end)
1690 break;
1691 curr_entry_ptr = &entries[i];
1692 if (! DELETED_ENTRY_P(curr_entry_ptr))
1693 *values++ = curr_entry_ptr->record;
1694 }
1695
1696 return values - values_start;
1697}
1698
1699st_index_t
1700st_values(st_table *tab, st_data_t *values, st_index_t size)
1701{
1702 return st_general_values(tab, values, size);
1703}
1704
1705/* See comments for function st_delete_safe. */
1706st_index_t
1707st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1708 st_data_t never ATTRIBUTE_UNUSED)
1709{
1710 return st_general_values(tab, values, size);
1711}
1712
/*
 * Starting value of the FNV-1a loop in strcasehash; strhash also
 * passes it to st_hash as the initial hash state.
 */
#define FNV1_32A_INIT 0x811c9dc5

/*
 * 32 bit magic FNV-1a prime
 */
#define FNV_32_PRIME 0x01000193
1719
/* UNALIGNED_WORD_ACCESS selects the word-at-a-time paths of st_hash
   that read through possibly unaligned pointers.  Unless it was
   defined beforehand, it becomes 1 on the architectures listed below
   and 0 everywhere else.  __POWERPC__ is listed to accommodate the
   Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
     defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
     defined(__mc68020__)
#   define UNALIGNED_WORD_ACCESS 1
# else
#   define UNALIGNED_WORD_ACCESS 0
# endif
#endif
1732
1733/* This hash function is quite simplified MurmurHash3
1734 * Simplification is legal, cause most of magic still happens in finalizator.
1735 * And finalizator is almost the same as in MurmurHash3 */
1736#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1737#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1738
1739#if ST_INDEX_BITS <= 32
1740#define C1 (st_index_t)0xcc9e2d51
1741#define C2 (st_index_t)0x1b873593
1742#else
1743#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1744#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1745#endif
1746NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1747NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1748NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1749
1750static inline st_index_t
1751murmur_step(st_index_t h, st_index_t k)
1752{
1753#if ST_INDEX_BITS <= 32
1754#define r1 (17)
1755#define r2 (11)
1756#else
1757#define r1 (33)
1758#define r2 (24)
1759#endif
1760 k *= C1;
1761 h ^= ROTL(k, r1);
1762 h *= C2;
1763 h = ROTL(h, r2);
1764 return h;
1765}
1766#undef r1
1767#undef r2
1768
1769static inline st_index_t
1770murmur_finish(st_index_t h)
1771{
1772#if ST_INDEX_BITS <= 32
1773#define r1 (16)
1774#define r2 (13)
1775#define r3 (16)
1776 const st_index_t c1 = 0x85ebca6b;
1777 const st_index_t c2 = 0xc2b2ae35;
1778#else
1779/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1780#define r1 (30)
1781#define r2 (27)
1782#define r3 (31)
1783 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1784 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1785#endif
1786#if ST_INDEX_BITS > 64
1787 h ^= h >> 64;
1788 h *= c2;
1789 h ^= h >> 65;
1790#endif
1791 h ^= h >> r1;
1792 h *= c1;
1793 h ^= h >> r2;
1794 h *= c2;
1795 h ^= h >> r3;
1796 return h;
1797}
1798#undef r1
1799#undef r2
1800#undef r3
1801
1802st_index_t
1803st_hash(const void *ptr, size_t len, st_index_t h)
1804{
1805 const char *data = ptr;
1806 st_index_t t = 0;
1807 size_t l = len;
1808
1809#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1810#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1811#if SIZEOF_ST_INDEX_T > 4
1812#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1813#if SIZEOF_ST_INDEX_T > 8
1814#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1815 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1816#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1817#endif
1818#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1819#else
1820#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1821#endif
1822#undef SKIP_TAIL
1823 if (len >= sizeof(st_index_t)) {
1824#if !UNALIGNED_WORD_ACCESS
1825 int align = (int)((st_data_t)data % sizeof(st_index_t));
1826 if (align) {
1827 st_index_t d = 0;
1828 int sl, sr, pack;
1829
1830 switch (align) {
1831#ifdef WORDS_BIGENDIAN
1832# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1833 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1834#else
1835# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1836 t |= data_at(n) << CHAR_BIT*(n)
1837#endif
1838 UNALIGNED_ADD_ALL;
1839#undef UNALIGNED_ADD
1840 }
1841
1842#ifdef WORDS_BIGENDIAN
1843 t >>= (CHAR_BIT * align) - CHAR_BIT;
1844#else
1845 t <<= (CHAR_BIT * align);
1846#endif
1847
1848 data += sizeof(st_index_t)-align;
1849 len -= sizeof(st_index_t)-align;
1850
1851 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1852 sr = CHAR_BIT * align;
1853
1854 while (len >= sizeof(st_index_t)) {
1855 d = *(st_index_t *)data;
1856#ifdef WORDS_BIGENDIAN
1857 t = (t << sr) | (d >> sl);
1858#else
1859 t = (t >> sr) | (d << sl);
1860#endif
1861 h = murmur_step(h, t);
1862 t = d;
1863 data += sizeof(st_index_t);
1864 len -= sizeof(st_index_t);
1865 }
1866
1867 pack = len < (size_t)align ? (int)len : align;
1868 d = 0;
1869 switch (pack) {
1870#ifdef WORDS_BIGENDIAN
1871# define UNALIGNED_ADD(n) case (n) + 1: \
1872 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1873#else
1874# define UNALIGNED_ADD(n) case (n) + 1: \
1875 d |= data_at(n) << CHAR_BIT*(n)
1876#endif
1877 UNALIGNED_ADD_ALL;
1878#undef UNALIGNED_ADD
1879 }
1880#ifdef WORDS_BIGENDIAN
1881 t = (t << sr) | (d >> sl);
1882#else
1883 t = (t >> sr) | (d << sl);
1884#endif
1885
1886 if (len < (size_t)align) goto skip_tail;
1887# define SKIP_TAIL 1
1888 h = murmur_step(h, t);
1889 data += pack;
1890 len -= pack;
1891 }
1892 else
1893#endif
1894#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1895#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1896#else
1897#define aligned_data data
1898#endif
1899 {
1900 do {
1901 h = murmur_step(h, *(st_index_t *)aligned_data);
1902 data += sizeof(st_index_t);
1903 len -= sizeof(st_index_t);
1904 } while (len >= sizeof(st_index_t));
1905 }
1906 }
1907
1908 t = 0;
1909 switch (len) {
1910#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1911 /* in this case byteorder doesn't really matter */
1912#if SIZEOF_ST_INDEX_T > 4
1913 case 7: t |= data_at(6) << 48;
1914 case 6: t |= data_at(5) << 40;
1915 case 5: t |= data_at(4) << 32;
1916 case 4:
1917 t |= (st_index_t)*(uint32_t*)aligned_data;
1918 goto skip_tail;
1919# define SKIP_TAIL 1
1920#endif
1921 case 3: t |= data_at(2) << 16;
1922 case 2: t |= data_at(1) << 8;
1923 case 1: t |= data_at(0);
1924#else
1925#ifdef WORDS_BIGENDIAN
1926# define UNALIGNED_ADD(n) case (n) + 1: \
1927 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1928#else
1929# define UNALIGNED_ADD(n) case (n) + 1: \
1930 t |= data_at(n) << CHAR_BIT*(n)
1931#endif
1932 UNALIGNED_ADD_ALL;
1933#undef UNALIGNED_ADD
1934#endif
1935#ifdef SKIP_TAIL
1936 skip_tail:
1937#endif
1938 h ^= t; h -= ROTL(t, 7);
1939 h *= C2;
1940 }
1941 h ^= l;
1942#undef aligned_data
1943
1944 return murmur_finish(h);
1945}
1946
1947st_index_t
1948st_hash_uint32(st_index_t h, uint32_t i)
1949{
1950 return murmur_step(h, i);
1951}
1952
1953NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1954st_index_t
1955st_hash_uint(st_index_t h, st_index_t i)
1956{
1957 i += h;
1958/* no matter if it is BigEndian or LittleEndian,
1959 * we hash just integers */
1960#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1961 h = murmur_step(h, i >> 8*8);
1962#endif
1963 h = murmur_step(h, i);
1964 return h;
1965}
1966
1967st_index_t
1968st_hash_end(st_index_t h)
1969{
1970 h = murmur_finish(h);
1971 return h;
1972}
1973
1974#undef st_hash_start
1975st_index_t
1976rb_st_hash_start(st_index_t h)
1977{
1978 return h;
1979}
1980
1981static st_index_t
1982strhash(st_data_t arg)
1983{
1984 register const char *string = (const char *)arg;
1985 return st_hash(string, strlen(string), FNV1_32A_INIT);
1986}
1987
/* Compare the NUL-terminated strings S1 and S2, ignoring the case of
   the ASCII letters 'A'-'Z' only (no locale is consulted).  Return 0
   if the strings are equal, 1 if S1 sorts after S2 and -1 if it sorts
   before.  A string that is a proper prefix of the other sorts first.

   Bytes are compared as unsigned char, so the ordering of bytes
   outside the ASCII range does not depend on whether plain char is
   signed on the platform. */
int
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
{
    for (;;) {
        unsigned char c1 = (unsigned char)*s1++;
        unsigned char c2 = (unsigned char)*s2++;

        if (c1 == '\0' || c2 == '\0') {
            /* At least one string ended; the longer one is greater. */
            if (c1 != '\0') return 1;
            if (c2 != '\0') return -1;
            return 0;
        }
        /* Fold ASCII upper case to lower case. */
        if (('A' <= c1) && (c1 <= 'Z')) c1 = (unsigned char)(c1 + ('a' - 'A'));
        if (('A' <= c2) && (c2 <= 'Z')) c2 = (unsigned char)(c2 + ('a' - 'A'));
        if (c1 != c2)
            return c1 > c2 ? 1 : -1;
    }
}
2011
/* Compare at most the first N bytes of the NUL-terminated strings S1
   and S2, ignoring the case of the ASCII letters 'A'-'Z' only (no
   locale is consulted).  Return 0 if the compared parts are equal
   (always the case for N == 0), 1 if S1 sorts after S2 and -1 if it
   sorts before.  A string that ends first sorts first.

   Bytes are compared as unsigned char, so the ordering of bytes
   outside the ASCII range does not depend on whether plain char is
   signed on the platform. */
int
st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        unsigned char c1 = (unsigned char)*s1++;
        unsigned char c2 = (unsigned char)*s2++;

        if (c1 == '\0' || c2 == '\0') {
            /* At least one string ended; the longer one is greater. */
            if (c1 != '\0') return 1;
            if (c2 != '\0') return -1;
            return 0;
        }
        /* Fold ASCII upper case to lower case. */
        if (('A' <= c1) && (c1 <= 'Z')) c1 = (unsigned char)(c1 + ('a' - 'A'));
        if (('A' <= c2) && (c2 <= 'Z')) c2 = (unsigned char)(c2 + ('a' - 'A'));
        if (c1 != c2)
            return c1 > c2 ? 1 : -1;
    }
    return 0;
}
2037
2038static int
2039st_strcmp(st_data_t lhs, st_data_t rhs)
2040{
2041 const char *s1 = (char *)lhs;
2042 const char *s2 = (char *)rhs;
2043 return strcmp(s1, s2);
2044}
2045
2046static int
2047st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2048{
2049 const char *s1 = (char *)lhs;
2050 const char *s2 = (char *)rhs;
2051 return st_locale_insensitive_strcasecmp(s1, s2);
2052}
2053
2054NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2055static st_index_t
2056strcasehash(st_data_t arg)
2057{
2058 register const char *string = (const char *)arg;
2059 register st_index_t hval = FNV1_32A_INIT;
2060
2061 /*
2062 * FNV-1a hash each octet in the buffer
2063 */
2064 while (*string) {
2065 unsigned int c = (unsigned char)*string++;
2066 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2067 hval ^= c;
2068
2069 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2070 hval *= FNV_32_PRIME;
2071 }
2072 return hval;
2073}
2074
2075int
2076st_numcmp(st_data_t x, st_data_t y)
2077{
2078 return x != y;
2079}
2080
2081st_index_t
2082st_numhash(st_data_t n)
2083{
2084 enum {s1 = 11, s2 = 3};
2085 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2086}
2087
2088#ifdef RUBY
2089/* Expand TAB to be suitable for holding SIZ entries in total.
2090 Pre-existing entries remain not deleted inside of TAB, but its bins
2091 are cleared to expect future reconstruction. See rehash below. */
2092static void
2093st_expand_table(st_table *tab, st_index_t siz)
2094{
2095 st_table *tmp;
2096 st_index_t n;
2097
2098 if (siz <= get_allocated_entries(tab))
2099 return; /* enough room already */
2100
2101 tmp = st_init_table_with_size(tab->type, siz);
2102 n = get_allocated_entries(tab);
2103 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2104 free(tab->entries);
2105 free(tab->bins);
2106 free(tmp->bins);
2107 tab->entry_power = tmp->entry_power;
2108 tab->bin_power = tmp->bin_power;
2109 tab->size_ind = tmp->size_ind;
2110 tab->entries = tmp->entries;
2111 tab->bins = NULL;
2112 tab->rebuilds_num++;
2113 free(tmp);
2114}
2115
2116/* Rehash using linear search. Return TRUE if we found that the table
2117 was rebuilt. */
2118static int
2119st_rehash_linear(st_table *tab)
2120{
2121 int eq_p, rebuilt_p;
2122 st_index_t i, j;
2123 st_table_entry *p, *q;
2124
2125 free(tab->bins);
2126 tab->bins = NULL;
2127
2128 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2129 p = &tab->entries[i];
2130 if (DELETED_ENTRY_P(p))
2131 continue;
2132 for (j = i + 1; j < tab->entries_bound; j++) {
2133 q = &tab->entries[j];
2134 if (DELETED_ENTRY_P(q))
2135 continue;
2136 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2137 if (EXPECT(rebuilt_p, 0))
2138 return TRUE;
2139 if (eq_p) {
2140 *p = *q;
2141 MARK_ENTRY_DELETED(q);
2142 tab->num_entries--;
2143 update_range_for_deleted(tab, j);
2144 }
2145 }
2146 }
2147 return FALSE;
2148}
2149
2150/* Rehash using index. Return TRUE if we found that the table was
2151 rebuilt. */
2152static int
2153st_rehash_indexed(st_table *tab)
2154{
2155 int eq_p, rebuilt_p;
2156 st_index_t i;
2157 st_index_t const n = bins_size(tab);
2158 unsigned int const size_ind = get_size_ind(tab);
2159 st_index_t *bins = realloc(tab->bins, n);
2160 tab->bins = bins;
2161 initialize_bins(tab);
2162 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2163 st_table_entry *p = &tab->entries[i];
2164 st_index_t ind;
2165#ifdef QUADRATIC_PROBE
2166 st_index_t d = 1;
2167#else
2168 st_index_t perturb = p->hash;
2169#endif
2170
2171 if (DELETED_ENTRY_P(p))
2172 continue;
2173
2174 ind = hash_bin(p->hash, tab);
2175 for (;;) {
2176 st_index_t bin = get_bin(bins, size_ind, ind);
2177 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2178 /* ok, new room */
2179 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2180 break;
2181 }
2182 else {
2183 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2184 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2185 if (EXPECT(rebuilt_p, 0))
2186 return TRUE;
2187 if (eq_p) {
2188 /* duplicated key; delete it */
2189 q->record = p->record;
2190 MARK_ENTRY_DELETED(p);
2191 tab->num_entries--;
2192 update_range_for_deleted(tab, bin);
2193 break;
2194 }
2195 else {
2196 /* hash collision; skip it */
2197#ifdef QUADRATIC_PROBE
2198 ind = hash_bin(ind + d, tab);
2199 d++;
2200#else
2201 ind = secondary_hash(ind, tab, &perturb);
2202#endif
2203 }
2204 }
2205 }
2206 }
2207 return FALSE;
2208}
2209
2210/* Reconstruct TAB's bins according to TAB's entries. This function
2211 permits conflicting keys inside of entries. No errors are reported
2212 then. All but one of them are discarded silently. */
2213static void
2214st_rehash(st_table *tab)
2215{
2216 int rebuilt_p;
2217
2218 do {
2219 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2220 rebuilt_p = st_rehash_linear(tab);
2221 else
2222 rebuilt_p = st_rehash_indexed(tab);
2223 } while (rebuilt_p);
2224}
2225
2226static st_data_t
2227st_stringify(VALUE key)
2228{
2229 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2230 rb_hash_key_str(key) : key;
2231}
2232
2233static void
2234st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2235{
2236 st_data_t k = st_stringify(key);
2238 e.hash = do_hash(k, tab);
2239 e.key = k;
2240 e.record = val;
2241
2242 tab->entries[tab->entries_bound++] = e;
2243 tab->num_entries++;
2244 RB_OBJ_WRITTEN(hash, Qundef, k);
2245 RB_OBJ_WRITTEN(hash, Qundef, val);
2246}
2247
2248static void
2249st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2250{
2251 long i;
2252
2253 for (i = 0; i < argc; /* */) {
2254 st_data_t k = st_stringify(argv[i++]);
2255 st_data_t v = argv[i++];
2256 st_insert(tab, k, v);
2257 RB_OBJ_WRITTEN(hash, Qundef, k);
2258 RB_OBJ_WRITTEN(hash, Qundef, v);
2259 }
2260}
2261
2262static void
2263st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2264{
2265 long i;
2266
2267 /* push elems */
2268 for (i = 0; i < argc; /* */) {
2269 VALUE key = argv[i++];
2270 VALUE val = argv[i++];
2271 st_insert_single(tab, hash, key, val);
2272 }
2273
2274 /* reindex */
2275 st_rehash(tab);
2276}
2277
2278/* Mimics ruby's { foo => bar } syntax. This function is subpart
2279 of rb_hash_bulk_insert. */
2280void
2281rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2282{
2283 st_index_t n, size = argc / 2;
2284 st_table *tab = RHASH_ST_TABLE(hash);
2285
2286 tab = RHASH_TBL_RAW(hash);
2287 n = tab->entries_bound + size;
2288 st_expand_table(tab, n);
2289 if (UNLIKELY(tab->num_entries))
2290 st_insert_generic(tab, argc, argv, hash);
2291 else if (argc <= 2)
2292 st_insert_single(tab, hash, argv[0], argv[1]);
2293 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2294 st_insert_linear(tab, argc, argv, hash);
2295 else
2296 st_insert_generic(tab, argc, argv, hash);
2297}
2298
2299// to iterate iv_index_tbl
2300st_data_t
2301rb_st_nth_key(st_table *tab, st_index_t index)
2302{
2303 if (LIKELY(tab->entries_start == 0 &&
2304 tab->num_entries == tab->entries_bound &&
2305 index < tab->num_entries)) {
2306 return tab->entries[index].key;
2307 }
2308 else {
2309 rb_bug("unreachable");
2310 }
2311}
2312
2313void
2314rb_st_compact_table(st_table *tab)
2315{
2316 st_index_t num = tab->num_entries;
2317 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2318 /* Compaction: */
2319 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2320 rebuild_table_with(new_tab, tab);
2321 }
2322}
2323
2324#endif
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1342
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:215
VALUE rb_cString
String class.
Definition string.c:78
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:631
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:366
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:133
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40