rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit ae39fe778a747c38cdd3824e21ddbca7e8aef742
parent 35506348ea8ea822e9ac8a55b001773d55ef36f0
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Sun,  4 Jun 2023 09:52:09 -0700

Change hash table structure

See a previous commit message.

Diffstat:
Minc/dict.h | 6++++--
Msrc/dict.c | 250+++++++++++++++++++++++++++++++++++++------------------------------------------
2 files changed, 121 insertions(+), 135 deletions(-)

diff --git a/inc/dict.h b/inc/dict.h @@ -56,7 +56,8 @@ typedef struct D { \ u64 seed; \ usize count; \ usize ntombs; \ - void *pages; \ + void *mem; \ + u8 *data; \ V default_val; \ } D; @@ -81,7 +82,8 @@ static inline UNUSED int R_DICT_METHOD(init,##__VA_ARGS__)(D *d, usize hint) { \ static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { \ if (d->niter > 0) \ r_warnf("dict freed with %"PRId32" iterators alive", d->niter); \ - R_DICT_FREE(d->pages); \ + R_DICT_FREE(d->mem); \ + d->data = 0; /* Ensure hard error on use after free. */ \ } \ static inline UNUSED bool R_DICT_METHOD(get,##__VA_ARGS__)(D *d, V *v, K k) { \ u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ diff --git a/src/dict.c b/src/dict.c @@ -7,16 +7,21 @@ #include "rand.h" #include "rcx.h" -#if defined(__GNUC__) && defined(R_HAVE_AVX2) && 0 +#if defined(__GNUC__) && defined(R_HAVE_AVX2) #include "simd.h" -#define PAGE_LEN 32u -#elif defined(__GNUC__) && defined(R_HAVE_SSE2) && 0 +#define PROBE_LEN 32u +#define PROBE_LEN_BITS 5u +#elif defined(__GNUC__) && defined(R_HAVE_SSE2) #include "simd.h" -#define PAGE_LEN 16u +#define PROBE_LEN 16u +#define PROBE_LEN_BITS 4u #else -#define PAGE_LEN 8u +#define PROBE_LEN 8u +#define PROBE_LEN_BITS 3u #endif +/* PAGE_LEN should be a multiple of 8 for packing and alignment. */ +#define PAGE_LEN 8u #define LF_NUM 3u #define LF_DEN 4u #define EMPTY 0u @@ -25,7 +30,7 @@ typedef struct r_dict_spec Spec; typedef struct r_dict Dict; -typedef struct page Page; +typedef u8 Page; /* For readability only; a page isn't really a u8. */ enum scan_mode { ACCESS = 0, @@ -34,24 +39,19 @@ enum scan_mode { }; struct r_dict { - u8 b; + u8 b; /* Log2 of number of slots */ u32 niter; u64 seed; usize count; usize ntombs; - Page *pages; + void *mem; + u8 *data; /* Cache line-aligned pointer into mem */ /* V default_val; */ }; -struct page { - u8 meta[PAGE_LEN]; - /* K key[PAGE_LEN]; */ - /* V val[PAGE_LEN]; */ -}; - -static inline u64 +static inline usize hash0(u64 h, u8 b) { - return h & ((U64_C(1) << b) - 1u); + return h & ((U64_C(1) << (b - PROBE_LEN_BITS)) - 1u); } static inline u8 @@ -64,56 +64,19 @@ hash1(u64 h, u8 b) { static inline bool exceeds_lf(u64 n, u8 b) { /* Note that the math here is 64-bit to avoid overflow. */ - return LF_DEN * n > LF_NUM * PAGE_LEN * (U64_C(1) << b); -} - -/* TODO: try computing psize when in dict.h instead. */ - -/* Compute Page size *without* overflow checks. */ -static inline usize -psize_unsafe(Spec *spec) { - return sizeof(Page) + PAGE_LEN*spec->ksize + PAGE_LEN*spec->vsize; -} - -/* Compute Page size *with* overflow checks. */ -static inline usize -psize_safe(Spec *spec) { - usize psize = sizeof(Page); - - usize ksize = spec->ksize; - assert(!r_mul_will_overflow_(PAGE_LEN, ksize) - && PAGE_LEN*ksize <= USIZE_MAX - psize); - psize += PAGE_LEN * ksize; - - usize vsize = spec->vsize; - assert(!r_mul_will_overflow_(PAGE_LEN, vsize) - && PAGE_LEN*vsize <= USIZE_MAX - psize); - psize += PAGE_LEN * vsize; - - return psize; -} - -static inline void * -key(Page *p, usize j, Spec *spec) { - return (u8 *)(p+1) + j*spec->ksize; -} - -static inline void * -val(Page *p, usize j, Spec *spec) { - return (u8 *)(p+1) + PAGE_LEN*spec->ksize + j*spec->vsize; + return LF_DEN * n > LF_NUM * (U64_C(1) << b); } static void insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) { assert(false); - - usize psize = psize_unsafe(spec); - u64 h0 = hash0(h, b); +#if 0 + usize h0 = hash0(h, b); u8 h1 = hash1(h, b); usize pn = h0; for (usize i = 1;; i++) { - Page *p = (void *)((u8 *)pages + pn*psize); + Page *p = (void *)((u8 *)pages + pn*spec->psize); /* TODO: this is wrong. Should probably just generalize scan and * replace insert with scan. Otherwise we need a bunch of #ifs here. */ @@ -127,17 +90,18 @@ insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) { return; } - j = (j + 1) & (PAGE_LEN - 1); + j = (j + 1) & (PROBE_LEN - 1); } while (j != h1); pn = (pn + i) & ((USIZE_C(1) << b) - 1u); } +#endif } static int grow(Dict *d, Spec *spec) { - usize psize = psize_unsafe(spec); - + assert(false); +#if 0 u8 oldb = d->b; u8 newb = oldb + 1; assert(newb < USIZE_BITS); @@ -146,17 +110,17 @@ grow(Dict *d, Spec *spec) { Page *old = d->pages; usize newlen = USIZE_C(1) << newb; - if (r_mul_will_overflow_(newlen, psize)) { + if (r_mul_will_overflow_(newlen, spec->psize)) { errno = ENOMEM; return -1; } - Page *new = spec->allocz(newlen * psize); + Page *new = spec->allocz(newlen * spec->psize); if (!new) return -1; for (usize i = 0; i < oldlen; i++) { - Page *p = (void *)((u8 *)old + i*psize); + Page *p = (void *)((u8 *)old + i*spec->psize); - for (usize j = 0; j < PAGE_LEN; j++) { + for (usize j = 0; j < PROBE_LEN; j++) { if (p->meta[j] < MIN_HASH1) continue; void *k = key(p, j, spec); void *v = val(p, j, spec); @@ -169,6 +133,7 @@ grow(Dict *d, Spec *spec) { d->pages = new; d->ntombs = 0; spec->free(old); +#endif return 0; } @@ -185,128 +150,148 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { return -1; } - usize psize = psize_unsafe(spec); - u64 h0 = hash0(h, d->b); + usize h0 = hash0(h, d->b); u8 h1 = hash1(h, d->b); -#if PAGE_LEN == 32 +#if PROBE_LEN == 32 v32u8 vh1 = v32u8_fill(h1); v32u8 vempty = v32u8_fill(EMPTY); -#elif PAGE_LEN == 16 +#elif PROBE_LEN == 16 v16u8 vh1 = v16u8_fill(h1); v16u8 vempty = v16u8_fill(EMPTY); #else u64 vh1; memset(&vh1, h1, sizeof vh1); #endif - usize pn = h0; - Page *createp = 0; - usize createj; - bool createtomb; - for (usize i = 1;; i++) { - Page *p = (void *)((u8 *)d->pages + pn*psize); + Page *pages = d->data + (USIZE_C(1) << d->b); + bool create_slot_found = false; + usize create_idx; + bool create_replacing_tomb; + /* i is the index of a metadata vector (i.e., a u8[PROBE_LEN]). + * We increment it quadratically. */ + usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u; + for (usize i = h0, jump = 1;; i = (i + jump) & meta_mask, jump++) { + u8 *meta = d->data + i * PROBE_LEN; /* We must load the metadata vector as little endian, so that low bits * in the vector represent earlier slots in the table and hence ctz * gives us earlier slots first. In the SIMD cases, we know we're on * x86, so a memcpy suffices. In the general fallback case, we use * readl64, which does a byte swap on big endian machines. */ -#if PAGE_LEN == 32 - v32u8 meta; memcpy(&meta, &p->meta, sizeof meta); - for (u32 m = v32u8_msb(v32u8_cmpeq(meta, vh1)); m != 0; m &= m - 1) { +#if PROBE_LEN == 32 + v32u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta); + /* m is a bit array indicating where vmeta equals h1. Thus, ctz(m) + * is the least j such that meta[j] == h1 (provided m != 0). */ + for (u32 m = v32u8_msb(v32u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) { int j = r_ctz32(m); -#elif PAGE_LEN == 16 - v16u8 meta; memcpy(&meta, &p->meta, sizeof meta); - for (u16 m = v16u8_msb(v16u8_cmpeq(meta, vh1)); m != 0; m &= m - 1) { +#elif PROBE_LEN == 16 + v16u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta); + for (u16 m = v16u8_msb(v16u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) { int j = r_ctz16(m); #else - u64 meta = r_readl64(&p->meta); - u64 z = vh1 ^ meta; - for (int j = r_r8search64(z); j < 8; z |= U64_C(1) << (8*j), j = r_r8search64(z)) { + u64 vmeta = r_readl64(meta); + u64 z = vh1 ^ vmeta; + /* z now has 0s exactly where vmeta had h1. Thus, we can use r8search64 + * on z to find the indices j of h1 in vmeta. After each iteration, we + * mask in an arbitrary bit into the jth byte so that the next + * iteration gets a different j. */ + for (int j = r_r8search64(z); j < 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) { #endif - if likely (spec->eq(key(p, j, spec), k, spec)) { + usize e = i * PROBE_LEN + j; + Page *p = pages + (e - e%PAGE_LEN) * (spec->ksize + spec->vsize); + if likely (spec->eq(p + (e%PAGE_LEN) * spec->ksize, k, spec)) { if (mode == DELETE) { -#if PAGE_LEN == 32 - if (v32u8_msb(v32u8_cmpeq(meta, vempty)) != 0) { -#elif PAGE_LEN == 16 - if (v16u8_msb(v16u8_cmpeq(meta, vempty)) != 0) { +#if PROBE_LEN == 32 + if (v32u8_msb(v32u8_cmpeq(vmeta, vempty)) != 0) { +#elif PROBE_LEN == 16 + if (v16u8_msb(v16u8_cmpeq(vmeta, vempty)) != 0) { #else - if (r_r8search64(meta) < 8) { + if (r_r8search64(vmeta) < 8) { #endif - p->meta[j] = EMPTY; + meta[j] = EMPTY; } else { - p->meta[j] = TOMBSTONE; + meta[j] = TOMBSTONE; d->ntombs += 1; } d->count -= 1; } - *v = val(p, j, spec); + *v = p + PAGE_LEN * spec->ksize + (e%PAGE_LEN) * spec->vsize; return 1; } } /* Search for an EMPTY slot. */ -#if PAGE_LEN == 32 - u32 m0 = v32u8_msb(v32u8_cmpeq(meta, vempty)); -#elif PAGE_LEN == 16 - u16 m0 = v16u8_msb(v16u8_cmpeq(meta, vempty)); +#if PROBE_LEN == 32 + u32 m0 = v32u8_msb(v32u8_cmpeq(vmeta, vempty)); +#elif PROBE_LEN == 16 + u16 m0 = v16u8_msb(v16u8_cmpeq(vmeta, vempty)); #else - int j0 = r_r8search64(meta); + int j0 = r_r8search64(vmeta); #endif /* In mode CREATE, we need to look for an EMPTY or TOMBSTONE slot in * case we don't find an existing slot with the given key. We * prioritize filling TOMBSTONE slots to decrease the load factor. */ - if (mode == CREATE && !createp) { -#if PAGE_LEN == 32 + if (mode == CREATE && !create_slot_found) { +#if PROBE_LEN == 32 v32u8 vtomb = v32u8_fill(TOMBSTONE); - u32 m1 = v32u8_msb(v32u8_cmpeq(meta, vtomb)); + u32 m1 = v32u8_msb(v32u8_cmpeq(vmeta, vtomb)); if (m1 != 0) { - createp = p; createj = r_ctz32(m1); createtomb = true; + create_slot_found = true; + create_idx = i * PROBE_LEN + r_ctz32(m1); + create_replacing_tomb = true; } else if (m0 != 0) { - createp = p; createj = r_ctz32(m0); createtomb = false; + create_slot_found = true; + create_idx = i * PROBE_LEN + r_ctz32(m0); + create_replacing_tomb = false; break; } -#elif PAGE_LEN == 16 +#elif PROBE_LEN == 16 v16u8 vtomb = v16u8_fill(TOMBSTONE); - u16 m1 = v16u8_msb(v16u8_cmpeq(meta, vtomb)); + u16 m1 = v16u8_msb(v16u8_cmpeq(vmeta, vtomb)); if (m1 != 0) { - createp = p; createj = r_ctz16(m1); createtomb = true; + create_slot_found = true; + create_idx = i * PROBE_LEN + r_ctz16(m1); + create_replacing_tomb = true; } else if (m0 != 0) { - createp = p; createj = r_ctz16(m0); createtomb = false; + create_slot_found = true; + create_idx = i * PROBE_LEN + r_ctz16(m0); + create_replacing_tomb = false; break; } #else u64 vtomb; memset(&vtomb, TOMBSTONE, sizeof vtomb); - int j1 = r_r8search64(meta ^ vtomb); + int j1 = r_r8search64(vmeta ^ vtomb); if (j1 != 8) { - createp = p; createj = j1; createtomb = true; + create_slot_found = true; + create_idx = j1; + create_replacing_tomb = true; } else if (j0 != 8) { - createp = p; createj = j0; createtomb = false; + create_slot_found = true; + create_idx = j0; + create_replacing_tomb = false; break; } #endif } /* If the page contains an EMPTY slot, the key can't be any further. */ -#if PAGE_LEN > 8 +#if PROBE_LEN > 8 if (m0 != 0) break; #else if (j0 != 8) break; #endif - - /* Quadratic probing between pages */ - pn = (pn + i) & ((USIZE_C(1) << d->b) - 1u); } if (mode == CREATE) { d->count += 1; - if (createtomb) d->ntombs -= 1; - createp->meta[createj] = h1; + if (create_replacing_tomb) d->ntombs -= 1; + d->data[create_idx] = h1; + Page *p = pages + (create_idx - create_idx%PAGE_LEN) * (spec->ksize + spec->vsize); /* TODO: try returning a pointer to the key slot instead, so that the * compiler can generate copy code when ksize is a constant */ - memcpy(key(createp, createj, spec), k, spec->ksize); - *v = val(createp, createj, spec); + memcpy(p + (create_idx%PAGE_LEN) * spec->ksize, k, spec->ksize); + *v = p + PAGE_LEN * spec->ksize + (create_idx%PAGE_LEN) * spec->vsize; } return 0; @@ -319,23 +304,29 @@ r_dict_init(Dict *d, usize hint, Spec *spec) { /* Impose maximum on hint to prevent overflow and the following while * loop from being infinite. See exceeds_lf. */ hint = MIN(hint, (U64_MAX / LF_DEN) >> 1); - u8 b = 1; + u8 b = MAX(R_CACHE_LINE_BITS, PROBE_LEN_BITS); while (exceeds_lf(hint, b)) b += 1; assert(b < USIZE_BITS); d->b = b; usize len = USIZE_C(1) << b; - usize psize = psize_safe(spec); - if (r_mul_will_overflow_(len, psize)) { - errno = ENOMEM; - return -1; - } - d->pages = spec->allocz(len * psize); - if (!d->pages) return -1; + if (r_mul_will_overflow_(len, 1u + spec->ksize + spec->vsize)) + goto alloc_error; + usize dsize = len * (1u + spec->ksize + spec->vsize); + if (dsize > USIZE_MAX - (R_CACHE_LINE_SIZE - 1)) + goto alloc_error; + d->mem = spec->allocz((R_CACHE_LINE_SIZE - 1) + dsize); + if (!d->mem) + goto alloc_error; + d->data = (void *)(((uptr)d->mem + (R_CACHE_LINE_SIZE - 1)) & -((uptr)R_CACHE_LINE_SIZE)); d->seed = r_prand64(); return 0; + +alloc_error: + errno = ENOMEM; + return -1; } int @@ -355,14 +346,7 @@ r_dict_delete(Dict *d, void **v, void *k, u64 h, Spec *spec) { void r_dict_clear(Dict *d, Spec *spec) { - usize psize = psize_unsafe(spec); - - usize len = USIZE_C(1) << d->b; - for (usize i = 0; i < len; i++) { - Page *p = (void *)((u8 *)d->pages + i*psize); - memset(p->meta, EMPTY, sizeof p->meta); - } - + memset(d->data, EMPTY, USIZE_C(1) << d->b); d->count = 0; d->ntombs = 0;