commit ae39fe778a747c38cdd3824e21ddbca7e8aef742
parent 35506348ea8ea822e9ac8a55b001773d55ef36f0
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Sun, 4 Jun 2023 09:52:09 -0700
Change hash table structure
See a previous commit message.
Diffstat:
| M | inc/dict.h | | | 6 | ++++-- |
| M | src/dict.c | | | 250 | +++++++++++++++++++++++++++++++++++++------------------------------------------ |
2 files changed, 121 insertions(+), 135 deletions(-)
diff --git a/inc/dict.h b/inc/dict.h
@@ -56,7 +56,8 @@ typedef struct D { \
u64 seed; \
usize count; \
usize ntombs; \
- void *pages; \
+ void *mem; \
+ u8 *data; \
V default_val; \
} D;
@@ -81,7 +82,8 @@ static inline UNUSED int R_DICT_METHOD(init,##__VA_ARGS__)(D *d, usize hint) { \
static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { \
if (d->niter > 0) \
r_warnf("dict freed with %"PRId32" iterators alive", d->niter); \
- R_DICT_FREE(d->pages); \
+ R_DICT_FREE(d->mem); \
+ d->data = 0; /* Ensure hard error on use after free. */ \
} \
static inline UNUSED bool R_DICT_METHOD(get,##__VA_ARGS__)(D *d, V *v, K k) { \
u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
diff --git a/src/dict.c b/src/dict.c
@@ -7,16 +7,21 @@
#include "rand.h"
#include "rcx.h"
-#if defined(__GNUC__) && defined(R_HAVE_AVX2) && 0
+#if defined(__GNUC__) && defined(R_HAVE_AVX2)
#include "simd.h"
-#define PAGE_LEN 32u
-#elif defined(__GNUC__) && defined(R_HAVE_SSE2) && 0
+#define PROBE_LEN 32u
+#define PROBE_LEN_BITS 5u
+#elif defined(__GNUC__) && defined(R_HAVE_SSE2)
#include "simd.h"
-#define PAGE_LEN 16u
+#define PROBE_LEN 16u
+#define PROBE_LEN_BITS 4u
#else
-#define PAGE_LEN 8u
+#define PROBE_LEN 8u
+#define PROBE_LEN_BITS 3u
#endif
+/* PAGE_LEN should be a multiple of 8 for packing and alignment. */
+#define PAGE_LEN 8u
#define LF_NUM 3u
#define LF_DEN 4u
#define EMPTY 0u
@@ -25,7 +30,7 @@
typedef struct r_dict_spec Spec;
typedef struct r_dict Dict;
-typedef struct page Page;
+typedef u8 Page; /* For readability only; a page isn't really a u8. */
enum scan_mode {
ACCESS = 0,
@@ -34,24 +39,19 @@ enum scan_mode {
};
struct r_dict {
- u8 b;
+ u8 b; /* Log2 of number of slots */
u32 niter;
u64 seed;
usize count;
usize ntombs;
- Page *pages;
+ void *mem;
+ u8 *data; /* Cache line-aligned pointer into mem */
/* V default_val; */
};
-struct page {
- u8 meta[PAGE_LEN];
- /* K key[PAGE_LEN]; */
- /* V val[PAGE_LEN]; */
-};
-
-static inline u64
+static inline usize
hash0(u64 h, u8 b) {
- return h & ((U64_C(1) << b) - 1u);
+ return h & ((U64_C(1) << (b - PROBE_LEN_BITS)) - 1u);
}
static inline u8
@@ -64,56 +64,19 @@ hash1(u64 h, u8 b) {
static inline bool
exceeds_lf(u64 n, u8 b) {
/* Note that the math here is 64-bit to avoid overflow. */
- return LF_DEN * n > LF_NUM * PAGE_LEN * (U64_C(1) << b);
-}
-
-/* TODO: try computing psize when in dict.h instead. */
-
-/* Compute Page size *without* overflow checks. */
-static inline usize
-psize_unsafe(Spec *spec) {
- return sizeof(Page) + PAGE_LEN*spec->ksize + PAGE_LEN*spec->vsize;
-}
-
-/* Compute Page size *with* overflow checks. */
-static inline usize
-psize_safe(Spec *spec) {
- usize psize = sizeof(Page);
-
- usize ksize = spec->ksize;
- assert(!r_mul_will_overflow_(PAGE_LEN, ksize)
- && PAGE_LEN*ksize <= USIZE_MAX - psize);
- psize += PAGE_LEN * ksize;
-
- usize vsize = spec->vsize;
- assert(!r_mul_will_overflow_(PAGE_LEN, vsize)
- && PAGE_LEN*vsize <= USIZE_MAX - psize);
- psize += PAGE_LEN * vsize;
-
- return psize;
-}
-
-static inline void *
-key(Page *p, usize j, Spec *spec) {
- return (u8 *)(p+1) + j*spec->ksize;
-}
-
-static inline void *
-val(Page *p, usize j, Spec *spec) {
- return (u8 *)(p+1) + PAGE_LEN*spec->ksize + j*spec->vsize;
+ return LF_DEN * n > LF_NUM * (U64_C(1) << b);
}
static void
insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) {
assert(false);
-
- usize psize = psize_unsafe(spec);
- u64 h0 = hash0(h, b);
+#if 0
+ usize h0 = hash0(h, b);
u8 h1 = hash1(h, b);
usize pn = h0;
for (usize i = 1;; i++) {
- Page *p = (void *)((u8 *)pages + pn*psize);
+ Page *p = (void *)((u8 *)pages + pn*spec->psize);
/* TODO: this is wrong. Should probably just generalize scan and
* replace insert with scan. Otherwise we need a bunch of #ifs here. */
@@ -127,17 +90,18 @@ insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) {
return;
}
- j = (j + 1) & (PAGE_LEN - 1);
+ j = (j + 1) & (PROBE_LEN - 1);
} while (j != h1);
pn = (pn + i) & ((USIZE_C(1) << b) - 1u);
}
+#endif
}
static int
grow(Dict *d, Spec *spec) {
- usize psize = psize_unsafe(spec);
-
+ assert(false);
+#if 0
u8 oldb = d->b;
u8 newb = oldb + 1;
assert(newb < USIZE_BITS);
@@ -146,17 +110,17 @@ grow(Dict *d, Spec *spec) {
Page *old = d->pages;
usize newlen = USIZE_C(1) << newb;
- if (r_mul_will_overflow_(newlen, psize)) {
+ if (r_mul_will_overflow_(newlen, spec->psize)) {
errno = ENOMEM;
return -1;
}
- Page *new = spec->allocz(newlen * psize);
+ Page *new = spec->allocz(newlen * spec->psize);
if (!new) return -1;
for (usize i = 0; i < oldlen; i++) {
- Page *p = (void *)((u8 *)old + i*psize);
+ Page *p = (void *)((u8 *)old + i*spec->psize);
- for (usize j = 0; j < PAGE_LEN; j++) {
+ for (usize j = 0; j < PROBE_LEN; j++) {
if (p->meta[j] < MIN_HASH1) continue;
void *k = key(p, j, spec);
void *v = val(p, j, spec);
@@ -169,6 +133,7 @@ grow(Dict *d, Spec *spec) {
d->pages = new;
d->ntombs = 0;
spec->free(old);
+#endif
return 0;
}
@@ -185,128 +150,148 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
return -1;
}
- usize psize = psize_unsafe(spec);
- u64 h0 = hash0(h, d->b);
+ usize h0 = hash0(h, d->b);
u8 h1 = hash1(h, d->b);
-#if PAGE_LEN == 32
+#if PROBE_LEN == 32
v32u8 vh1 = v32u8_fill(h1);
v32u8 vempty = v32u8_fill(EMPTY);
-#elif PAGE_LEN == 16
+#elif PROBE_LEN == 16
v16u8 vh1 = v16u8_fill(h1);
v16u8 vempty = v16u8_fill(EMPTY);
#else
u64 vh1; memset(&vh1, h1, sizeof vh1);
#endif
- usize pn = h0;
- Page *createp = 0;
- usize createj;
- bool createtomb;
- for (usize i = 1;; i++) {
- Page *p = (void *)((u8 *)d->pages + pn*psize);
+ Page *pages = d->data + (USIZE_C(1) << d->b);
+ bool create_slot_found = false;
+ usize create_idx;
+ bool create_replacing_tomb;
+ /* i is the index of a metadata vector (i.e., a u8[PROBE_LEN]).
+ * We increment it quadratically. */
+ usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u;
+ for (usize i = h0, jump = 1;; i = (i + jump) & meta_mask, jump++) {
+ u8 *meta = d->data + i * PROBE_LEN;
/* We must load the metadata vector as little endian, so that low bits
* in the vector represent earlier slots in the table and hence ctz
* gives us earlier slots first. In the SIMD cases, we know we're on
* x86, so a memcpy suffices. In the general fallback case, we use
* readl64, which does a byte swap on big endian machines. */
-#if PAGE_LEN == 32
- v32u8 meta; memcpy(&meta, &p->meta, sizeof meta);
- for (u32 m = v32u8_msb(v32u8_cmpeq(meta, vh1)); m != 0; m &= m - 1) {
+#if PROBE_LEN == 32
+ v32u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta);
+ /* m is a bit array indicating where vmeta equals h1. Thus, ctz(m)
+ * is the least j such that meta[j] == h1 (provided m != 0). */
+ for (u32 m = v32u8_msb(v32u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) {
int j = r_ctz32(m);
-#elif PAGE_LEN == 16
- v16u8 meta; memcpy(&meta, &p->meta, sizeof meta);
- for (u16 m = v16u8_msb(v16u8_cmpeq(meta, vh1)); m != 0; m &= m - 1) {
+#elif PROBE_LEN == 16
+ v16u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta);
+ for (u16 m = v16u8_msb(v16u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) {
int j = r_ctz16(m);
#else
- u64 meta = r_readl64(&p->meta);
- u64 z = vh1 ^ meta;
- for (int j = r_r8search64(z); j < 8; z |= U64_C(1) << (8*j), j = r_r8search64(z)) {
+ u64 vmeta = r_readl64(meta);
+ u64 z = vh1 ^ vmeta;
+ /* z now has 0s exactly where vmeta had h1. Thus, we can use r8search64
+ * on z to find the indices j of h1 in vmeta. After each iteration, we
+ * mask in an arbitrary bit into the jth byte so that the next
+ * iteration gets a different j. */
+ for (int j = r_r8search64(z); j < 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) {
#endif
- if likely (spec->eq(key(p, j, spec), k, spec)) {
+ usize e = i * PROBE_LEN + j;
+ Page *p = pages + (e - e%PAGE_LEN) * (spec->ksize + spec->vsize);
+ if likely (spec->eq(p + (e%PAGE_LEN) * spec->ksize, k, spec)) {
if (mode == DELETE) {
-#if PAGE_LEN == 32
- if (v32u8_msb(v32u8_cmpeq(meta, vempty)) != 0) {
-#elif PAGE_LEN == 16
- if (v16u8_msb(v16u8_cmpeq(meta, vempty)) != 0) {
+#if PROBE_LEN == 32
+ if (v32u8_msb(v32u8_cmpeq(vmeta, vempty)) != 0) {
+#elif PROBE_LEN == 16
+ if (v16u8_msb(v16u8_cmpeq(vmeta, vempty)) != 0) {
#else
- if (r_r8search64(meta) < 8) {
+ if (r_r8search64(vmeta) < 8) {
#endif
- p->meta[j] = EMPTY;
+ meta[j] = EMPTY;
} else {
- p->meta[j] = TOMBSTONE;
+ meta[j] = TOMBSTONE;
d->ntombs += 1;
}
d->count -= 1;
}
- *v = val(p, j, spec);
+ *v = p + PAGE_LEN * spec->ksize + (e%PAGE_LEN) * spec->vsize;
return 1;
}
}
/* Search for an EMPTY slot. */
-#if PAGE_LEN == 32
- u32 m0 = v32u8_msb(v32u8_cmpeq(meta, vempty));
-#elif PAGE_LEN == 16
- u16 m0 = v16u8_msb(v16u8_cmpeq(meta, vempty));
+#if PROBE_LEN == 32
+ u32 m0 = v32u8_msb(v32u8_cmpeq(vmeta, vempty));
+#elif PROBE_LEN == 16
+ u16 m0 = v16u8_msb(v16u8_cmpeq(vmeta, vempty));
#else
- int j0 = r_r8search64(meta);
+ int j0 = r_r8search64(vmeta);
#endif
/* In mode CREATE, we need to look for an EMPTY or TOMBSTONE slot in
* case we don't find an existing slot with the given key. We
* prioritize filling TOMBSTONE slots to decrease the load factor. */
- if (mode == CREATE && !createp) {
-#if PAGE_LEN == 32
+ if (mode == CREATE && !create_slot_found) {
+#if PROBE_LEN == 32
v32u8 vtomb = v32u8_fill(TOMBSTONE);
- u32 m1 = v32u8_msb(v32u8_cmpeq(meta, vtomb));
+ u32 m1 = v32u8_msb(v32u8_cmpeq(vmeta, vtomb));
if (m1 != 0) {
- createp = p; createj = r_ctz32(m1); createtomb = true;
+ create_slot_found = true;
+ create_idx = i * PROBE_LEN + r_ctz32(m1);
+ create_replacing_tomb = true;
} else if (m0 != 0) {
- createp = p; createj = r_ctz32(m0); createtomb = false;
+ create_slot_found = true;
+ create_idx = i * PROBE_LEN + r_ctz32(m0);
+ create_replacing_tomb = false;
break;
}
-#elif PAGE_LEN == 16
+#elif PROBE_LEN == 16
v16u8 vtomb = v16u8_fill(TOMBSTONE);
- u16 m1 = v16u8_msb(v16u8_cmpeq(meta, vtomb));
+ u16 m1 = v16u8_msb(v16u8_cmpeq(vmeta, vtomb));
if (m1 != 0) {
- createp = p; createj = r_ctz16(m1); createtomb = true;
+ create_slot_found = true;
+ create_idx = i * PROBE_LEN + r_ctz16(m1);
+ create_replacing_tomb = true;
} else if (m0 != 0) {
- createp = p; createj = r_ctz16(m0); createtomb = false;
+ create_slot_found = true;
+ create_idx = i * PROBE_LEN + r_ctz16(m0);
+ create_replacing_tomb = false;
break;
}
#else
u64 vtomb; memset(&vtomb, TOMBSTONE, sizeof vtomb);
- int j1 = r_r8search64(meta ^ vtomb);
+ int j1 = r_r8search64(vmeta ^ vtomb);
if (j1 != 8) {
- createp = p; createj = j1; createtomb = true;
+ create_slot_found = true;
+ create_idx = j1;
+ create_replacing_tomb = true;
} else if (j0 != 8) {
- createp = p; createj = j0; createtomb = false;
+ create_slot_found = true;
+ create_idx = j0;
+ create_replacing_tomb = false;
break;
}
#endif
}
/* If the page contains an EMPTY slot, the key can't be any further. */
-#if PAGE_LEN > 8
+#if PROBE_LEN > 8
if (m0 != 0) break;
#else
if (j0 != 8) break;
#endif
-
- /* Quadratic probing between pages */
- pn = (pn + i) & ((USIZE_C(1) << d->b) - 1u);
}
if (mode == CREATE) {
d->count += 1;
- if (createtomb) d->ntombs -= 1;
- createp->meta[createj] = h1;
+ if (create_replacing_tomb) d->ntombs -= 1;
+ d->data[create_idx] = h1;
+ Page *p = pages + (create_idx - create_idx%PAGE_LEN) * (spec->ksize + spec->vsize);
/* TODO: try returning a pointer to the key slot instead, so that the
* compiler can generate copy code when ksize is a constant */
- memcpy(key(createp, createj, spec), k, spec->ksize);
- *v = val(createp, createj, spec);
+ memcpy(p + (create_idx%PAGE_LEN) * spec->ksize, k, spec->ksize);
+ *v = p + PAGE_LEN * spec->ksize + (create_idx%PAGE_LEN) * spec->vsize;
}
return 0;
@@ -319,23 +304,29 @@ r_dict_init(Dict *d, usize hint, Spec *spec) {
/* Impose maximum on hint to prevent overflow and the following while
* loop from being infinite. See exceeds_lf. */
hint = MIN(hint, (U64_MAX / LF_DEN) >> 1);
- u8 b = 1;
+ u8 b = MAX(R_CACHE_LINE_BITS, PROBE_LEN_BITS);
while (exceeds_lf(hint, b)) b += 1;
assert(b < USIZE_BITS);
d->b = b;
usize len = USIZE_C(1) << b;
- usize psize = psize_safe(spec);
- if (r_mul_will_overflow_(len, psize)) {
- errno = ENOMEM;
- return -1;
- }
- d->pages = spec->allocz(len * psize);
- if (!d->pages) return -1;
+ if (r_mul_will_overflow_(len, 1u + spec->ksize + spec->vsize))
+ goto alloc_error;
+ usize dsize = len * (1u + spec->ksize + spec->vsize);
+ if (dsize > USIZE_MAX - (R_CACHE_LINE_SIZE - 1))
+ goto alloc_error;
+ d->mem = spec->allocz((R_CACHE_LINE_SIZE - 1) + dsize);
+ if (!d->mem)
+ goto alloc_error;
+ d->data = (void *)(((uptr)d->mem + (R_CACHE_LINE_SIZE - 1)) & -((uptr)R_CACHE_LINE_SIZE));
d->seed = r_prand64();
return 0;
+
+alloc_error:
+ errno = ENOMEM;
+ return -1;
}
int
@@ -355,14 +346,7 @@ r_dict_delete(Dict *d, void **v, void *k, u64 h, Spec *spec) {
void
r_dict_clear(Dict *d, Spec *spec) {
- usize psize = psize_unsafe(spec);
-
- usize len = USIZE_C(1) << d->b;
- for (usize i = 0; i < len; i++) {
- Page *p = (void *)((u8 *)d->pages + i*psize);
- memset(p->meta, EMPTY, sizeof p->meta);
- }
-
+ memset(d->data, EMPTY, USIZE_C(1) << d->b);
d->count = 0;
d->ntombs = 0;