commit 3260a7d245b9f6f9b764c4cc47cb029b93a67f46
parent 2c0567085f8ddb562cb1d26a666e34e1004e96ee
Author: Robert Russell <robert@rr3.xyz>
Date: Sun, 12 Jan 2025 23:19:20 -0800
Add full width signed multiplies
Diffstat:
3 files changed, 84 insertions(+), 37 deletions(-)
diff --git a/inc/bits.h b/inc/bits.h
@@ -205,6 +205,33 @@ r_rzb64(u64 n) {
}
+/* ----- Sign extension ----- */
+
+static inline u8
+r_sext8(u8 x, uint b) {
+ uint c = 8 - b;
+ return (u8)((i8)(x << c) >> c);
+}
+
+static inline u16
+r_sext16(u16 x, uint b) {
+ uint c = 16 - b;
+ return (u16)((i16)(x << c) >> c);
+}
+
+static inline u32
+r_sext32(u32 x, uint b) {
+ uint c = 32 - b;
+ return (u32)((i32)(x << c) >> c);
+}
+
+static inline u64
+r_sext64(u64 x, uint b) {
+ uint c = 64 - b;
+ return (u64)((i64)(x << c) >> c);
+}
+
+
/* ----- Ternary add and subtract ----- */
/* We implement ternary add/sub on arbitrary unsigned integers instead of with
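[Editor's note on the hunk above: the r_sext* helpers use the classic
shift-up/arithmetic-shift-down idiom: the value's b-bit sign bit is moved
into the type's sign bit, then a signed right shift replicates it back down.
This leans on arithmetic right shift of negative values (implementation-
defined in ISO C, but universal in practice), and callers presumably keep
1 <= b <= width, since b = 0 would make the shift count equal to the type
width, which is undefined for the 32- and 64-bit variants. A minimal usage
sketch, with an invented 12-bit field width that is not from this commit:

    /* Sketch: recover a signed 12-bit field from the low bits of a word. */
    u32 field = 0xfff;            /* 12-bit two's complement -1 */
    u32 v = r_sext32(field, 12);  /* 0xffffffff, i.e. (u32)-1   */
]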
@@ -292,34 +319,34 @@ r_sub64(u64 *h, u64 *l, u64 x, u64 y, u64 z) {
/* ----- Full width multiply ----- */
static inline void
-r_mul8(u8 *h, u8 *l, u8 x, u8 y) {
+r_mulu8(u8 *h, u8 *l, u8 x, u8 y) {
u16 hl = (u16)x * (u16)y;
*h = hl >> 8;
*l = hl;
}
static inline void
-r_mul16(u16 *h, u16 *l, u16 x, u16 y) {
+r_mulu16(u16 *h, u16 *l, u16 x, u16 y) {
u32 hl = (u32)x * (u32)y;
*h = hl >> 16;
*l = hl;
}
static inline void
-r_mul32(u32 *h, u32 *l, u32 x, u32 y) {
+r_mulu32(u32 *h, u32 *l, u32 x, u32 y) {
u64 hl = (u64)x * (u64)y;
*h = hl >> 32;
*l = hl;
}
static inline void
-r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
+r_mulu64(u64 *h, u64 *l, u64 x, u64 y) {
#ifdef R_HAVE_128
u128 hl = (u128)x * (u128)y;
*h = hl >> 64;
*l = hl;
#else
- const u64 m = (U64_C(1)<<32) - 1;
+ const u64 m = (U64_C(1) << 32) - 1;
u64 x0 = x & m;
u64 x1 = x >> 32;
@@ -331,10 +358,57 @@ r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
u64 x1y0 = x1 * y0;
u64 x1y1 = x1 * y1;
- u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32;
+ u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32;
+
+ *h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c;
+ *l = x0y0 + (x0y1 << 32) + (x1y0 << 32);
+#endif
+}
+
+static inline void
+r_muls8(i8 *h, u8 *l, i8 x, i8 y) {
+ i16 hl = (i16)x * (i16)y;
+ *h = hl >> 8;
+ *l = hl;
+}
+
+static inline void
+r_muls16(i16 *h, u16 *l, i16 x, i16 y) {
+ i32 hl = (i32)x * (i32)y;
+ *h = hl >> 16;
+ *l = hl;
+}
+
+static inline void
+r_muls32(i32 *h, u32 *l, i32 x, i32 y) {
+ i64 hl = (i64)x * (i64)y;
+ *h = hl >> 32;
+ *l = hl;
+}
+
+static inline void
+r_muls64(i64 *h, u64 *l, i64 x, i64 y) {
+#ifdef R_HAVE_128
+ i128 hl = (i128)x * (i128)y;
+ *h = hl >> 64;
+ *l = hl;
+#else
+ const u64 m = (U64_C(1) << 32) - 1;
- *h = x1y1 + (x0y1>>32) + (x1y0>>32) + c;
- *l = x0y0 + (x0y1<<32) + (x1y0<<32);
+ u64 x0 = x & m;
+ i64 x1 = x >> 32;
+ u64 y0 = y & m;
+ i64 y1 = y >> 32;
+
+ u64 x0y0 = x0 * y0;
+ i64 x0y1 = x0 * y1;
+ i64 x1y0 = x1 * y0;
+ i64 x1y1 = x1 * y1;
+
+ u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32;
+
+ *h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c;
+ *l = x0y0 + ((u64)x0y1 << 32) + ((u64)x1y0 << 32); /* unsigned casts: left-shifting a negative i64 is UB */
#endif
}
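[Editor's note: both 64-bit fallbacks use the same schoolbook decomposition
into 32-bit limbs. With x = x1*2^32 + x0 and y = y1*2^32 + y0, the product is
x1y1*2^64 + (x0y1 + x1y0)*2^32 + x0y0, and c is the carry out of the middle
column (at most 2, since it sums three values each below 2^32). The signed
variant differs only in keeping x1, y1, and the cross products signed, so
the arithmetic right shifts carry each partial product's sign into the high
word. The narrower signed multiplies are small enough to verify exhaustively;
a throwaway sketch, not part of the commit (include path may differ):

    /* Sketch: exhaustively check r_muls8 against a plain int product. */
    #include <assert.h>
    #include "bits.h"
    void check_muls8(void) {
        for (int x = -128; x <= 127; x++)
            for (int y = -128; y <= 127; y++) {
                i8 h; u8 l;
                r_muls8(&h, &l, (i8)x, (i8)y);
                /* >> on a negative int is arithmetic on mainstream ABIs */
                assert(h == (i8)((x * y) >> 8) && l == (u8)(x * y));
            }
    }
]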
@@ -372,33 +446,6 @@ r_swap64(u64 n) {
#endif
-/* ----- Sign extension ----- */
-
-static inline u8
-r_sext8(u8 x, uint b) {
- uint c = 8 - b;
- return (u8)((i8)(x << c) >> c);
-}
-
-static inline u16
-r_sext16(u16 x, uint b) {
- uint c = 16 - b;
- return (u16)((i16)(x << c) >> c);
-}
-
-static inline u32
-r_sext32(u32 x, uint b) {
- uint c = 32 - b;
- return (u32)((i32)(x << c) >> c);
-}
-
-static inline u64
-r_sext64(u64 x, uint b) {
- uint c = 64 - b;
- return (u64)((i64)(x << c) >> c);
-}
-
-
/* ----- Endian conversions ----- */
/* There is 2x redundancy here (e.g., ltoh = htol), but this allows code using
diff --git a/inc/rand.h b/inc/rand.h
@@ -14,7 +14,7 @@ extern u64 r_hash_key[4];
static inline u64
r_wymix_(u64 x, u64 y) {
u64 h, l;
- r_mul64(&h, &l, x, y);
+ r_mulu64(&h, &l, x, y);
return h ^ l;
}
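[Editor's note: r_wymix_ is the wyhash-style mixing step: a full 64x64 -> 128
multiply whose halves are folded together with xor, which is why it only ever
needed the unsigned variant and simply picks up the r_mulu64 name. The trailing
underscore marks it internal; a tiny illustrative use, with a placeholder
constant that is not from this repo:

    /* Sketch: mix a counter with a seed; the odd constant is arbitrary. */
    u64 mixed = r_wymix_(counter ^ UINT64_C(0x9e3779b97f4a7c15), seed);
]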
diff --git a/src/rand.c b/src/rand.c
@@ -57,7 +57,7 @@ r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]) {
a ^= (*key)[1];
b ^= seed;
- r_mul64(&b, &a, a, b);
+ r_mulu64(&b, &a, a, b);
return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]);
}
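[Editor's note: the rand.h and rand.c hunks are mechanical renames, since
hashing only needs the unsigned widening multiply. For the new signed 64-bit
fallback, one way to build confidence is to cross-check it against a 128-bit
reference on boundary values, building with R_HAVE_128 undefined so the
32-bit-limb path is the one exercised. A hypothetical harness, not part of
the commit (assumes GCC/Clang __int128 and that i64/u64 are the stdint
fixed-width types):

    #include <assert.h>
    #include <stdint.h>
    #include "bits.h"   /* project header; path may differ */

    static void check_muls64(i64 x, i64 y) {
        i64 h; u64 l;
        r_muls64(&h, &l, x, y);
        __int128 p = (__int128)x * y;   /* reference product */
        assert(h == (i64)(p >> 64) && l == (u64)p);
    }

    int main(void) {
        i64 v[] = { 0, 1, -1, INT64_MIN, INT64_MAX,
                    (i64)1 << 32, -((i64)1 << 32) };
        for (int i = 0; i < 7; i++)
            for (int j = 0; j < 7; j++)
                check_muls64(v[i], v[j]);
        return 0;
    }
]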