rcx

miscellaneous C library
git clone git://git.rr3.xyz/rcx

bits.h (16558B)


      1 #pragma once
      2 
      3 #include <stdbool.h>
      4 #include <string.h>
      5 
      6 #include "def.h"
      7 
      8 
      9 /* ----- Bit rotates ----- */
     10 
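        /* Masking the rotate count keeps both shift amounts strictly below the
         * operand width, so the rotates are well defined for any c, including
         * 0 and multiples of the width. */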
     11 static inline u8  r_rotl8 (u8  n, uint c) { return (n << (c &  7u)) | (n >> (-c &  7u)); }
     12 static inline u16 r_rotl16(u16 n, uint c) { return (n << (c & 15u)) | (n >> (-c & 15u)); }
     13 static inline u32 r_rotl32(u32 n, uint c) { return (n << (c & 31u)) | (n >> (-c & 31u)); }
     14 static inline u64 r_rotl64(u64 n, uint c) { return (n << (c & 63u)) | (n >> (-c & 63u)); }
     15 
     16 static inline u8  r_rotr8 (u8  n, uint c) { return (n >> (c &  7u)) | (n << (-c &  7u)); }
     17 static inline u16 r_rotr16(u16 n, uint c) { return (n >> (c & 15u)) | (n << (-c & 15u)); }
     18 static inline u32 r_rotr32(u32 n, uint c) { return (n >> (c & 31u)) | (n << (-c & 31u)); }
     19 static inline u64 r_rotr64(u64 n, uint c) { return (n >> (c & 63u)) | (n << (-c & 63u)); }
     20 
     21 
     22 /* ----- Population count ----- */
     23 
     24 /* The GCC builtins do not use hardware popcnt unless an appropriate -march
     25  * is specified, in which case __POPCNT__ is defined. Otherwise, GCC emits
     26  * calls to libgcc which are slower than the custom implementations below. */
     27 
     28 #if defined(__GNUC__) && defined(__POPCNT__)
     29 
     30 static inline int r_popcnt8 (u8  n) { return __builtin_popcount(n); }
     31 static inline int r_popcnt16(u16 n) { return __builtin_popcount(n); }
     32 static inline int r_popcnt32(u32 n) { return __builtin_popcountl(n); }
     33 static inline int r_popcnt64(u64 n) { return __builtin_popcountll(n); }
     34 
     35 #else
     36 
     37 /* See Warren, "Hacker's Delight", 2 ed., sec. 5.1. */
     38 
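        /* The 8- and 16-bit versions use a 4-bit lookup table; the 32- and
         * 64-bit versions count bits within 2-, 4-, and then 8-bit fields in
         * parallel and finally sum the per-byte counts with a multiply and a
         * shift. */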
     39 static inline int
     40 r_popcnt8(u8 n) {
     41 	static u8 popcnt4[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
     42 	return popcnt4[n&0x0f] + popcnt4[n>>4];
     43 }
     44 
     45 static inline int
     46 r_popcnt16(u16 n) {
     47 	return r_popcnt8(n&0xff) + r_popcnt8(n>>8);
     48 }
     49 
     50 static inline int
     51 r_popcnt32(u32 n) {
     52 	n = n - ((n>>1) & U32_C(0x55555555));
     53 	n = (n & U32_C(0x33333333)) + ((n>>2) & U32_C(0x33333333));
     54 	n = (n + (n>>4)) & U32_C(0x0f0f0f0f);
     55 	n = (u32)(n * U32_C(0x01010101)) >> 24;
     56 	return n;
     57 }
     58 
     59 static inline int
     60 r_popcnt64(u64 n) {
     61 	n = n - ((n>>1) & U64_C(0x5555555555555555));
     62 	n = (n & U64_C(0x3333333333333333)) + ((n>>2) & U64_C(0x3333333333333333));
     63 	n = (n + (n>>4)) & U64_C(0x0f0f0f0f0f0f0f0f);
     64 	n = (u64)(n * U64_C(0x0101010101010101)) >> 56;
     65 	return n;
     66 }
     67 
     68 #endif
     69 
     70 
     71 /* ----- Count trailing zeros ----- */
     72 
     73 /* The ctz functions are implemented with the following theory: Let n be a
     74  * b-bit integer. We handle the case n == 0 separately, so assume n != 0. Then
     75  * ctzb(n) is the index of the least significant 1 bit in n. We can isolate this
     76  * bit as n & -n. Thus, we have reduced computing ctzb to determining the index
     77  * of a single bit. For this, we construct a minimal perfect hash function H;
     78  * i.e., given a b-bit integer with a single bit set, H outputs an integer in
     79  * [0..b). We then apply a suitable permutation P to the output of H to get the
     80  * bit index.
     81  *
     82  * The hash function H is constructed with the notion of de Bruijn cycles: Let
     83  * A be an alphabet with k symbols and let l > 0. An (A,l)-de Bruijn cycle is a
     84  * length k^l cyclic string over A which contains every length-l string over A as a
     85  * substring. It is a fact that de Bruijn cycles exist for any A and l. For our
     86  * application, we take A = {0,1} and l = lg(b). For example, here is a
     87  * de Bruijn cycle with b = 8 (hence l = 3):
     88  *     
     89  *     0 0 0 1 1 1 0 1  =  0x1d
     90  *     ---------------
     91  *     0 0 0
     92  *       0 0 1
     93  *         0 1 1
     94  *           1 1 1
     95  *             1 1 0
     96  *               1 0 1
     97  *     0           0 1    (wrap-around)
     98  *     0 0           1    (wrap-around)
     99  *
    100  * Clearly, when such a de Bruijn cycle is rotated left by 0,1,...,b-1, all
    101  * possible integers [0..b) appear on its top l bits. If the de Bruijn cycle
    102  * has l 0's on the left (as in the above example), then the top l bits are the
    103  * same whether we rotate left or shift left (because left shift brings in 0's
    104  * on the right). (Of course, any de Bruijn cycle can be rotated until l 0's
    105  * appear at the left.) But shifting left is equivalent to multiplying by an
    106  * integer with exactly 1 bit set. It is now clear how to define H:
    107  *
    108  *     H(n) = (c * n) >> (b - l)
    109  *
    110  * where c is any ({0,1},l)-de Bruijn cycle starting with l 0's. Once c is
    111  * fixed, the desired permutation P is easily computed. */
    112 
    113 /* Somehow, the de Bruijn cycle method beats the code generated by
    114  * __builtin_ctz{,l,ll} (using the tzcnt instruction) unless an appropriate
    115  * -march is specified, in which case GCC recognizes that the de Bruijn code
    116  * (at least in the 32-bit and 64-bit cases) implements ctz and it regresses
    117  * to using the tzcnt instruction, thereby tying with the builtin. Ugh. */
    118 
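        /* Hedged illustration (gen_ctz8_perm is an editorial name, not part of
         * bits.h): once the cycle c is fixed, the permutation table can be
         * computed by hashing each single-bit value and recording its bit
         * index. For the cycle 0x1d this reproduces the perm table used by
         * r_ctz8 below. */

        static inline void
        gen_ctz8_perm(u8 perm[8]) {
        	for (uint i = 0; i < 8; i++)
        		perm[(u8)(U8_C(0x1d) << i) >> (8 - 3)] = (u8)i;
        }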
    119 static inline int
    120 r_ctz8(u8 n) {
    121 	static u8 cycle = U8_C(0x1d);
    122 	static u8 perm[8] = {0, 1, 6, 2, 7, 5, 4, 3};
    123 	if (n == 0) return 8;
    124 	return perm[(u8)((n & -n) * cycle) >> (8 - 3)];
    125 }
    126 
    127 static inline int
    128 r_ctz16(u16 n) {
    129 	static u16 cycle = U16_C(0x0f65);
    130 	static u8 perm[16] = {
    131 		0, 1, 11, 2, 14, 12, 8, 3, 15, 10, 13, 7, 9, 6, 5, 4,
    132 	};
    133 	if (n == 0) return 16;
    134 	return perm[(u16)((n & -n) * cycle) >> (16 - 4)];
    135 }
    136 
    137 static inline int
    138 r_ctz32(u32 n) {
    139 	/* cycle and perm are from Warren, "Hacker's Delight", 2 ed., p. 112. */
    140 	static u32 cycle = U32_C(0x04d7651f);
    141 	static u8 perm[32] = {
    142 		 0,  1,  2, 24,  3, 19,  6, 25, 22,  4, 20, 10, 16,  7, 12, 26,
    143 		31, 23, 18,  5, 21,  9, 15, 11, 30, 17,  8, 14, 29, 13, 28, 27,
    144 	};
    145 	if (n == 0) return 32;
    146 	return perm[(u32)((n & -n) * cycle) >> (32 - 5)];
    147 }
    148 
    149 static inline int
    150 r_ctz64(u64 n) {
    151 	/* cycle and perm are from Knuth, TAOCP, vol. 4A, p. 142. */
    152 	static u64 cycle = U64_C(0x03f79d71b4ca8b09);
    153 	static u8 perm[64] = {
    154 		 0,  1, 56,  2, 57, 49, 28,  3, 61, 58, 42, 50, 38, 29, 17,  4,
    155 		62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12,  5,
    156 		63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
    157 		54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19,  9, 13,  8,  7,  6,
    158 	};
    159 	if (n == 0) return 64;
    160 	return perm[(u64)((n & -n) * cycle) >> (64 - 6)];
    161 }
    162 
    163 
    164 /* ----- Rightmost zero byte searching ----- */
    165 
    166 /* See Warren, "Hacker's Delight", 2 ed., sec. 6.1. The basic idea is to map
    167  * the rightmost 0x00 byte to 0x80 and all bytes right of the rightmost 0x00 to
    168  * 0x00, and then use ctz. When the argument n has no zero byte, rzb returns
    169  * sizeof n. By first XORing with a suitable value, these functions can be used
    170  * to find arbitrary bytes. Also, by replacing the masks within the definition,
    171  * this method adapts to searching for arbitrary length sequences of 0 bits
    172  * aligned in a particular way. */
    173 
    174 static inline int
    175 r_rzb16(u16 n) {
    176 	n = (n - U16_C(0x0101)) & ~n & U16_C(0x8080);
    177 	return r_ctz16(n) / 8u;
    178 }
    179 
    180 static inline int
    181 r_rzb32(u32 n) {
    182 	n = (n - U32_C(0x01010101)) & ~n & U32_C(0x80808080);
    183 	return r_ctz32(n) / 8u;
    184 }
    185 
    186 static inline int
    187 r_rzb64(u64 n) {
    188 	n = (n - U64_C(0x0101010101010101)) & ~n & U64_C(0x8080808080808080);
    189 	return r_ctz64(n) / 8u;
    190 }
    191 
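        /* Hedged illustration (r_rfb32 is an editorial name, not part of
         * bits.h): per the XOR remark above, the rightmost byte of n equal to
         * an arbitrary value b can be found by turning matching bytes into
         * 0x00 and reusing r_rzb32; the result is 4 when no byte matches. */

        static inline int
        r_rfb32(u32 n, u8 b) {
        	return r_rzb32(n ^ ((u32)b * U32_C(0x01010101)));
        }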
    192 
    193 /* ----- Sign extension ----- */
    194 
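        /* r_sextN(x, b) sign-extends the low b bits of x to the full N-bit
         * width, treating bit b-1 as the sign bit; e.g., r_sext8(0x1f, 5)
         * == 0xff. */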
    195 static inline u8
    196 r_sext8(u8 x, uint b) {
    197 	uint c = 8 - b;
    198 	return (u8)((i8)(x << c) >> c);
    199 }
    200 
    201 static inline u16
    202 r_sext16(u16 x, uint b) {
    203 	uint c = 16 - b;
    204 	return (u16)((i16)(x << c) >> c);
    205 }
    206 
    207 static inline u32
    208 r_sext32(u32 x, uint b) {
    209 	uint c = 32 - b;
    210 	return (u32)((i32)(x << c) >> c);
    211 }
    212 
    213 static inline u64
    214 r_sext64(u64 x, uint b) {
    215 	uint c = 64 - b;
    216 	return (u64)((i64)(x << c) >> c);
    217 }
    218 
    219 
    220 /* ----- Ternary add and subtract ----- */
    221 
    222 /* We implement ternary add/sub on arbitrary unsigned integers instead of with
    223  * the third argument constrained to be a 1-bit carry/borrow, because compilers
    224  * (or at least GCC) suck at generating good code with carries/borrows, so we
    225  * might as well take the extra flexibility. Actually, the GCC 14.2 code gen
    226  * for these functions is slightly better than the new __builtin_addc{,l,ll}
    227  * intrinsics on my machine (according to a microbenchmark). */
    228 
    229 static inline void
    230 r_add8(u8 *h, u8 *l, u8 x, u8 y, u8 z) {
    231 	u16 hl = (u16)x + (u16)y + (u16)z;
    232 	*h = hl >> 8;
    233 	*l = hl;
    234 }
    235 
    236 static inline void
    237 r_add16(u16 *h, u16 *l, u16 x, u16 y, u16 z) {
    238 	u32 hl = (u32)x + (u32)y + (u32)z;
    239 	*h = hl >> 16;
    240 	*l = hl;
    241 }
    242 
    243 static inline void
    244 r_add32(u32 *h, u32 *l, u32 x, u32 y, u32 z) {
    245 	u64 hl = (u64)x + (u64)y + (u64)z;
    246 	*h = hl >> 32;
    247 	*l = hl;
    248 }
    249 
    250 static inline void
    251 r_add64(u64 *h, u64 *l, u64 x, u64 y, u64 z) {
    252 #ifdef R_HAVE_128
    253 	u128 hl = (u128)x + (u128)y + (u128)z;
    254 	*h = hl >> 64;
    255 	*l = hl;
    256 #else
    257 	u64 s = x + y;
    258 	bool c0 = s < x;
    259 	u64 t = s + z;
    260 	bool c1 = t < s;
    261 	*h = c0 + c1;
    262 	*l = t;
    263 #endif
    264 }
    265 
    266 static inline void
    267 r_sub8(u8 *h, u8 *l, u8 x, u8 y, u8 z) {
    268 	u16 hl = (u16)x - (u16)y - (u16)z;
    269 	*h = hl >> 8;
    270 	*l = hl;
    271 }
    272 
    273 static inline void
    274 r_sub16(u16 *h, u16 *l, u16 x, u16 y, u16 z) {
    275 	u32 hl = (u32)x - (u32)y - (u32)z;
    276 	*h = hl >> 16;
    277 	*l = hl;
    278 }
    279 
    280 static inline void
    281 r_sub32(u32 *h, u32 *l, u32 x, u32 y, u32 z) {
    282 	u64 hl = (u64)x - (u64)y - (u64)z;
    283 	*h = hl >> 32;
    284 	*l = hl;
    285 }
    286 
    287 static inline void
    288 r_sub64(u64 *h, u64 *l, u64 x, u64 y, u64 z) {
    289 #ifdef R_HAVE_128
    290 	u128 hl = (u128)x - (u128)y - (u128)z;
    291 	*h = hl >> 64;
    292 	*l = hl;
    293 #else
    294 	u64 s = x - y;
    295 	bool c0 = s > x;
    296 	u64 t = s - z;
    297 	bool c1 = t > s;
    298 	*h = -c0 - c1;
    299 	*l = t;
    300 #endif
    301 }
    302 
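        /* Hedged usage sketch (add128 is an editorial name, not part of
         * bits.h): the arbitrary third argument makes carry propagation
         * straightforward, e.g. adding two 128-bit values held as (hi, lo)
         * pairs of u64 limbs. */

        static inline void
        add128(u64 *rh, u64 *rl, u64 ah, u64 al, u64 bh, u64 bl) {
        	u64 carry, ignore;
        	r_add64(&carry, rl, al, bl, 0);      /* low limbs, no carry in */
        	r_add64(&ignore, rh, ah, bh, carry); /* high limbs; top carry is dropped */
        }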
    303 
    304 /* ----- Full width multiply ----- */
    305 
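        /* r_muluN and r_mulsN compute the full 2N-bit product of x and y: the
         * high N bits are stored in *h and the low N bits in *l (the low half
         * is unsigned even in the signed variants). */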
    306 static inline void
    307 r_mulu8(u8 *h, u8 *l, u8 x, u8 y) {
    308 	u16 hl = (u16)x * (u16)y;
    309 	*h = hl >> 8;
    310 	*l = hl;
    311 }
    312 
    313 static inline void
    314 r_mulu16(u16 *h, u16 *l, u16 x, u16 y) {
    315 	u32 hl = (u32)x * (u32)y;
    316 	*h = hl >> 16;
    317 	*l = hl;
    318 }
    319 
    320 static inline void
    321 r_mulu32(u32 *h, u32 *l, u32 x, u32 y) {
    322 	u64 hl = (u64)x * (u64)y;
    323 	*h = hl >> 32;
    324 	*l = hl;
    325 }
    326 
    327 static inline void
    328 r_mulu64(u64 *h, u64 *l, u64 x, u64 y) {
    329 #ifdef R_HAVE_128
    330 	u128 hl = (u128)x * (u128)y;
    331 	*h = hl >> 64;
    332 	*l = hl;
    333 #else
    334 	const u64 m = (U64_C(1) << 32) - 1;
    335 
    336 	u64 x0 = x & m;
    337 	u64 x1 = x >> 32;
    338 	u64 y0 = y & m;
    339 	u64 y1 = y >> 32;
    340 
    341 	u64 x0y0 = x0 * y0;
    342 	u64 x0y1 = x0 * y1;
    343 	u64 x1y0 = x1 * y0;
    344 	u64 x1y1 = x1 * y1;
    345 
    346 	u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32;
    347 
    348 	*h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c;
    349 	*l = x0y0 + (x0y1 << 32) + (x1y0 << 32);
    350 #endif
    351 }
    352 
    353 static inline void
    354 r_muls8(i8 *h, u8 *l, i8 x, i8 y) {
    355 	i16 hl = (i16)x * (i16)y;
    356 	*h = hl >> 8;
    357 	*l = hl;
    358 }
    359 
    360 static inline void
    361 r_muls16(i16 *h, u16 *l, i16 x, i16 y) {
    362 	i32 hl = (i32)x * (i32)y;
    363 	*h = hl >> 16;
    364 	*l = hl;
    365 }
    366 
    367 static inline void
    368 r_muls32(i32 *h, u32 *l, i32 x, i32 y) {
    369 	i64 hl = (i64)x * (i64)y;
    370 	*h = hl >> 32;
    371 	*l = hl;
    372 }
    373 
    374 static inline void
    375 r_muls64(i64 *h, u64 *l, i64 x, i64 y) {
    376 #ifdef R_HAVE_128
    377 	i128 hl = (i128)x * (i128)y;
    378 	*h = hl >> 64;
    379 	*l = hl;
    380 #else
    381 	const u64 m = (U64_C(1) << 32) - 1;
    382 
    383 	u64 x0 = x & m;
    384 	i64 x1 = x >> 32;
    385 	u64 y0 = y & m;
    386 	i64 y1 = y >> 32;
    387 
    388 	u64 x0y0 = x0 * y0;
    389 	i64 x0y1 = x0 * y1;
    390 	i64 x1y0 = x1 * y0;
    391 	i64 x1y1 = x1 * y1;
    392 
    393 	u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32;
    394 
    395 	*h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c;
    396 	*l = x0y0 + (x0y1 << 32) + (x1y0 << 32);
    397 #endif
    398 }
    399 
    400 
    401 /* ----- Floor division ----- */
    402 
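        /* The quotient is rounded toward negative infinity and the remainder
         * has the sign of y (or is zero); e.g., x = -7, y = 2 gives q = -4,
         * r = 1. */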
    403 static inline void
    404 r_divs8(i8 *q, i8 *r, i8 x, i8 y) {
    405 	i8 qq = x / y, rr = x % y;
    406 	bool neg = (x < 0) ^ (y < 0);
    407 	*q = qq - ((rr != 0) & neg);
    408 	*r = ((rr != 0) & neg) ? rr + y : rr;
    409 }
    410 
    411 static inline void
    412 r_divs16(i16 *q, i16 *r, i16 x, i16 y) {
    413 	i16 qq = x / y, rr = x % y;
    414 	bool neg = (x < 0) ^ (y < 0);
    415 	*q = qq - ((rr != 0) & neg);
    416 	*r = ((rr != 0) & neg) ? rr + y : rr;
    417 }
    418 
    419 static inline void
    420 r_divs32(i32 *q, i32 *r, i32 x, i32 y) {
    421 	i32 qq = x / y, rr = x % y;
    422 	bool neg = (x < 0) ^ (y < 0);
    423 	*q = qq - ((rr != 0) & neg);
    424 	*r = ((rr != 0) & neg) ? rr + y : rr;
    425 }
    426 
    427 static inline void
    428 r_divs64(i64 *q, i64 *r, i64 x, i64 y) {
    429 	i64 qq = x / y, rr = x % y;
    430 	bool neg = (x < 0) ^ (y < 0);
    431 	*q = qq - ((rr != 0) & neg);
    432 	*r = ((rr != 0) & neg) ? rr + y : rr;
    433 }
    434 
    435 
    436 /* ----- Byte swaps/reversals ----- */
    437 
    438 #ifdef __GNUC__
    439 
    440 static inline u16 r_swap16(u16 n) { return __builtin_bswap16(n); }
    441 static inline u32 r_swap32(u32 n) { return __builtin_bswap32(n); }
    442 static inline u64 r_swap64(u64 n) { return __builtin_bswap64(n); }
    443 
    444 #else
    445 
    446 /* See Warren, "Hacker's Delight", 2 ed., sec. 7.1. */
    447 
    448 static inline u16
    449 r_swap16(u16 n) {
    450 	return (n << 8) | (n >> 8);
    451 }
    452 
    453 static inline u32
    454 r_swap32(u32 n) {
    455 	n = ((n & U32_C(0x00ff00ff)) << 8) | ((n >> 8) & U32_C(0x00ff00ff));
    456 	return (n << 16) | (n >> 16);
    457 }
    458 
    459 static inline u64
    460 r_swap64(u64 n) {
    461 	n = ((n & U64_C(0x00ff00ff00ff00ff)) <<  8) | ((n >>  8) & U64_C(0x00ff00ff00ff00ff));
    462 	n = ((n & U64_C(0x0000ffff0000ffff)) << 16) | ((n >> 16) & U64_C(0x0000ffff0000ffff));
    463 	return (n << 32) | (n >> 32);
    464 }
    465 
    466 #endif
    467 
    468 
    469 /* ----- Endian conversions ----- */
    470 
    471 /* There is 2x redundancy here (e.g., ltoh = htol), but this allows code using
    472  * these functions to make the intent clear. */
    473 
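        /* Naming: l = little-endian, b = big-endian, h = host order; e.g.,
         * r_ltohN converts a little-endian value to host order and r_htobN
         * converts host order to big-endian. */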
    474 #if defined(R_LITTLE_ENDIAN)
    475 static inline u16 r_ltoh16(u16 n) { return n; }
    476 static inline u32 r_ltoh32(u32 n) { return n; }
    477 static inline u64 r_ltoh64(u64 n) { return n; }
    478 static inline u16 r_btoh16(u16 n) { return r_swap16(n); }
    479 static inline u32 r_btoh32(u32 n) { return r_swap32(n); }
    480 static inline u64 r_btoh64(u64 n) { return r_swap64(n); }
    481 static inline u16 r_htol16(u16 n) { return n; }
    482 static inline u32 r_htol32(u32 n) { return n; }
    483 static inline u64 r_htol64(u64 n) { return n; }
    484 static inline u16 r_htob16(u16 n) { return r_swap16(n); }
    485 static inline u32 r_htob32(u32 n) { return r_swap32(n); }
    486 static inline u64 r_htob64(u64 n) { return r_swap64(n); }
    487 #elif defined(R_BIG_ENDIAN)
    488 static inline u16 r_btoh16(u16 n) { return n; }
    489 static inline u32 r_btoh32(u32 n) { return n; }
    490 static inline u64 r_btoh64(u64 n) { return n; }
    491 static inline u16 r_ltoh16(u16 n) { return r_swap16(n); }
    492 static inline u32 r_ltoh32(u32 n) { return r_swap32(n); }
    493 static inline u64 r_ltoh64(u64 n) { return r_swap64(n); }
    494 static inline u16 r_htob16(u16 n) { return n; }
    495 static inline u32 r_htob32(u32 n) { return n; }
    496 static inline u64 r_htob64(u64 n) { return n; }
    497 static inline u16 r_htol16(u16 n) { return r_swap16(n); }
    498 static inline u32 r_htol32(u32 n) { return r_swap32(n); }
    499 static inline u64 r_htol64(u64 n) { return r_swap64(n); }
    500 #endif
    501 
    502 
    503 /* ----- Binary decoding ----- */
    504 
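        /* The memcpy calls keep these loads safe for unaligned pointers and
         * free of strict-aliasing issues; compilers typically lower them to a
         * single load instruction. */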
    505 #if defined(R_LITTLE_ENDIAN)
    506 static inline u16 r_readl16(void *p) { u16 v; memcpy(&v, p, 2); return v; }
    507 static inline u32 r_readl32(void *p) { u32 v; memcpy(&v, p, 4); return v; }
    508 static inline u64 r_readl64(void *p) { u64 v; memcpy(&v, p, 8); return v; }
    509 static inline u16 r_readb16(void *p) { return r_swap16(r_readl16(p)); }
    510 static inline u32 r_readb32(void *p) { return r_swap32(r_readl32(p)); }
    511 static inline u64 r_readb64(void *p) { return r_swap64(r_readl64(p)); }
    512 static inline u16 r_readh16(void *p) { return r_readl16(p); }
    513 static inline u32 r_readh32(void *p) { return r_readl32(p); }
    514 static inline u64 r_readh64(void *p) { return r_readl64(p); }
    515 #elif defined(R_BIG_ENDIAN)
    516 static inline u16 r_readb16(void *p) { u16 v; memcpy(&v, p, 2); return v; }
    517 static inline u32 r_readb32(void *p) { u32 v; memcpy(&v, p, 4); return v; }
    518 static inline u64 r_readb64(void *p) { u64 v; memcpy(&v, p, 8); return v; }
    519 static inline u16 r_readl16(void *p) { return r_swap16(r_readb16(p)); }
    520 static inline u32 r_readl32(void *p) { return r_swap32(r_readb32(p)); }
    521 static inline u64 r_readl64(void *p) { return r_swap64(r_readb64(p)); }
    522 static inline u16 r_readh16(void *p) { return r_readb16(p); }
    523 static inline u32 r_readh32(void *p) { return r_readb32(p); }
    524 static inline u64 r_readh64(void *p) { return r_readb64(p); }
    525 #endif
    526 
    527 
    528 /* ----- Binary encoding ----- */
    529 
    530 #if defined(R_LITTLE_ENDIAN)
    531 static inline void r_writel16(void *p, u16 v) { memcpy(p, &v, 2); }
    532 static inline void r_writel32(void *p, u32 v) { memcpy(p, &v, 4); }
    533 static inline void r_writel64(void *p, u64 v) { memcpy(p, &v, 8); }
    534 static inline void r_writeb16(void *p, u16 v) { r_writel16(p, r_swap16(v)); }
    535 static inline void r_writeb32(void *p, u32 v) { r_writel32(p, r_swap32(v)); }
    536 static inline void r_writeb64(void *p, u64 v) { r_writel64(p, r_swap64(v)); }
    537 static inline void r_writeh16(void *p, u16 v) { r_writel16(p, v); }
    538 static inline void r_writeh32(void *p, u32 v) { r_writel32(p, v); }
    539 static inline void r_writeh64(void *p, u64 v) { r_writel64(p, v); }
    540 #elif defined(R_BIG_ENDIAN)
    541 static inline void r_writeb16(void *p, u16 v) { memcpy(p, &v, 2); }
    542 static inline void r_writeb32(void *p, u32 v) { memcpy(p, &v, 4); }
    543 static inline void r_writeb64(void *p, u64 v) { memcpy(p, &v, 8); }
    544 static inline void r_writel16(void *p, u16 v) { r_writeb16(p, r_swap16(v)); }
    545 static inline void r_writel32(void *p, u32 v) { r_writeb32(p, r_swap32(v)); }
    546 static inline void r_writel64(void *p, u64 v) { r_writeb64(p, r_swap64(v)); }
    547 static inline void r_writeh16(void *p, u16 v) { r_writeb16(p, v); }
    548 static inline void r_writeh32(void *p, u32 v) { r_writeb32(p, v); }
    549 static inline void r_writeh64(void *p, u64 v) { r_writeb64(p, v); }
    550 #endif