sandpiles.c (3571B)
1 #include <errno.h> 2 #include <rcx/all.h> 3 #include <rcx/simd.h> 4 #include <stdio.h> 5 #include <string.h> 6 7 #ifndef R_HAVE_AVX2 8 #error "AVX2 support required" 9 #endif 10 11 #define SP_MAX_DIMEN U32_MAX 12 13 typedef struct rgba Rgba; 14 typedef struct palette Palette; 15 typedef struct farbfeld Farbfeld; 16 17 struct rgba { 18 u16 r, g, b, a; 19 }; 20 21 struct palette { 22 Rgba rgba[4]; 23 }; 24 25 struct farbfeld { 26 usize size; 27 void *img; 28 }; 29 30 u32 * 31 sp_new(usize w, usize h) { 32 REQUIRE(w <= SP_MAX_DIMEN && h <= SP_MAX_DIMEN, "sp_new: too large"); 33 return r_eallocz((w + 2) * (h + 2) * sizeof(u32)); // TODO: align? 34 } 35 36 void 37 sp_set(usize w, usize h, u32 *sp, usize x, usize y, u32 s) { 38 ASSERT(x < w && y < h, "sp_set: out of bounds"); 39 sp[(y + 1) * (w + 2) + (x + 1)] = s; 40 } 41 42 u32 * 43 sp_stabilize4(usize w, usize h, u32 *sp) { 44 u32 *sand[2]; 45 sand[0] = sp; 46 sand[1] = sp_new(w, h); 47 48 isize nxv = (isize)w / 8; // Number of x vectors that fit in w 49 v8u32 v3 = v8u32_fill(3); 50 for (usize i = 0;; i = !i) { 51 usize unstable = 0; 52 53 for (isize y = 1; y <= h; y++) { 54 isize j = y * ((isize)w + 2) + 1; 55 56 for (isize xv = 0; xv < nxv; xv++, j += 8) { 57 v8u32 a = v8u32_loadu((v8u32a1 *)&sand[i][j]); 58 a = v8u32_and(a, v3); 59 60 #define ADD(dx, dy) \ 61 do { \ 62 isize dj = (dy) * ((isize)w + 2) + (dx); \ 63 v8u32 b = v8u32_loadu((v8u32a1 *)&sand[i][j + dj]); \ 64 b = v8u32_sri(b, 2); \ 65 a = v8u32_add(a, b); \ 66 } while (0) 67 ADD(+1, +0); 68 ADD(+0, +1); 69 ADD(-1, +0); 70 ADD(+0, -1); 71 #undef ADD 72 73 v8u32 g = v8u32_cmpgt(a, v3); 74 unstable += !v8u32_testz(g, g); 75 76 v8u32_storeu((v8u32a1 *)&sand[!i][j], a); 77 } 78 79 // TODO: Try dealing with tail with masked vector instead? Note 80 // that this would require a minimum width/height of 3. 81 for (isize x = 8*nxv; x < (isize)w; x++, j++) { 82 u32 a = sand[i][j]; 83 a = a & 3; 84 85 #define ADD(dx, dy) \ 86 do { \ 87 isize dj = (dy) * ((isize)w + 2) + (dx); \ 88 u32 b = sand[i][j + dj]; \ 89 b = b >> 2; \ 90 a = a + b; \ 91 } while (0) 92 ADD(+1, +0); 93 ADD(+0, +1); 94 ADD(-1, +0); 95 ADD(+0, -1); 96 #undef ADD 97 98 unstable += a > 3; 99 100 sand[!i][j] = a; 101 } 102 } 103 104 if (!unstable) { 105 free(sand[i]); 106 return sand[!i]; 107 } 108 } 109 } 110 111 // TODO: void stabilize8(...) 112 113 Farbfeld 114 farbfeld(usize w, usize h, u32 *sp, Palette *palette) { 115 usize size = 8u + 4u + 4u + w * h * 8u; 116 void *img = r_ealloc(size); 117 u8 *cur = img; 118 119 // Write header 120 memcpy(cur, "farbfeld", 8); 121 r_writeb32(cur + 8, w); 122 r_writeb32(cur + 12, h); 123 cur += 16; 124 125 // Encode palette 126 u16 enc_palette[4][4]; 127 for (usize i = 0; i < 4; i++) { 128 enc_palette[i][0] = r_htob16(palette->rgba[i].r); 129 enc_palette[i][1] = r_htob16(palette->rgba[i].g); 130 enc_palette[i][2] = r_htob16(palette->rgba[i].b); 131 enc_palette[i][3] = r_htob16(palette->rgba[i].a); 132 } 133 134 // Write pixels 135 for (usize y = 1; y <= h; y++) { 136 for (usize x = 1; x <= w; x++) { 137 u32 s = sp[y * (w + 2) + x]; 138 memcpy(cur, enc_palette[s < 4 ? s : 3], 8); 139 cur += 8; 140 } 141 } 142 143 return (Farbfeld){.size = size, .img = img}; 144 } 145 146 int 147 main(void) { 148 usize w = 500; 149 usize h = 500; 150 u32 *sp = sp_new(w, h); 151 for (usize y = 150; y < h-150; y++) { 152 for (usize x = 150; x < w-150; x++) 153 sp_set(w, h, sp, x, y, 20); 154 } 155 sp = sp_stabilize4(w, h, sp); 156 157 Palette palette = { 158 .rgba = { 159 { 0x4a4a, 0x4242, 0x3838, 0xffff }, 160 { 0x4d4d, 0x5353, 0x5959, 0xffff }, 161 { 0x5050, 0x8484, 0x8484, 0xffff }, 162 { 0x7979, 0xc9c9, 0x9e9e, 0xffff }, 163 } 164 }; 165 Farbfeld ff = farbfeld(w, h, sp, &palette); 166 if (r_write_all(0, 1, ff.img, ff.size) < 0) 167 throw("write: %s", strerror(errno)); 168 }