spstabilize8.c (2101B)
1 #include <rcx/all.h> 2 #include <rcx/simd.h> 3 #include <stdio.h> 4 5 #include "common.h" 6 7 #ifndef R_HAVE_AVX2 8 #error "AVX2 support required" 9 #endif 10 11 #define USAGE "usage: %s\n" 12 13 Sandpile 14 stabilize(Sandpile sp) { 15 usize w = sp.w; 16 usize h = sp.h; 17 18 u32 *sand[2]; 19 sand[0] = sp.sand; 20 sand[1] = r_eallocz((w + 2) * (h + 2) * sizeof(u32)); 21 22 isize nxv = (isize)w / 8; // Number of x vectors that fit in w 23 v8u32 v7 = v8u32_fill(7); 24 for (usize i = 0;; i = !i) { 25 usize unstable = 0; 26 27 for (isize y = 1; y <= h; y++) { 28 isize j = y * ((isize)w + 2) + 1; 29 30 for (isize xv = 0; xv < nxv; xv++, j += 8) { 31 v8u32 a = v8u32_loadu((v8u32a1 *)&sand[i][j]); 32 a = v8u32_and(a, v7); 33 34 #define ADD(dx, dy) \ 35 do { \ 36 isize dj = (dy) * ((isize)w + 2) + (dx); \ 37 v8u32 b = v8u32_loadu((v8u32a1 *)&sand[i][j + dj]); \ 38 b = v8u32_sri(b, 3); \ 39 a = v8u32_add(a, b); \ 40 } while (0) 41 ADD(+1, +0); 42 ADD(+1, +1); 43 ADD(+0, +1); 44 ADD(-1, +1); 45 ADD(-1, +0); 46 ADD(-1, -1); 47 ADD(+0, -1); 48 ADD(+1, -1); 49 #undef ADD 50 51 v8u32 g = v8u32_cmpgt(a, v7); 52 unstable += !v8u32_testz(g, g); 53 54 v8u32_storeu((v8u32a1 *)&sand[!i][j], a); 55 } 56 57 // TODO: Try dealing with tail with masked vector instead? Note 58 // that this would require a minimum width/height of 3. 59 for (isize x = 8*nxv; x < (isize)w; x++, j++) { 60 u32 a = sand[i][j]; 61 a = a & 3; 62 63 #define ADD(dx, dy) \ 64 do { \ 65 isize dj = (dy) * ((isize)w + 2) + (dx); \ 66 u32 b = sand[i][j + dj]; \ 67 b = b >> 2; \ 68 a = a + b; \ 69 } while (0) 70 ADD(+1, +0); 71 ADD(+1, +1); 72 ADD(+0, +1); 73 ADD(-1, +1); 74 ADD(-1, +0); 75 ADD(-1, -1); 76 ADD(+0, -1); 77 ADD(+1, -1); 78 #undef ADD 79 80 unstable += a > 3; 81 82 sand[!i][j] = a; 83 } 84 } 85 86 if (!unstable) { 87 free(sand[i]); 88 return (Sandpile){.w = w, .h = h, .sand = sand[!i]}; 89 } 90 } 91 } 92 93 int 94 main(int argc, char **argv) { 95 CHECK_FOR_HELP_OPTION(USAGE, argc, argv); 96 97 if (argc != 1) { 98 fprintf(stderr, USAGE, argv[0]); 99 return 1; 100 } 101 102 Sandpile sp = sp_fread(stdin); 103 sp = stabilize(sp); 104 sp_fwrite(stdout, sp); 105 }