main.c (1216B)
1 #include <rcx/all.h> 2 #include <rcx/bench.h> 3 #include <stdio.h> 4 5 #include "func.h" 6 7 extern u64 scan_avx1024_whoa(u64 *y, u64 *x); 8 9 u64 in[32], out[32]; 10 volatile u64 out_total; 11 12 #define BENCHMARK(f) \ 13 void \ 14 benchmark_##f(u64 N) { \ 15 r_bench_start(); \ 16 for (u64 n = 0; n < N; n++) \ 17 out_total = f(out, in); \ 18 r_bench_stop(); \ 19 } 20 21 BENCHMARK(scan_scalar1024) 22 BENCHMARK(scan_scalar2048) 23 BENCHMARK(scan_scalar2048_unrolled) 24 BENCHMARK(scan_avx1024_serial) 25 BENCHMARK(scan_avx1024) 26 BENCHMARK(scan_avx1024_whoa) 27 28 int 29 main(void) { 30 printf("%lu\n", scan_avx1024_whoa(out, (u64[16]){ 31 0x0, 0x1, 0x2, 0x3, 32 0x4, 0x5, 0x6, 0x7, 33 0x8, 0x9, 0xa, 0xb, 34 0xc, 0xd, 0xe, 0xf, 35 })); 36 printf("%lu %lu %lu %lu\n", out[0], out[1], out[2], out[3]); 37 printf("%lu %lu %lu %lu\n", out[4], out[5], out[6], out[7]); 38 printf("%lu %lu %lu %lu\n", out[8], out[9], out[10], out[11]); 39 printf("%lu %lu %lu %lu\n", out[12], out[13], out[14], out[15]); 40 // return 0; 41 r_bench(benchmark_scan_scalar1024, 3000); 42 r_bench(benchmark_scan_scalar2048, 3000); 43 r_bench(benchmark_scan_scalar2048_unrolled, 3000); 44 r_bench(benchmark_scan_avx1024_serial, 3000); 45 r_bench(benchmark_scan_avx1024, 3000); 46 r_bench(benchmark_scan_avx1024_whoa, 3000); 47 }