unicode.c (810B)
1 #include "rcx.h" 2 #include "unicode.h" 3 #include "../gen/ucattab.inc" 4 5 static char ucats[] = 6 "Lu\0Ll\0Lt\0Lm\0Lo\0" 7 "Mn\0Mc\0Me\0" 8 "Nd\0Nl\0No\0" 9 "Pc\0Pd\0Ps\0Pe\0Pi\0Pf\0Po\0" 10 "Sm\0Sc\0Sk\0So\0" 11 "Zs\0Zl\0Zp\0" 12 "Cc\0Cf\0Cs\0Co\0Cn"; 13 14 /* TODO: Optimization opportunity: Do the Latin 1 check inside a static inline 15 * function inside the header file. */ 16 17 char * 18 r_unicode_category(rune r) { 19 if (r <= 0xff) /* Latin 1 */ 20 return &ucats[3 * ucatl1tab[r]]; 21 22 if (r > RUNE_MAX) 23 return 0; 24 25 /* Binary search ucattab */ 26 usize l = 0; 27 usize u = LEN(ucattab); 28 while (l < u) { 29 usize i = l + (u-l)/2; 30 u32 entry = ucattab[i]; 31 if (r < (entry & 0xffffff)) 32 u = i; 33 else if (i + 1 < LEN(ucattab) && r >= (ucattab[i+1] & 0xffffff)) 34 l = i + 1; 35 else 36 return &ucats[3 * (entry >> 24)]; 37 } 38 39 return 0; 40 }