rcx

miscellaneous C library
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

unicode.c (810B)


      1 #include "rcx.h"
      2 #include "unicode.h"
      3 #include "../gen/ucattab.inc"
      4 
      5 static char ucats[] =
      6 	"Lu\0Ll\0Lt\0Lm\0Lo\0"
      7 	"Mn\0Mc\0Me\0"
      8 	"Nd\0Nl\0No\0"
      9 	"Pc\0Pd\0Ps\0Pe\0Pi\0Pf\0Po\0"
     10 	"Sm\0Sc\0Sk\0So\0"
     11 	"Zs\0Zl\0Zp\0"
     12 	"Cc\0Cf\0Cs\0Co\0Cn";
     13 
     14 /* TODO: Optimization opportunity: Do the Latin 1 check inside a static inline
     15  * function inside the header file. */
     16 
     17 char *
     18 r_unicode_category(rune r) {
     19 	if (r <= 0xff) /* Latin 1 */
     20 		return &ucats[3 * ucatl1tab[r]];
     21 
     22 	if (r > RUNE_MAX)
     23 		return 0;
     24 
     25 	/* Binary search ucattab */
     26 	usize l = 0;
     27 	usize u = LEN(ucattab);
     28 	while (l < u) {
     29 		usize i = l + (u-l)/2;
     30 		u32 entry = ucattab[i];
     31 		if (r < (entry & 0xffffff))
     32 			u = i;
     33 		else if (i + 1 < LEN(ucattab) && r >= (ucattab[i+1] & 0xffffff))
     34 			l = i + 1;
     35 		else
     36 			return &ucats[3 * (entry >> 24)];
     37 	}
     38 
     39 	return 0;
     40 }