rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit bacaaf039091bd29603f4125cbc69c9e834fc71b
parent e167b6dbf0e90d90a749f18c87a15e4718d52869
Author: robert <robertrussell.72001@gmail.com>
Date:   Wed, 10 Aug 2022 14:34:31 -0700

Namespace all utf8 functions

Diffstat:
M inc/cext/utf8.h | 12 ++++++------
M src/utf8.c | 20 ++++++++++----------
2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/inc/cext/utf8.h b/inc/cext/utf8.h @@ -2,13 +2,13 @@ #include "cext/def.h" -#define UTF8_SIZE 4 +#define CEXT_UTF8_SIZE 4 /* Return the number of bytes needed to encode c, or 0 if c is an invalid - * codepoint. If s is nonnull, then it must have length >= utf8encode(0, c), - * which is guaranteed to be at most UTF8_SIZE; in this case, if c is a valid - * codepoint, then encode c into s. */ -usize utf8encode(char *s, rune c); + * codepoint. If s is nonnull, then it must have length >= + * cext_utf8_encode(0, c), which is guaranteed to be at most CEXT_UTF8_SIZE; + * in this case, if c is a valid codepoint, then encode c into s. */ +usize cext_utf8_encode(char *s, rune c); /* Decode the first rune in s and return the number of consumed bytes. If this * succeeds and c is nonnull, then set *c to the decoded rune. Otherwise, no @@ -18,4 +18,4 @@ usize utf8encode(char *s, rune c); * - n > 0 iff the first min(n+1,slen) bytes of s are not a prefix of any * valid rune (but if n < slen, then s[n] might be the first byte of a * valid rune). 
*/ -usize utf8decode(rune *c, char *s, usize slen); +usize cext_utf8_decode(rune *c, char *s, usize slen); diff --git a/src/utf8.c b/src/utf8.c @@ -10,18 +10,18 @@ static const rune utf8min[] = { 0x0, 0x80, 0x800, 0x10000}; static const rune utf8max[] = {0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; static bool -utf8overlong(rune c, usize len) { +utf8_overlong(rune c, usize len) { return c < utf8min[len-1]; } static bool -utf8encodable(rune c) { +utf8_encodable(rune c) { return c <= RUNE_MAX && (c < SURROGATE_MIN || c > SURROGATE_MAX); } static usize -utf8len(rune c) { - if (!utf8encodable(c)) +utf8_len(rune c) { + if (!utf8_encodable(c)) return 0; usize len = 1; @@ -31,8 +31,8 @@ utf8len(rune c) { } usize -utf8encode(char *s, rune c) { - usize len = utf8len(c); +cext_utf8_encode(char *s, rune c) { + usize len = utf8_len(c); if (!s || len == 0) return len; @@ -46,7 +46,7 @@ utf8encode(char *s, rune c) { } usize -utf8decode(rune *c, char *s, usize slen) { +cext_utf8_decode(rune *c, char *s, usize slen) { if (c) *c = RUNE_BAD; @@ -55,11 +55,11 @@ utf8decode(rune *c, char *s, usize slen) { /* Determine encoded sequence length based on first byte */ usize len = 1; - for (; len <= UTF8_SIZE; len++) { + for (; len <= CEXT_UTF8_SIZE; len++) { if (((uchar)s[0] & utf8mask[len-1]) == utf8byte[len-1]) break; } - if (len > UTF8_SIZE) /* Invalid leading byte? */ + if (len > CEXT_UTF8_SIZE) /* Invalid leading byte? */ return 1; /* Decode codepoint */ @@ -73,7 +73,7 @@ utf8decode(rune *c, char *s, usize slen) { if (len > slen) return 0; /* Looks good so far, but not enough input */ - if (c && utf8encodable(r) && !utf8overlong(r, len)) + if (c && utf8_encodable(r) && !utf8_overlong(r, len)) *c = r; return len; }