Patch summary (free text before the "Index:" header; ignored by patch(1)):

  * Adds _UTF8_surrogate(), true for wide characters in 0xd800..0xdfff.
  * _citrus_UTF8_init_state() now only resets s->chlen instead of
    memset()ing the whole state.
  * Decode path (hunks 2-4): the switch-based byte collection is replaced
    by if/while; chlenbak is dropped and *nresult becomes s0 - *s.
    A lead byte whose expected length c is < 1 or smaller than the bytes
    already buffered is EILSEQ.  The old
    _DIAGASSERT(findlen(wchar) == c) becomes a runtime check: a decoded
    surrogate, or a value whose _UTF8_findlen() differs from c, is EILSEQ.
    The ilseq exit no longer resets psenc->chlen.
  * Hunk 5: a surrogate wc is rejected with EILSEQ before
    _UTF8_findlen(wc) is consulted.
  * return (x); is rewritten as return x; throughout.

NOTE(review): this file had been collapsed onto two lines.  The line
structure below was rebuilt so that every hunk matches its @@ counts, but
tab indentation and the exact position of blank context lines are
reconstructed, not original.  Apply with "patch -l" or verify against
revision 1.15.  In particular, "psenc->chlen = 0;" on the success path is
taken to be an unchanged context line -- confirm.

Index: citrus_utf8.c
===================================================================
RCS file: /cvsroot/src/lib/libc/citrus/modules/citrus_utf8.c,v
retrieving revision 1.15
diff -u -r1.15 citrus_utf8.c
--- citrus_utf8.c	22 Mar 2006 00:08:09 -0000	1.15
+++ citrus_utf8.c	27 Feb 2007 12:03:34 -0000
@@ -166,11 +166,17 @@
 	return -1;	/*out of range*/
 }
 
+static __inline int
+_UTF8_surrogate(wchar_t wc)
+{
+	return wc >= 0xd800 && wc <= 0xdfff;
+}
+
 static __inline void
 /*ARGSUSED*/
 _citrus_UTF8_init_state(_UTF8EncodingInfo *ei, _UTF8State *s)
 {
-	memset(s, 0, sizeof(*s));
+	s->chlen = 0;
 }
 
 static __inline void
@@ -197,7 +203,6 @@
 	const char *s0;
 	int c;
 	int i;
-	int chlenbak;
 
 	_DIAGASSERT(nresult != 0);
 	_DIAGASSERT(s != NULL);
@@ -208,45 +213,28 @@
 	if (s0 == NULL) {
 		_citrus_UTF8_init_state(ei, psenc);
 		*nresult = 0; /* state independent */
-		return (0);
+		return 0;
 	}
 
-	chlenbak = psenc->chlen;
-
 	/* make sure we have the first byte in the buffer */
-	switch (psenc->chlen) {
-	case 0:
-		if (n < 1) {
+	if (psenc->chlen == 0) {
+		if (n-- < 1)
 			goto restart;
-		}
-		psenc->ch[0] = *s0++;
-		psenc->chlen = 1;
-		n--;
-		break;
-	case 1: case 2: case 3: case 4: case 5:
-		break;
-	default:
-		/* illegal state */
-		goto ilseq;
+		psenc->ch[psenc->chlen++] = *s0++;
 	}
 
 	c = _UTF8_count[psenc->ch[0] & 0xff];
-	if (c == 0)
+	if (c < 1 || c < psenc->chlen)
 		goto ilseq;
-	while (psenc->chlen < c) {
-		if (n < 1) {
-			goto restart;
-		}
-		psenc->ch[psenc->chlen] = *s0++;
-		psenc->chlen++;
-		n--;
-	}
 
-	switch (c) {
-	case 1:
+	if (c == 1)
 		wchar = psenc->ch[0] & 0xff;
-		break;
-	case 2: case 3: case 4: case 5: case 6:
+	else {
+		while (psenc->chlen < c) {
+			if (n-- < 1)
+				goto restart;
+			psenc->ch[psenc->chlen++] = *s0++;
+		}
 		wchar = psenc->ch[0] & (0x7f >> c);
 		for (i = 1; i < c; i++) {
 			if ((psenc->ch[i] & 0xc0) != 0x80)
@@ -254,38 +242,25 @@
 			wchar <<= 6;
 			wchar |= (psenc->ch[i] & 0x3f);
 		}
-
-		_DIAGASSERT(findlen(wchar) == c);
-
-		break;
-	default:
-		/* illegal state */
-		goto ilseq;
+		if (_UTF8_surrogate(wchar) || _UTF8_findlen(wchar) != c)
+			goto ilseq;
 	}
-
+	if (pwc != NULL)
+		*pwc = wchar;
+	*nresult = (wchar == 0) ? 0 : s0 - *s;
 	*s = s0;
-
 	psenc->chlen = 0;
 
-	if (pwc)
-		*pwc = wchar;
-
-	if (!wchar)
-		*nresult = 0;
-	else
-		*nresult = c - chlenbak;
-
-	return (0);
+	return 0;
 
 ilseq:
-	psenc->chlen = 0;
 	*nresult = (size_t)-1;
-	return (EILSEQ);
+	return EILSEQ;
 
 restart:
 	*s = s0;
 	*nresult = (size_t)-2;
-	return (0);
+	return 0;
 }
 
 static int
@@ -298,6 +273,10 @@
 	_DIAGASSERT(nresult != 0);
 	_DIAGASSERT(s != NULL);
 
+	if (_UTF8_surrogate(wc)) {
+		ret = EILSEQ;
+		goto err;
+	}
 	cnt = _UTF8_findlen(wc);
 	if (cnt <= 0 || cnt > 6) {
 		/* invalid UCS4 value */