diff options
Diffstat (limited to 'src/common/tuklib_mbstr_width.c')
| -rw-r--r-- | src/common/tuklib_mbstr_width.c | 34 |
1 files changed, 28 insertions, 6 deletions
diff --git a/src/common/tuklib_mbstr_width.c b/src/common/tuklib_mbstr_width.c
index 7a8bf0707518..98c611d8f38d 100644
--- a/src/common/tuklib_mbstr_width.c
+++ b/src/common/tuklib_mbstr_width.c
@@ -12,7 +12,7 @@
 #include "tuklib_mbstr.h"
 #include <string.h>
 
-#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
+#ifdef HAVE_MBRTOWC
 #	include <wchar.h>
 #endif
 
@@ -24,9 +24,17 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
 	if (bytes != NULL)
 		*bytes = len;
 
-#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
+	return tuklib_mbstr_width_mem(str, len);
+}
+
+
+extern size_t
+tuklib_mbstr_width_mem(const char *str, size_t len)
+{
+#ifndef HAVE_MBRTOWC
 	// In single-byte mode, the width of the string is the same
 	// as its length.
+	(void)str;
 	return len;
 
 #else
@@ -41,21 +49,35 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
 	while (i < len) {
 		wchar_t wc;
 		const size_t ret = mbrtowc(&wc, str + i, len - i, &state);
-		if (ret < 1 || ret > len)
+		if (ret < 1 || ret > len - i)
 			return (size_t)-1;
 
 		i += ret;
 
+#ifdef HAVE_WCWIDTH
 		const int wc_width = wcwidth(wc);
 		if (wc_width < 0)
 			return (size_t)-1;
 
 		width += (size_t)wc_width;
+#else
+		// Without wcwidth() (like in a native Windows build),
+		// assume that one multibyte char == one column. With
+		// UTF-8, this is less bad than one byte == one column.
+		// This way quite a few languages will be handled correctly
+		// in practice; CJK chars will be very wrong though.
+		++width;
+#endif
 	}
 
-	// Require that the string ends in the initial shift state.
-	// This way the caller can be combine the string with other
-	// strings without needing to worry about the shift states.
+	// It's good to check that the string ended in the initial state.
+	// However, in practice this is redundant:
+	//
+	//   - No one will use this code with character sets that have
+	//     locking shift states.
+	//
+	//   - We already checked that mbrtowc() didn't return (size_t)-2
+	//     which would indicate a partial multibyte character.
 	if (!mbsinit(&state))
 		return (size_t)-1;
 
