summary | refs | log | tree | commit | diff
path: root/src/common/tuklib_mbstr_width.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/tuklib_mbstr_width.c')
-rw-r--r-- src/common/tuklib_mbstr_width.c 34
1 files changed, 28 insertions, 6 deletions
diff --git a/src/common/tuklib_mbstr_width.c b/src/common/tuklib_mbstr_width.c
index 7a8bf0707518..98c611d8f38d 100644
--- a/src/common/tuklib_mbstr_width.c
+++ b/src/common/tuklib_mbstr_width.c
@@ -12,7 +12,7 @@
#include "tuklib_mbstr.h"
#include <string.h>
-#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
+#ifdef HAVE_MBRTOWC
# include <wchar.h>
#endif
@@ -24,9 +24,17 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
if (bytes != NULL)
*bytes = len;
-#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
+ return tuklib_mbstr_width_mem(str, len);
+}
+
+
+extern size_t
+tuklib_mbstr_width_mem(const char *str, size_t len)
+{
+#ifndef HAVE_MBRTOWC
// In single-byte mode, the width of the string is the same
// as its length.
+ (void)str;
return len;
#else
@@ -41,21 +49,35 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
while (i < len) {
wchar_t wc;
const size_t ret = mbrtowc(&wc, str + i, len - i, &state);
- if (ret < 1 || ret > len)
+ if (ret < 1 || ret > len - i)
return (size_t)-1;
i += ret;
+#ifdef HAVE_WCWIDTH
const int wc_width = wcwidth(wc);
if (wc_width < 0)
return (size_t)-1;
width += (size_t)wc_width;
+#else
+ // Without wcwidth() (like in a native Windows build),
+ // assume that one multibyte char == one column. With
+ // UTF-8, this is less bad than one byte == one column.
+ // This way quite a few languages will be handled correctly
+ // in practice; CJK chars will be very wrong though.
+ ++width;
+#endif
}
- // Require that the string ends in the initial shift state.
- // This way the caller can be combine the string with other
- // strings without needing to worry about the shift states.
+ // It's good to check that the string ended in the initial state.
+ // However, in practice this is redundant:
+ //
+ // - No one will use this code with character sets that have
+ // locking shift states.
+ //
+ // - We already checked that mbrtowc() didn't return (size_t)-2
+ // which would indicate a partial multibyte character.
if (!mbsinit(&state))
return (size_t)-1;