From 754bbf7a25a8dda49b5d08ef0d0443bbf5af0e36 Mon Sep 17 00:00:00 2001 From: Craig Jennings Date: Sun, 7 Apr 2024 13:41:34 -0500 Subject: new repository --- devdocs/c/string%2Fmultibyte%2Fmblen.html | 73 +++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 devdocs/c/string%2Fmultibyte%2Fmblen.html (limited to 'devdocs/c/string%2Fmultibyte%2Fmblen.html') diff --git a/devdocs/c/string%2Fmultibyte%2Fmblen.html b/devdocs/c/string%2Fmultibyte%2Fmblen.html new file mode 100644 index 00000000..46facf2a --- /dev/null +++ b/devdocs/c/string%2Fmultibyte%2Fmblen.html @@ -0,0 +1,73 @@ +

mblen

Defined in header <stdlib.h>
int mblen( const char* s, size_t n );
+

Determines the size, in bytes, of the multibyte character whose first byte is pointed to by s.

+

If s is a null pointer, resets the global conversion state and (until C23) determines whether shift sequences are used.

+

This function is equivalent to the call mbtowc((wchar_t*)0, s, n), except that the conversion state of mbtowc is unaffected.

+

Parameters

+ + +
s - pointer to the multibyte character
n - limit on the number of bytes in s that can be examined

Return value

If s is not a null pointer, returns the number of bytes that are contained in the multibyte character, or -1 if the first bytes pointed to by s do not form a valid multibyte character, or 0 if s is pointing at the null character '\0'.

+

If s is a null pointer, resets its internal conversion state to represent the initial shift state and (until C23) returns 0 if the current multibyte encoding is not state-dependent (does not use shift sequences) or a non-zero value if the current multibyte encoding is state-dependent (uses shift sequences).

+

Notes

+ + + +

Each call to mblen updates the internal global conversion state (a static object of type mbstate_t, only known to this function). If the multibyte encoding uses shift states, care must be taken to avoid backtracking or multiple scans. In any case, multiple threads should not call mblen without synchronization: mbrlen may be used instead.

+
(until C23)

mblen is not allowed to have an internal state.

+
(since C23)

Example

#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+ 
+// the number of characters in a multibyte string is the sum of mblen()'s
+// note: the simpler approach is mbstowcs(NULL, str, sz)
+size_t strlen_mb(const char* ptr)
+{
+    size_t result = 0;
+    const char* end = ptr + strlen(ptr);
+    mblen(NULL, 0); // reset the conversion state
+    while(ptr < end) {
+        int next = mblen(ptr, end - ptr);
+        if (next == -1) {
+           perror("strlen_mb");
+           break;
+        }
+        ptr += next;
+        ++result;
+    }
+    return result;
+}
+ 
+void dump_bytes(const char* str)
+{
+    for (const char* end = str + strlen(str); str != end; ++str)
+        printf("%02X ", (unsigned char)str[0]);
+    printf("\n");
+}
+ 
+int main(void)
+{
+    setlocale(LC_ALL, "en_US.utf8");
+    const char* str = "z\u00df\u6c34\U0001f34c";
+    printf("The string \"%s\" consists of %zu characters, but %zu bytes: ",
+            str, strlen_mb(str), strlen(str));
+    dump_bytes(str);
+}

Possible output:

+
The string "zß水🍌" consists of 4 characters, but 10 bytes: 7A C3 9F E6 B0 B4 F0 9F 8D 8C

References

See also

+ + +
mbtowc - converts the next multibyte character to wide character
(function)
+
(C95)
mbrlen - returns the number of bytes in the next multibyte character, given state
(function)
C++ documentation for mblen
+

+ © cppreference.com
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
+ https://en.cppreference.com/w/c/string/multibyte/mblen +

+
-- cgit v1.2.3