Mstdlib-1.24.0
m_utf8.h
1/* The MIT License (MIT)
2 *
3 * Copyright (c) 2018 Monetra Technologies, LLC.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 * THE SOFTWARE.
22 */
23
24#ifndef __M_UTF8_H__
25#define __M_UTF8_H__
26
27/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
28
29#include <mstdlib/base/m_defs.h>
30#include <mstdlib/base/m_types.h>
31
32/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
33
34__BEGIN_DECLS
35
36/*! \addtogroup m_utf8 utf-8 Handling
37 * \ingroup mstdlib_base
38 *
39 * Targets unicode 10.0.
40 *
41 * \note Non-characters are considered error conditions because
42 * they do not have a defined meaning.
43 *
44 * A utf-8 sequence is defined as the variable number of bytes that represent
45 * a single utf-8 display character.
46 *
47 * @{
48 */
49
/*! Error codes.
 *
 * Returned by the utf-8 functions that report a result. Enumerators carry no
 * explicit values, so they are numbered from 0 in declaration order:
 * M_UTF8_ERROR_SUCCESS is 0 and every failure code is non-zero.
 */
typedef enum {
	M_UTF8_ERROR_SUCCESS,         /*!< Success. */
	M_UTF8_ERROR_BAD_START,       /*!< Start of byte sequence is invalid. */
	M_UTF8_ERROR_TRUNCATED,       /*!< The utf-8 character length exceeds the data length. */
	M_UTF8_ERROR_EXPECT_CONTINUE, /*!< A continuation marker was expected but not found. */
	M_UTF8_ERROR_BAD_CODE_POINT,  /*!< Code point is invalid. */
	M_UTF8_ERROR_OVERLONG,        /*!< Overlong encoding encountered. */
	M_UTF8_ERROR_INVALID_PARAM    /*!< Input parameter is invalid. */
} M_utf8_error_t;
60
61
62/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
63
64/*! Check if a given string is valid utf-8 encoded.
65 *
66 * \param[in] str utf-8 string.
67 * \param[out] endptr On success, will be set to the NULL terminator.
68 * On error, will be set to the character that caused the failure.
69 *
70 * \return M_TRUE if str is a valid utf-8 sequence. Otherwise, M_FALSE.
71 */
72M_API M_bool M_utf8_is_valid(const char *str, const char **endptr);
73
74
75/*! Check if a given code point is valid for utf-8.
76 *
77 * \param[in] cp Code point.
78 *
79 * \return M_TRUE if code point is valid for utf-8. Otherwise, M_FALSE.
80 */
81M_API M_bool M_utf8_is_valid_cp(M_uint32 cp);
82
83
84/*! Get the number of utf-8 characters in a string.
85 *
86 * This is the number of characters not the number of bytes in the string.
87 * M_str_len will only return the same value if the string is only ascii.
88 *
89 * \param[in] str utf-8 string.
90 *
91 * \return Number of characters on success. On failure will return 0. Use
92 * M_str_isempty to determine if 0 is a failure or empty string.
93 */
94M_API size_t M_utf8_cnt(const char *str);
95
96
97/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
98
99/*! Read a utf-8 sequence as a code point.
100 *
101 * \param[in] str utf-8 string.
102 * \param[out] cp Code point. Can be NULL.
103 * \param[out] next Start of next character. Will point to NULL terminator
104 * if last character.
105 *
106 * \return Result.
107 */
108M_API M_utf8_error_t M_utf8_get_cp(const char *str, M_uint32 *cp, const char **next);
109
110
111/*! Read a utf-8 sequence.
112 *
113 * Output is _not_ NULL terminated.
114 *
115 * \param[in] str utf-8 string.
116 * \param[out] buf Buffer to put utf-8 sequence. Can be NULL.
117 * \param[in] buf_size Size of the buffer.
118 * \param[out] len Length of the sequence that was put into buffer.
119 * \param[out] next Start of next character. Will point to NULL terminator
120 * if last character.
121 *
122 * \return Result.
123 */
124M_API M_utf8_error_t M_utf8_get_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next);
125
126
127/*! Read a utf-8 sequence into an M_buf_t.
128 *
129 * \param[in] str utf-8 string.
130 * \param[in] buf Buffer to put utf-8 sequence.
131 * \param[out] next Start of next character. Will point to NULL terminator
132 * if last character.
133 *
134 * \return Result.
135 */
136M_API M_utf8_error_t M_utf8_get_chr_buf(const char *str, M_buf_t *buf, const char **next);
137
138
139/*! Get the location of the next utf-8 sequence.
140 *
141 * Does not validate characters. Useful when parsing an invalid string and
142 * wanting to move past to ignore or replace invalid characters.
143 *
144 * \param[in] str utf-8 string.
145 *
146 * \return Pointer to next character in sequence.
147 */
148M_API char *M_utf8_next_chr(const char *str);
149
150
151/*! Convert a code point to a utf-8 sequence.
152 *
153 * Output is _not_ NULL terminated.
154 *
155 * \param[out] buf Buffer to put utf-8 sequence.
156 * \param[in] buf_size Size of the buffer.
157 * \param[out] len Length of the sequence that was put into buffer.
158 * \param[in] cp Code point to convert from.
159 *
160 * \return Result.
161 */
162M_API M_utf8_error_t M_utf8_from_cp(char *buf, size_t buf_size, size_t *len, M_uint32 cp);
163
164
165/*! Convert a code point to a utf-8 sequence writing to an M_buf_t.
166 *
167 * \param[in] buf Buffer to put utf-8 sequence.
168 * \param[in] cp Code point to convert from.
169 *
170 * \return Result.
171 */
173
174
175/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
176
177/*! Get the code point at a given index.
178 *
179 * Index is based on M_utf8_cnt _not_ the number of bytes.
180 * This causes a *full* scan of the string. Iteration should use
181 * M_utf8_get_cp.
182 *
183 * \param[in] str utf-8 string.
184 * \param[in] idx Index.
185 * \param[out] cp Code point.
186 *
187 * \return Result.
188 */
189M_API M_utf8_error_t M_utf8_cp_at(const char *str, size_t idx, M_uint32 *cp);
190
191
192/*! Get the utf-8 sequence at a given index.
193 *
194 * Index is based on M_utf8_cnt _not_ the number of bytes.
195 * This causes a *full* scan of the string. Iteration should use
196 * M_utf8_get_chr.
197 *
198 * \param[in] str utf-8 string.
199 * \param[out] buf Buffer to put utf-8 sequence.
200 * \param[in] buf_size Size of the buffer.
201 * \param[out] len Length of the sequence that was put into buffer.
202 * \param[in] idx Index.
203 *
204 * \return Result.
205 */
206M_API M_utf8_error_t M_utf8_chr_at(const char *str, char *buf, size_t buf_size, size_t *len, size_t idx);
207
208
209/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
210
211/*! \addtogroup m_utf8_case Case Folding
212 * \ingroup m_utf8
213 *
214 * The case folding as defined by the official UTF-8 mapping is utilized.
215 * UTF-8 does not have a one to one mapping for case folding. Multiple codes
216 * can fold to the same code point. Conversion to upper, then to lower, then
217 * back to upper can result in a different upper case string than the original
218 * input.
219 *
220 * For example, 0x004B (capital K) maps to 0x006B (lower k).
221 * 0x212A (kelvin sign) also maps to 0x006B. 0x006B maps to
222 * 0x004B. So converting 0x212A to lower then back to upper
223 * will output 0x004B.
224 *
225 * \note
226 * Not all characters have a case equivalent. These characters
227 * will return themselves when folded.
228 *
229 * @{
230 */
231
232/*! Convert a code point to the equivalent upper case code point.
233 *
234 * \param[in] cp Code point to convert.
235 * \param[out] upper_cp Equivalent upper case code point. Or cp if
236 * there is no equivalent.
237 *
238 * \return Result.
239 */
240M_API M_utf8_error_t M_utf8_toupper_cp(M_uint32 cp, M_uint32 *upper_cp);
241
242
243/*! Read a utf-8 sequence converting to upper case.
244 *
245 * Output is _not_ NULL terminated.
246 *
247 * \param[in] str utf-8 string.
248 * \param[out] buf Buffer to put utf-8 sequence. Can be NULL.
249 * \param[in] buf_size Size of the buffer.
250 * \param[out] len Length of the sequence that was put into buffer.
251 * \param[out] next Start of next character. Will point to NULL terminator
252 * if last character.
253 *
254 * \return Result.
255 */
256M_API M_utf8_error_t M_utf8_toupper_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next);
257
258
259/*! Read a utf-8 sequence into an M_buf_t converting to upper case.
260 *
261 * \param[in] str utf-8 string.
262 * \param[in] buf Buffer to put upper case utf-8 sequence.
263 * \param[out] next Start of next character. Will point to NULL terminator
264 * if last character.
265 *
266 * \return Result.
267 */
268M_API M_utf8_error_t M_utf8_toupper_chr_buf(const char *str, M_buf_t *buf, const char **next);
269
270
271/*! Convert a utf-8 string to an upper case equivalent string.
272 *
273 * \param[in] str utf-8 string.
274 * \param[out] out Upper case utf-8 string.
275 *
276 * \return Result.
277 */
278M_API M_utf8_error_t M_utf8_toupper(const char *str, char **out);
279
280
281/*! Read a utf-8 string into an M_buf_t converting to upper case.
282 *
283 * \param[in] str utf-8 string.
284 * \param[in] buf Buffer to put upper case utf-8 string.
285 *
286 * \return Result.
287 */
288M_API M_utf8_error_t M_utf8_toupper_buf(const char *str, M_buf_t *buf);
289
290
291/*! Convert a code point to the equivalent lower case code point.
292 *
293 * \param[in] cp Code point to convert.
294 * \param[out] lower_cp Equivalent lower case code point. Or cp if
295 * there is no equivalent.
296 *
297 * \return Result.
298 */
299M_API M_utf8_error_t M_utf8_tolower_cp(M_uint32 cp, M_uint32 *lower_cp);
300
301
302/*! Read a utf-8 sequence converting to lower case.
303 *
304 * Output is _not_ NULL terminated.
305 *
306 * \param[in] str utf-8 string.
307 * \param[out] buf Buffer to put utf-8 sequence. Can be NULL.
308 * \param[in] buf_size Size of the buffer.
309 * \param[out] len Length of the sequence that was put into buffer.
310 * \param[out] next Start of next character. Will point to NULL terminator
311 * if last character.
312 *
313 * \return Result.
314 */
315M_API M_utf8_error_t M_utf8_tolower_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next);
316
317
318/*! Read a utf-8 sequence into an M_buf_t converting to lower case.
319 *
320 * \param[in] str utf-8 string.
321 * \param[in] buf Buffer to put lower case utf-8 sequence.
322 * \param[out] next Start of next character. Will point to NULL terminator
323 * if last character.
324 *
325 * \return Result.
326 */
327M_API M_utf8_error_t M_utf8_tolower_chr_buf(const char *str, M_buf_t *buf, const char **next);
328
329
330/*! Convert a utf-8 string to a lower case equivalent string.
331 *
332 * \param[in] str utf-8 string.
333 * \param[out] out Lower case utf-8 string.
334 *
335 * \return Result.
336 */
337M_API M_utf8_error_t M_utf8_tolower(const char *str, char **out);
338
339
340/*! Read a utf-8 string into an M_buf_t converting to lower case.
341 *
342 * \param[in] str utf-8 string.
343 * \param[in] buf Buffer to put lower case utf-8 string.
344 *
345 * \return Result.
346 */
347M_API M_utf8_error_t M_utf8_tolower_buf(const char *str, M_buf_t *buf);
348
349
350/*! Convert a code point to the equivalent title case code point.
351 *
352 * \param[in] cp Code point to convert.
353 * \param[out] title_cp Equivalent title case code point. Or cp if
354 * there is no equivalent.
355 *
356 * \return Result.
357 */
358M_API M_utf8_error_t M_utf8_totitle_cp(M_uint32 cp, M_uint32 *title_cp);
359
360
361/*! Read a utf-8 sequence converting to title case.
362 *
363 * Output is _not_ NULL terminated.
364 *
365 * \param[in] str utf-8 string.
366 * \param[out] buf Buffer to put utf-8 sequence. Can be NULL.
367 * \param[in] buf_size Size of the buffer.
368 * \param[out] len Length of the sequence that was put into buffer.
369 * \param[out] next Start of next character. Will point to NULL terminator
370 * if last character.
371 *
372 * \return Result.
373 */
374M_API M_utf8_error_t M_utf8_totitle_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next);
375
376
377/*! Read a utf-8 sequence into an M_buf_t converting to title case.
378 *
379 * \param[in] str utf-8 string.
380 * \param[in] buf Buffer to put title case utf-8 sequence.
381 * \param[out] next Start of next character. Will point to NULL terminator
382 * if last character.
383 *
384 * \return Result.
385 */
386M_API M_utf8_error_t M_utf8_totitle_chr_buf(const char *str, M_buf_t *buf, const char **next);
387
388
389/*! Convert a utf-8 string to a title case equivalent string.
390 *
391 * \param[in] str utf-8 string.
392 * \param[out] out Title case utf-8 string.
393 *
394 * \return Result.
395 */
396M_API M_utf8_error_t M_utf8_totitle(const char *str, char **out);
397
398
399/*! Read a utf-8 string into an M_buf_t converting to title case.
400 *
401 * \param[in] str utf-8 string.
402 * \param[in] buf Buffer to put title case utf-8 string.
403 *
404 * \return Result.
405 */
406M_API M_utf8_error_t M_utf8_totitle_buf(const char *str, M_buf_t *buf);
407
408/*! @} */
409
410/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
411
412/*! \addtogroup m_utf8_check Checking/Validation
413 * \ingroup m_utf8
414 *
415 * UTF-8 Checking and Validation
416 *
417 * Validate if a UTF-8 sequence or string is comprised
418 * of a given type of characters.
419 *
420 * @{
421 */
422
423/*! Checks for a lower-case code point.
424 *
425 * Derived Core Properties: Lowercase.
426 * -> General Category: Ll + Other_Lowercase
427 *
428 * \param[in] cp Code point.
429 *
430 * \return M_TRUE if lowercase. Otherwise M_FALSE.
431 */
432M_API M_bool M_utf8_islower_cp(M_uint32 cp);
433
434
435/*! Checks if a utf-8 sequence is lower-case.
436 *
437 * \param[in] str utf-8 string.
438 * \param[out] next Start of next character. Will point to NULL terminator
439 * if last character.
440 *
441 * \return M_TRUE if lowercase. Otherwise M_FALSE.
442 */
443M_API M_bool M_utf8_islower_chr(const char *str, const char **next);
444
445
446/*! Checks if a utf-8 string is lower-case.
447 *
448 * \param[in] str utf-8 string.
449 *
450 * \return M_TRUE if lowercase. Otherwise M_FALSE.
451 */
452M_API M_bool M_utf8_islower(const char *str);
453
454
455/*! Checks for an upper-case code point.
456 *
457 * Derived Core Properties: Uppercase.
458 * -> General Category: Lu + Other_Uppercase
459 *
460 * \param[in] cp Code point.
461 *
462 * \return M_TRUE if uppercase. Otherwise M_FALSE.
463 */
464M_API M_bool M_utf8_isupper_cp(M_uint32 cp);
465
466
467/*! Checks if a utf-8 sequence is upper-case.
468 *
469 * \param[in] str utf-8 string.
470 * \param[out] next Start of next character. Will point to NULL terminator
471 * if last character.
472 *
473 * \return M_TRUE if uppercase. Otherwise M_FALSE.
474 */
475M_API M_bool M_utf8_isupper_chr(const char *str, const char **next);
476
477
478/*! Checks if a utf-8 string is upper-case.
479 *
480 * \param[in] str utf-8 string.
481 *
482 * \return M_TRUE if uppercase. Otherwise M_FALSE.
483 */
484M_API M_bool M_utf8_isupper(const char *str);
485
486
487/*! Checks for an alphabetic cp.
488 *
489 * Derived Core Properties: Alphabetic.
490 * -> Lowercase + Uppercase + Lt + Lm + Lo + Nl + Other_Alphabetic
491 *
492 * \param[in] cp Code point.
493 *
494 * \return M_TRUE if alphabetic. Otherwise M_FALSE.
495 */
496M_API M_bool M_utf8_isalpha_cp(M_uint32 cp);
497
498
499/*! Checks if a utf-8 sequence is alphabetic.
500 *
501 * \param[in] str utf-8 string.
502 * \param[out] next Start of next character. Will point to NULL terminator
503 * if last character.
504 *
505 * \return M_TRUE if alphabetic. Otherwise M_FALSE.
506 */
507M_API M_bool M_utf8_isalpha_chr(const char *str, const char **next);
508
509
510/*! Checks if a utf-8 string is alphabetic.
511 *
512 * \param[in] str utf-8 string.
513 *
514 * \return M_TRUE if alphabetic. Otherwise M_FALSE.
515 */
516M_API M_bool M_utf8_isalpha(const char *str);
517
518
519/*! Checks for an alphabetic or numeric cp.
520 *
521 * Alphabetic + Nd + Nl + No.
522 *
523 * \param[in] cp Code point.
524 *
525 * \return M_TRUE if alphanumeric. Otherwise M_FALSE.
526 */
527M_API M_bool M_utf8_isalnum_cp(M_uint32 cp);
528
529
530/*! Checks if a utf-8 sequence is alphabetic or numeric.
531 *
532 * \param[in] str utf-8 string.
533 * \param[out] next Start of next character. Will point to NULL terminator
534 * if last character.
535 *
536 * \return M_TRUE if alphanumeric. Otherwise M_FALSE.
537 */
538M_API M_bool M_utf8_isalnum_chr(const char *str, const char **next);
539
540
541/*! Checks if a utf-8 string is alphabetic or numeric.
542 *
543 * \param[in] str utf-8 string.
544 *
545 * \return M_TRUE if alphanumeric. Otherwise M_FALSE.
546 */
547M_API M_bool M_utf8_isalnum(const char *str);
548
549
550/*! Checks for numeric code point.
551 *
552 * General Category: Nd, Nl, No
553 *
554 * \param[in] cp Code point.
555 *
556 * \return M_TRUE if numeric. Otherwise M_FALSE.
557 */
558M_API M_bool M_utf8_isnum_cp(M_uint32 cp);
559
560
561/*! Checks if a utf-8 sequence is numeric.
562 *
563 * \param[in] str utf-8 string.
564 * \param[out] next Start of next character. Will point to NULL terminator
565 * if last character.
566 *
567 * \return M_TRUE if numeric. Otherwise M_FALSE.
568 */
569M_API M_bool M_utf8_isnum_chr(const char *str, const char **next);
570
571
572/*! Checks if a utf-8 string is numeric.
573 *
574 * \param[in] str utf-8 string.
575 *
576 * \return M_TRUE if numeric. Otherwise M_FALSE.
577 */
578M_API M_bool M_utf8_isnum(const char *str);
579
580
581/*! Checks for a control character code point.
582 *
583 * General Category: Cc
584 *
585 * \param[in] cp Code point.
586 *
587 * \return M_TRUE if control. Otherwise M_FALSE.
588 */
589M_API M_bool M_utf8_iscntrl_cp(M_uint32 cp);
590
591
592/*! Checks if a utf-8 sequence is a control character.
593 *
594 * \param[in] str utf-8 string.
595 * \param[out] next Start of next character. Will point to NULL terminator
596 * if last character.
597 *
598 * \return M_TRUE if control. Otherwise M_FALSE.
599 */
600M_API M_bool M_utf8_iscntrl_chr(const char *str, const char **next);
601
602
603/*! Checks if a utf-8 string is a control character.
604 *
605 * \param[in] str utf-8 string.
606 *
607 * \return M_TRUE if control. Otherwise M_FALSE.
608 */
609M_API M_bool M_utf8_iscntrl(const char *str);
610
611
612/*! Checks for a punctuation code point.
613 *
614 * General Category: Pc + Pd + Ps + Pe + Pi + Pf + Po
615 *
616 * \param[in] cp Code point.
617 *
618 * \return M_TRUE if punctuation. Otherwise M_FALSE.
619 */
620M_API M_bool M_utf8_ispunct_cp(M_uint32 cp);
621
622
623/*! Checks if a utf-8 sequence is punctuation.
624 *
625 * \param[in] str utf-8 string.
626 * \param[out] next Start of next character. Will point to NULL terminator
627 * if last character.
628 *
629 * \return M_TRUE if punctuation. Otherwise M_FALSE.
630 */
631M_API M_bool M_utf8_ispunct_chr(const char *str, const char **next);
632
633
634/*! Checks if a utf-8 string is punctuation.
635 *
636 * \param[in] str utf-8 string.
637 *
638 * \return M_TRUE if punctuation. Otherwise M_FALSE.
639 */
640M_API M_bool M_utf8_ispunct(const char *str);
641
642
643/*! Checks for a printable codepoint.
644 *
645 * Defined as tables L, M, N, P, S, ASCII space, and UniHan.
646 *
647 * \param[in] cp Code point.
648 *
649 * \return M_TRUE if printable. Otherwise M_FALSE.
650 */
651M_API M_bool M_utf8_isprint_cp(M_uint32 cp);
652
653
654/*! Checks if a utf-8 sequence is printable.
655 *
656 * Defined as tables L, M, N, P, S, ASCII space, and UniHan.
657 *
658 * \param[in] str utf-8 string.
659 * \param[out] next Start of next character. Will point to NULL terminator
660 * if last character.
661 *
662 * \return M_TRUE if printable. Otherwise M_FALSE.
663 */
664M_API M_bool M_utf8_isprint_chr(const char *str, const char **next);
665
666
667/*! Checks if a utf-8 string is printable.
668 *
669 * Defined as tables L, M, N, P, S, ASCII space, and UniHan.
670 *
671 * \param[in] str utf-8 string.
672 *
673 * \return M_TRUE if printable. Otherwise M_FALSE.
674 */
675M_API M_bool M_utf8_isprint(const char *str);
676
677
678/*! Checks for a unihan codepoint.
679 *
680 * Defined as tables L, M, N, P, S, ASCII space, and UniHan.
681 *
682 * \param[in] cp Code point.
683 *
684 * \return M_TRUE if unihan. Otherwise M_FALSE.
685 */
686M_API M_bool M_utf8_isunihan_cp(M_uint32 cp);
687
688
689/*! Checks if a utf-8 sequence is unihan.
690 *
691 * \param[in] str utf-8 string.
692 * \param[out] next Start of next character. Will point to NULL terminator
693 * if last character.
694 *
695 * \return M_TRUE if unihan. Otherwise M_FALSE.
696 */
697M_API M_bool M_utf8_isunihan_chr(const char *str, const char **next);
698
699
700/*! Checks if a utf-8 string is unihan.
701 *
702 * \param[in] str utf-8 string.
703 *
704 * \return M_TRUE if unihan. Otherwise M_FALSE.
705 */
706M_API M_bool M_utf8_isunihan(const char *str);
707
708/*! @} */
709
710/*! @} */
711
712__END_DECLS
713
714#endif /* __M_UTF8_H__ */
struct M_buf M_buf_t
Definition: m_buf.h:77
M_utf8_error_t M_utf8_tolower(const char *str, char **out)
M_utf8_error_t M_utf8_tolower_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next)
M_utf8_error_t M_utf8_tolower_cp(M_uint32 cp, M_uint32 *lower_cp)
M_utf8_error_t M_utf8_totitle(const char *str, char **out)
M_utf8_error_t M_utf8_toupper_chr_buf(const char *str, M_buf_t *buf, const char **next)
M_utf8_error_t M_utf8_totitle_cp(M_uint32 cp, M_uint32 *title_cp)
M_utf8_error_t M_utf8_toupper_cp(M_uint32 cp, M_uint32 *upper_cp)
M_utf8_error_t M_utf8_toupper_buf(const char *str, M_buf_t *buf)
M_utf8_error_t M_utf8_toupper(const char *str, char **out)
M_utf8_error_t M_utf8_totitle_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next)
M_utf8_error_t M_utf8_totitle_chr_buf(const char *str, M_buf_t *buf, const char **next)
M_utf8_error_t M_utf8_totitle_buf(const char *str, M_buf_t *buf)
M_utf8_error_t M_utf8_tolower_chr_buf(const char *str, M_buf_t *buf, const char **next)
M_utf8_error_t M_utf8_tolower_buf(const char *str, M_buf_t *buf)
M_utf8_error_t M_utf8_toupper_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next)
M_bool M_utf8_isprint_cp(M_uint32 cp)
M_bool M_utf8_isunihan_chr(const char *str, const char **next)
M_bool M_utf8_isprint(const char *str)
M_bool M_utf8_isalpha_cp(M_uint32 cp)
M_bool M_utf8_isunihan_cp(M_uint32 cp)
M_bool M_utf8_islower(const char *str)
M_bool M_utf8_isupper(const char *str)
M_bool M_utf8_ispunct(const char *str)
M_bool M_utf8_isalnum_chr(const char *str, const char **next)
M_bool M_utf8_isnum(const char *str)
M_bool M_utf8_ispunct_chr(const char *str, const char **next)
M_bool M_utf8_isalnum_cp(M_uint32 cp)
M_bool M_utf8_isnum_chr(const char *str, const char **next)
M_bool M_utf8_isupper_chr(const char *str, const char **next)
M_bool M_utf8_isupper_cp(M_uint32 cp)
M_bool M_utf8_isunihan(const char *str)
M_bool M_utf8_isnum_cp(M_uint32 cp)
M_bool M_utf8_islower_chr(const char *str, const char **next)
M_bool M_utf8_iscntrl_cp(M_uint32 cp)
M_bool M_utf8_isprint_chr(const char *str, const char **next)
M_bool M_utf8_iscntrl_chr(const char *str, const char **next)
M_bool M_utf8_isalpha(const char *str)
M_bool M_utf8_ispunct_cp(M_uint32 cp)
M_bool M_utf8_isalpha_chr(const char *str, const char **next)
M_bool M_utf8_iscntrl(const char *str)
M_bool M_utf8_isalnum(const char *str)
M_bool M_utf8_islower_cp(M_uint32 cp)
M_utf8_error_t M_utf8_get_chr(const char *str, char *buf, size_t buf_size, size_t *len, const char **next)
M_bool M_utf8_is_valid_cp(M_uint32 cp)
M_utf8_error_t
Definition: m_utf8.h:51
char * M_utf8_next_chr(const char *str)
M_bool M_utf8_is_valid(const char *str, const char **endptr)
size_t M_utf8_cnt(const char *str)
M_utf8_error_t M_utf8_from_cp(char *buf, size_t buf_size, size_t *len, M_uint32 cp)
M_utf8_error_t M_utf8_chr_at(const char *str, char *buf, size_t buf_size, size_t *len, size_t idx)
M_utf8_error_t M_utf8_get_chr_buf(const char *str, M_buf_t *buf, const char **next)
M_utf8_error_t M_utf8_get_cp(const char *str, M_uint32 *cp, const char **next)
M_utf8_error_t M_utf8_cp_at(const char *str, size_t idx, M_uint32 *cp)
M_utf8_error_t M_utf8_from_cp_buf(M_buf_t *buf, M_uint32 cp)
@ M_UTF8_ERROR_INVALID_PARAM
Definition: m_utf8.h:58
@ M_UTF8_ERROR_BAD_START
Definition: m_utf8.h:53
@ M_UTF8_ERROR_OVERLONG
Definition: m_utf8.h:57
@ M_UTF8_ERROR_TRUNCATED
Definition: m_utf8.h:54
@ M_UTF8_ERROR_BAD_CODE_POINT
Definition: m_utf8.h:56
@ M_UTF8_ERROR_EXPECT_CONTINUE
Definition: m_utf8.h:55
@ M_UTF8_ERROR_SUCCESS
Definition: m_utf8.h:52