| // © 2017 and later: Unicode, Inc. and others. | |
| // License & terms of use: http://www.unicode.org/copyright.html | |
| // casemap.h | |
| // created: 2017jan12 Markus W. Scherer | |
| /** | |
| * \file | |
| * \brief C++ API: Low-level C++ case mapping functions. | |
| */ | |
| U_NAMESPACE_BEGIN | |
| class BreakIterator; | |
| class ByteSink; | |
| class Edits; | |
| /** | |
| * Low-level C++ case mapping functions. | |
| * | |
| * All functions are static, and the class cannot be instantiated, copied, or assigned: | |
| * the default constructor, the copy constructor, and the copy assignment operator | |
| * are all declared private and deleted. | |
| * | |
| * The functions come in three groups: | |
| * - UTF-16 functions (toLower, toUpper, toTitle, fold) that write into a caller-provided | |
| * char16_t buffer and return the length of the result. | |
| * - UTF-8 functions (utf8ToLower, utf8ToUpper, utf8ToTitle, utf8Fold) that take a StringPiece | |
| * and write the result to a ByteSink; these have no return value. | |
| * - UTF-8 overloads of the same names that write into a caller-provided char buffer | |
| * and return the length of the result. | |
| * | |
| * BreakIterator, ByteSink, and Edits are only forward-declared before this class because | |
| * the declarations here refer to them solely through pointers or references. | |
| * NOTE(review): StringPiece is used as a parameter type but is not declared in this part | |
| * of the file; confirm that its header is included before this class. | |
| * | |
| * @stable ICU 59 | |
| */ | |
| class U_COMMON_API CaseMap U_FINAL : public UMemory { | |
| public: | |
| /** | |
| * Lowercases a UTF-16 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
| * @param src The original UTF-16 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see u_strToLower | |
| * @stable ICU 59 | |
| */ | |
| static int32_t toLower( | |
| const char *locale, uint32_t options, | |
| const char16_t *src, int32_t srcLength, | |
| char16_t *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Uppercases a UTF-16 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
| * @param src The original UTF-16 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see u_strToUpper | |
| * @stable ICU 59 | |
| */ | |
| static int32_t toUpper( | |
| const char *locale, uint32_t options, | |
| const char16_t *src, int32_t srcLength, | |
| char16_t *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Titlecases a UTF-16 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * Titlecasing uses a break iterator to find the first characters of words | |
| * that are to be titlecased. It titlecases those characters and lowercases | |
| * all others. (This can be modified with options bits.) | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
| * U_TITLECASE_NO_LOWERCASE, | |
| * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, | |
| * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. | |
| * @param iter A break iterator to find the first characters of words that are to be titlecased. | |
| * It is set to the source string (setText()) | |
| * and used one or more times for iteration (first() and next()). | |
| * If NULL, then a word break iterator for the locale is used | |
| * (or something equivalent). | |
| * @param src The original UTF-16 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see u_strToTitle | |
| * @see ucasemap_toTitle | |
| * @stable ICU 59 | |
| */ | |
| static int32_t toTitle( | |
| const char *locale, uint32_t options, BreakIterator *iter, | |
| const char16_t *src, int32_t srcLength, | |
| char16_t *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Case-folds a UTF-16 string and optionally records edits. | |
| * | |
| * Case folding is locale-independent and not context-sensitive, | |
| * but there is an option for whether to include or exclude mappings for dotted I | |
| * and dotless i that are marked with 'T' in CaseFolding.txt. | |
| * | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
| * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
| * @param src The original UTF-16 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see u_strFoldCase | |
| * @stable ICU 59 | |
| */ | |
| static int32_t fold( | |
| uint32_t options, | |
| const char16_t *src, int32_t srcLength, | |
| char16_t *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Lowercases a UTF-8 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The result is delivered only through sink; there is no return value. | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
| * @param src The original UTF-8 string. | |
| * @param sink A ByteSink to which the result string is written. | |
| * sink.Flush() is called at the end. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * | |
| * @see ucasemap_utf8ToLower | |
| * @stable ICU 60 | |
| */ | |
| static void utf8ToLower( | |
| const char *locale, uint32_t options, | |
| StringPiece src, ByteSink &sink, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Uppercases a UTF-8 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The result is delivered only through sink; there is no return value. | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
| * @param src The original UTF-8 string. | |
| * @param sink A ByteSink to which the result string is written. | |
| * sink.Flush() is called at the end. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * | |
| * @see ucasemap_utf8ToUpper | |
| * @stable ICU 60 | |
| */ | |
| static void utf8ToUpper( | |
| const char *locale, uint32_t options, | |
| StringPiece src, ByteSink &sink, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Titlecases a UTF-8 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The result is delivered only through sink; there is no return value. | |
| * | |
| * Titlecasing uses a break iterator to find the first characters of words | |
| * that are to be titlecased. It titlecases those characters and lowercases | |
| * all others. (This can be modified with options bits.) | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
| * U_TITLECASE_NO_LOWERCASE, | |
| * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, | |
| * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. | |
| * @param iter A break iterator to find the first characters of words that are to be titlecased. | |
| * It is set to the source string (setUText()) | |
| * and used one or more times for iteration (first() and next()). | |
| * If NULL, then a word break iterator for the locale is used | |
| * (or something equivalent). | |
| * @param src The original UTF-8 string. | |
| * @param sink A ByteSink to which the result string is written. | |
| * sink.Flush() is called at the end. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * | |
| * @see ucasemap_utf8ToTitle | |
| * @stable ICU 60 | |
| */ | |
| static void utf8ToTitle( | |
| const char *locale, uint32_t options, BreakIterator *iter, | |
| StringPiece src, ByteSink &sink, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Case-folds a UTF-8 string and optionally records edits. | |
| * | |
| * Case folding is locale-independent and not context-sensitive, | |
| * but there is an option for whether to include or exclude mappings for dotted I | |
| * and dotless i that are marked with 'T' in CaseFolding.txt. | |
| * | |
| * The result may be longer or shorter than the original. | |
| * The result is delivered only through sink; there is no return value. | |
| * | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
| * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
| * @param src The original UTF-8 string. | |
| * @param sink A ByteSink to which the result string is written. | |
| * sink.Flush() is called at the end. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * | |
| * @see ucasemap_utf8FoldCase | |
| * @stable ICU 60 | |
| */ | |
| static void utf8Fold( | |
| uint32_t options, | |
| StringPiece src, ByteSink &sink, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Lowercases a UTF-8 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
| * @param src The original UTF-8 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see ucasemap_utf8ToLower | |
| * @stable ICU 59 | |
| */ | |
| static int32_t utf8ToLower( | |
| const char *locale, uint32_t options, | |
| const char *src, int32_t srcLength, | |
| char *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Uppercases a UTF-8 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
| * @param src The original UTF-8 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see ucasemap_utf8ToUpper | |
| * @stable ICU 59 | |
| */ | |
| static int32_t utf8ToUpper( | |
| const char *locale, uint32_t options, | |
| const char *src, int32_t srcLength, | |
| char *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Titlecases a UTF-8 string and optionally records edits. | |
| * Casing is locale-dependent and context-sensitive. | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * Titlecasing uses a break iterator to find the first characters of words | |
| * that are to be titlecased. It titlecases those characters and lowercases | |
| * all others. (This can be modified with options bits.) | |
| * | |
| * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
| * U_TITLECASE_NO_LOWERCASE, | |
| * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, | |
| * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. | |
| * @param iter A break iterator to find the first characters of words that are to be titlecased. | |
| * It is set to the source string (setUText()) | |
| * and used one or more times for iteration (first() and next()). | |
| * If NULL, then a word break iterator for the locale is used | |
| * (or something equivalent). | |
| * @param src The original UTF-8 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see ucasemap_utf8ToTitle | |
| * @stable ICU 59 | |
| */ | |
| static int32_t utf8ToTitle( | |
| const char *locale, uint32_t options, BreakIterator *iter, | |
| const char *src, int32_t srcLength, | |
| char *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| /** | |
| * Case-folds a UTF-8 string and optionally records edits. | |
| * | |
| * Case folding is locale-independent and not context-sensitive, | |
| * but there is an option for whether to include or exclude mappings for dotted I | |
| * and dotless i that are marked with 'T' in CaseFolding.txt. | |
| * | |
| * The result may be longer or shorter than the original. | |
| * The source string and the destination buffer must not overlap. | |
| * | |
| * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
| * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
| * @param src The original UTF-8 string. | |
| * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
| * @param dest A buffer for the result string. The result will be NUL-terminated if | |
| * the buffer is large enough. | |
| * The contents are undefined in case of failure. | |
| * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
| * dest may be NULL and the function will only return the length of the result | |
| * without writing any of the result string. | |
| * @param edits Records edits for index mapping, working with styled text, | |
| * and getting only changes (if any). | |
| * The Edits contents are undefined if any error occurs. | |
| * This function calls edits->reset() first unless | |
| * options includes U_EDITS_NO_RESET. edits can be NULL. | |
| * @param errorCode Reference to an in/out error code value | |
| * which must not indicate a failure before the function call. | |
| * @return The length of the result string, if successful. | |
| * When the result would be longer than destCapacity, | |
| * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
| * | |
| * @see ucasemap_utf8FoldCase | |
| * @stable ICU 59 | |
| */ | |
| static int32_t utf8Fold( | |
| uint32_t options, | |
| const char *src, int32_t srcLength, | |
| char *dest, int32_t destCapacity, Edits *edits, | |
| UErrorCode &errorCode); | |
| private: | |
| CaseMap() = delete; | |
| CaseMap(const CaseMap &other) = delete; | |
| CaseMap &operator=(const CaseMap &other) = delete; | |
| }; | |
| U_NAMESPACE_END | |