| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/* FUNC: generic case-mapping worker, instantiated through the UNIT / FUNC /
   U_* macros of the including file.

   Maps the N units starting at S, one character at a time.  For characters
   below 0x10000 the special-casing table (gl_unicase_special_lookup) is
   consulted first; a rule is used only when its language matches
   ISO639_LANGUAGE (or the rule names no language) and its context condition
   holds.  Otherwise SINGLE_CHARACTER_MAP is applied to the character.

   PREFIX_CONTEXT and SUFFIX_CONTEXT supply the state used by the context
   conditions for the text before S and after S + N.
   OFFSET_IN_RULE is the byte offset, inside a struct special_casing_rule, of
   the array of up to 3 unsigned shorts holding the mapping to use.
   NF, when non-NULL, causes the mapped text to be passed through U_NORMALIZE
   together with RESULTBUF and LENGTHP.
   RESULTBUF, when non-NULL, is a caller buffer of *LENGTHP units.

   Without NF: returns RESULTBUF or a malloc'ed buffer and stores the result
   length in *LENGTHP.  With NF: returns whatever U_NORMALIZE returns.
   Returns NULL on failure; errno is EINVAL or ENOMEM for failures detected
   here, and is preserved across the cleanup free.  */
| UNIT * |
| FUNC (const UNIT *s, size_t n, |
| casing_prefix_context_t prefix_context, |
| casing_suffix_context_t suffix_context, |
| const char *iso639_language, |
| ucs4_t (*single_character_map) (ucs4_t), |
| size_t offset_in_rule, |
| uninorm_t nf, |
| UNIT *resultbuf, size_t *lengthp) |
| { |
| /* Output accumulator: buffer, units used so far, units of capacity. */ |
| UNIT *result; |
| size_t length; |
| size_t allocated; |
|
|
| /* Use the caller's buffer only when no normalization pass follows; otherwise start empty. */ |
| if (nf != NULL || resultbuf == NULL) |
| { |
| result = NULL; |
| allocated = 0; |
| } |
| else |
| { |
| result = resultbuf; |
| allocated = *lengthp; |
| } |
| length = 0; |
|
|
| { |
| const UNIT *s_end = s + n; |
|
|
| /* State for SCC_FINAL_SIGMA: most recent character that was */ |
| /* not case-ignorable, seeded from the prefix context. */ |
| ucs4_t last_char_except_ignorable = |
| prefix_context.last_char_except_ignorable; |
|
|
| /* State for SCC_AFTER_SOFT_DOTTED and SCC_AFTER_I: most recent character */ |
| /* whose combining class is UC_CCC_A or UC_CCC_NR, seeded likewise. */ |
| ucs4_t last_char_normal_or_above = |
| prefix_context.last_char_normal_or_above; |
|
|
| while (s < s_end) |
| { |
| ucs4_t uc; |
| int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s); |
|
|
| ucs4_t mapped_uc[3]; |
| unsigned int mapped_count; |
|
|
| if (uc < 0x10000) |
| { |
| /* Lookup key: high byte, low byte, then a counter stepping through the rules for uc. */ |
| char code[3]; |
|
|
| code[0] = (uc >> 8) & 0xff; |
| code[1] = uc & 0xff; |
|
|
| for (code[2] = 0; ; code[2]++) |
| { |
| const struct special_casing_rule *rule = |
| gl_unicase_special_lookup (code, 3); |
|
|
| if (rule == NULL) |
| break; |
|
|
| /* A rule is a candidate if it names no language, or if its two-character */ |
| /* language code equals the first two chars of iso639_language. */ |
| if (rule->language[0] == '\0' |
| || (iso639_language != NULL |
| && iso639_language[0] == rule->language[0] |
| && iso639_language[1] == rule->language[1])) |
| { |
| /* A negative context means the negated condition: test its absolute value, invert below. */ |
| int context = rule->context; |
| bool applies; |
|
|
| if (context < 0) |
| context = - context; |
| switch (context) |
| { |
| case SCC_ALWAYS: |
| applies = true; |
| break; |
|
|
| case SCC_FINAL_SIGMA: |
| /* Applies when (a) the last non-case-ignorable character seen */ |
| /* so far is cased, and (b) skipping case-ignorable characters */ |
| /* forward, the next character is not cased.  If the input ends */ |
| /* during the scan, (b) is decided from */ |
| /* suffix_context.first_char_except_ignorable. */ |
| |
| /* (a): look backwards via the tracked state. */ |
| applies = uc_is_cased (last_char_except_ignorable); |
| /* (b): scan forward, only if (a) held. */ |
| if (applies) |
| { |
| const UNIT *s2 = s + count; |
| for (;;) |
| { |
| if (s2 < s_end) |
| { |
| ucs4_t uc2; |
| int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); |
| /* Skip case-ignorable characters; the first character */ |
| /* that is not case-ignorable decides the result. */ |
| |
| |
| if (!uc_is_case_ignorable (uc2)) |
| { |
| applies = ! uc_is_cased (uc2); |
| break; |
| } |
| s2 += count2; |
| } |
| else |
| { |
| applies = ! uc_is_cased (suffix_context.first_char_except_ignorable); |
| break; |
| } |
| } |
| } |
| break; |
|
|
| case SCC_AFTER_SOFT_DOTTED: |
| /* Applies when the most recent character of combining class */ |
| /* UC_CCC_A or UC_CCC_NR satisfies uc_is_property_soft_dotted. */ |
| |
| |
| applies = uc_is_property_soft_dotted (last_char_normal_or_above); |
| break; |
|
|
| case SCC_MORE_ABOVE: |
| /* Applies when, scanning forward, a character of class UC_CCC_A */ |
| /* is reached before any character of class UC_CCC_NR.  At end of */ |
| /* input, the SCC_MORE_ABOVE_MASK bit of suffix_context.bits decides. */ |
| |
| { |
| const UNIT *s2 = s + count; |
| applies = false; |
| for (;;) |
| { |
| if (s2 < s_end) |
| { |
| ucs4_t uc2; |
| int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); |
| int ccc = uc_combining_class (uc2); |
| if (ccc == UC_CCC_A) |
| { |
| applies = true; |
| break; |
| } |
| if (ccc == UC_CCC_NR) |
| break; |
| s2 += count2; |
| } |
| else |
| { |
| applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0); |
| break; |
| } |
| } |
| } |
| break; |
|
|
| case SCC_BEFORE_DOT: |
| /* Applies when, scanning forward, U+0307 (COMBINING DOT ABOVE) */ |
| /* is reached before any other character of class UC_CCC_A or */ |
| /* UC_CCC_NR.  At end of input, the SCC_BEFORE_DOT_MASK bit of */ |
| /* suffix_context.bits decides. */ |
| |
| |
| { |
| const UNIT *s2 = s + count; |
| applies = false; |
| for (;;) |
| { |
| if (s2 < s_end) |
| { |
| ucs4_t uc2; |
| int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); |
| if (uc2 == 0x0307) |
| { |
| applies = true; |
| break; |
| } |
| { |
| int ccc = uc_combining_class (uc2); |
| if (ccc == UC_CCC_A || ccc == UC_CCC_NR) |
| break; |
| } |
| s2 += count2; |
| } |
| else |
| { |
| applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0); |
| break; |
| } |
| } |
| } |
| break; |
|
|
| case SCC_AFTER_I: |
| /* Applies when the most recent character of combining class */ |
| /* UC_CCC_A or UC_CCC_NR is the letter 'I'. */ |
| |
| |
| applies = (last_char_normal_or_above == 'I'); |
| break; |
|
|
| default: |
| abort (); |
| } |
| if (rule->context < 0) |
| applies = !applies; |
|
|
| if (applies) |
| { |
| /* Read the mapping stored offset_in_rule bytes into the rule: */ |
| /* up to 3 unsigned shorts, ended early by a 0 entry (possibly none). */ |
| const unsigned short *mapped_in_rule = |
| (const unsigned short *)((const char *)rule + offset_in_rule); |
|
|
| if (mapped_in_rule[0] == 0) |
| mapped_count = 0; |
| else |
| { |
| mapped_uc[0] = mapped_in_rule[0]; |
| if (mapped_in_rule[1] == 0) |
| mapped_count = 1; |
| else |
| { |
| mapped_uc[1] = mapped_in_rule[1]; |
| if (mapped_in_rule[2] == 0) |
| mapped_count = 2; |
| else |
| { |
| mapped_uc[2] = mapped_in_rule[2]; |
| mapped_count = 3; |
| } |
| } |
| } |
| goto found_mapping; |
| } |
| } |
|
|
| /* has_next is clear: stop here instead of doing another table lookup. */ |
| if (!rule->has_next) |
| break; |
| } |
| } |
|
|
| /* No special-casing rule applied (or uc >= 0x10000): */ |
| /* fall back to the single-character mapping. */ |
| mapped_uc[0] = single_character_map (uc); |
| mapped_count = 1; |
|
|
| found_mapping: |
| /* uc maps to mapped_uc[0 .. mapped_count-1]; append each one to the result. */ |
| { |
| unsigned int i; |
|
|
| for (i = 0; i < mapped_count; i++) |
| { |
| ucs4_t muc = mapped_uc[i]; |
|
|
| /* Try to encode in place; -1 fails with EINVAL, any other negative return falls through to grow. */ |
| if (length < allocated) |
| { |
| int ret = U_UCTOMB (result + length, muc, allocated - length); |
| if (ret == -1) |
| { |
| errno = EINVAL; |
| goto fail; |
| } |
| if (ret >= 0) |
| { |
| length += ret; |
| goto done_appending; |
| } |
| } |
| { |
| size_t old_allocated = allocated; |
| size_t new_allocated = 2 * old_allocated; |
| if (new_allocated < 64) |
| new_allocated = 64; |
| if (new_allocated < old_allocated) /* doubling wrapped around */ |
| abort (); |
| { |
| UNIT *larger_result; |
| if (result == NULL) |
| { |
| larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT)); |
| if (larger_result == NULL) |
| { |
| errno = ENOMEM; |
| goto fail; |
| } |
| } |
| else if (result == resultbuf) /* caller's buffer: copy out rather than realloc */ |
| { |
| larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT)); |
| if (larger_result == NULL) |
| { |
| errno = ENOMEM; |
| goto fail; |
| } |
| U_CPY (larger_result, resultbuf, length); |
| } |
| else |
| { |
| larger_result = |
| (UNIT *) realloc (result, new_allocated * sizeof (UNIT)); |
| if (larger_result == NULL) |
| { |
| errno = ENOMEM; |
| goto fail; |
| } |
| } |
| result = larger_result; |
| allocated = new_allocated; |
| { |
| int ret = U_UCTOMB (result + length, muc, allocated - length); |
| if (ret == -1) |
| { |
| errno = EINVAL; |
| goto fail; |
| } |
| if (ret < 0) /* treated as impossible after growing */ |
| abort (); |
| length += ret; |
| goto done_appending; |
| } |
| } |
| } |
| done_appending: ; |
| } |
| } |
|
|
| if (!uc_is_case_ignorable (uc)) /* update the look-behind state for the next character */ |
| last_char_except_ignorable = uc; |
|
|
| { |
| int ccc = uc_combining_class (uc); |
| if (ccc == UC_CCC_A || ccc == UC_CCC_NR) |
| last_char_normal_or_above = uc; |
| } |
|
|
| s += count; |
| } |
| } |
|
|
| if (nf != NULL) |
| { |
| /* Normalize the accumulated text, then release the intermediate buffer. */ |
| UNIT *normalized_result; |
|
|
| normalized_result = U_NORMALIZE (nf, result, length, resultbuf, lengthp); |
| if (normalized_result == NULL) |
| goto fail; |
|
|
| free (result); |
| return normalized_result; |
| } |
|
|
| if (length == 0) |
| { |
| if (result == NULL) |
| { |
| /* Empty result and no buffer: allocate one byte so the return value is non-NULL. */ |
| result = (UNIT *) malloc (1); |
| if (result == NULL) |
| { |
| errno = ENOMEM; |
| goto fail; |
| } |
| } |
| } |
| else if (result != resultbuf && length < allocated) |
| { |
| /* Trim the heap buffer to the used length; keep the larger one if realloc fails. */ |
| UNIT *memory; |
|
|
| memory = (UNIT *) realloc (result, length * sizeof (UNIT)); |
| if (memory != NULL) |
| result = memory; |
| } |
|
|
| *lengthp = length; |
| return result; |
|
|
| fail: |
| if (result != resultbuf) /* the caller's buffer is never freed here */ |
| { |
| int saved_errno = errno; |
| free (result); |
| errno = saved_errno; |
| } |
| return NULL; |
| } |
|
|