File size: 1,847 Bytes
"""
Rule-based phonetic transliteration engine (Singlish → Sinhala Unicode).
"""
from core.mappings import (
CONSONANTS, CONSONANTS_UNI,
VOWELS, VOWELS_UNI, VOWEL_MODIFIERS_UNI,
SPECIAL_CONSONANTS, SPECIAL_CONSONANTS_UNI,
SPECIAL_CHARS, SPECIAL_CHARS_UNI,
N_VOWELS,
)
def rule_based_transliterate(text: str) -> str:
    """
    Convert Romanized Singlish text to Sinhala script using phonetic rules.

    The conversion is a fixed sequence of whole-string ``str.replace``
    passes driven by the parallel tables imported from ``core.mappings``
    (each Latin pattern table is paired position-by-position with its
    ``*_UNI`` counterpart). Passes run from the most specific patterns
    (special consonants, consonant clusters) down to the least specific
    (bare consonants, standalone vowels), so a later, shorter pattern only
    sees Latin text that no earlier pass consumed.

    NOTE(review): within a single pass, patterns are tried in table order;
    this presumably relies on the tables listing longer patterns before
    their prefixes -- confirm against ``core.mappings``.

    Args:
        text: Romanized (Singlish) input text.

    Returns:
        The input with every matched pattern replaced by its Sinhala
        Unicode form; characters that match no pattern are left as-is.
    """
    # Written as escapes on purpose: the joiner is invisible and is easily
    # dropped by editors or copy/paste, which silently breaks the conjunct.
    hal = "\u0dca"  # SINHALA SIGN AL-LAKUNA (virama)
    # Rakaransaya (consonant + "r" conjunct) is hal + ZERO WIDTH JOINER + ra;
    # without the joiner the output is a hal-marked consonant followed by a
    # separate ra letter rather than the conjunct form.
    rakaransaya = hal + "\u200d" + "\u0dbb"

    # 1. Special consonants (anusvara, visarga, etc.)
    for pat, uni in zip(SPECIAL_CONSONANTS, SPECIAL_CONSONANTS_UNI):
        text = text.replace(pat, uni)

    # 2. Consonant + special-char combinations (e.g., kru → කෘ)
    for sc, sc_uni in zip(SPECIAL_CHARS, SPECIAL_CHARS_UNI):
        for cons, cons_uni in zip(CONSONANTS, CONSONANTS_UNI):
            text = text.replace(cons + sc, cons_uni + sc_uni)

    # 3. Consonant + "r" + vowel clusters (e.g., "kra"), then any leftover
    #    consonant + "r" with no vowel after it. The vowel forms must be
    #    tried first, otherwise the bare cluster would consume the "r" and
    #    leave the vowel to be treated as standalone.
    for cons, cons_uni in zip(CONSONANTS, CONSONANTS_UNI):
        cluster_uni = cons_uni + rakaransaya
        for vow, vmod in zip(VOWELS, VOWEL_MODIFIERS_UNI):
            text = text.replace(cons + "r" + vow, cluster_uni + vmod)
        text = text.replace(cons + "r", cluster_uni)

    # 4. Consonant + vowel combinations. Only the first N_VOWELS entries of
    #    the vowel tables are used here (kept as in the original).
    for cons, cons_uni in zip(CONSONANTS, CONSONANTS_UNI):
        for j in range(N_VOWELS):
            text = text.replace(cons + VOWELS[j], cons_uni + VOWEL_MODIFIERS_UNI[j])

    # 5. Bare consonants → consonant + hal (virama)
    for cons, cons_uni in zip(CONSONANTS, CONSONANTS_UNI):
        text = text.replace(cons, cons_uni + hal)

    # 6. Standalone vowels (anything not attached to a consonant above)
    for vow, vow_uni in zip(VOWELS, VOWELS_UNI):
        text = text.replace(vow, vow_uni)

    return text
|