| |
|
|
| import re |
|
|
| |
|
|
# Stand-alone Sinhala characters and their romanizations, as
# [sinhala, roman] pairs: the independent vowels, the velar nasal written
# with hal kirima, the anusvara and the visarga.
#
# Some Sinhala characters are listed twice with different romanizations.
# sinhala_to_roman only ever emits the FIRST romanization listed, because
# replace_all keeps table order among equally long keys and the first pair
# has already consumed every occurrence when the second is reached.
# NOTE(review): the second spellings look like alternatives kept for a
# roman -> Sinhala direction that is not visible here -- confirm before
# removing them.
ro_specials = [
    ['ඓ', 'ai'],
    ['ඖ', 'au'],
    ['ඍ', 'ṛ'],
    ['ඎ', 'ṝ'],
    ['ඐ', 'ḹ'],
    ['අ', 'a'],
    ['ආ', 'ā'],
    ['ඇ', 'æ'],
    ['ඇ', 'Æ'],
    ['ඈ', 'ǣ'],
    ['ඉ', 'i'],
    ['ඊ', 'ī'],
    ['උ', 'u'],
    ['ඌ', 'ū'],
    ['එ', 'e'],
    ['ඒ', 'ē'],
    ['ඔ', 'o'],
    ['ඕ', 'ō'],
    ['ඞ්', 'ṅ'],
    ['ං', 'ṁ'],
    ['ං', 'ṃ'],
    ['ඃ', 'ḥ'],
    ['ඃ', 'Ḥ'],
]
|
|
| |
|
|
# Sinhala consonants and their romanizations, as [sinhala, roman] pairs.
# The roman side carries no vowel: create_conso_combi appends the vowel
# (or nothing, for hal kirima) when it builds the consonant + sign table.
#
# As in ro_specials, a consonant listed twice is romanized with the first
# spelling given; the second one never fires in sinhala_to_roman.
# NOTE(review): entries whose romanization is two letters long are listed
# first -- presumably this matters to a reverse mapping not visible here;
# the Sinhala -> roman direction does not depend on it.
ro_consonants = [
    ['ඛ', 'kh'],
    ['ඨ', 'ṭh'],
    ['ඝ', 'gh'],
    ['ඡ', 'ch'],
    ['ඣ', 'jh'],
    ['ඦ', 'ñj'],
    ['ඪ', 'ḍh'],
    ['ඬ', 'ṇḍ'],
    ['ථ', 'th'],
    ['ධ', 'dh'],
    ['ඵ', 'ph'],
    ['භ', 'bh'],
    ['ඹ', 'mb'],
    ['ඳ', 'ṉd'],
    ['ඟ', 'ṉg'],
    ['ඥ', 'gn'],
    ['ක', 'k'],
    ['ග', 'g'],
    ['ච', 'c'],
    ['ජ', 'j'],
    ['ඤ', 'ñ'],
    ['ට', 'ṭ'],
    ['ඩ', 'ḍ'],
    ['ණ', 'ṇ'],
    ['ත', 't'],
    ['ද', 'd'],
    ['න', 'n'],
    ['ප', 'p'],
    ['බ', 'b'],
    ['ම', 'm'],
    ['ය', 'y'],
    ['ර', 'r'],
    ['ල', 'l'],
    ['ව', 'v'],
    ['ශ', 'ś'],
    ['ෂ', 'ş'],
    ['ෂ', 'ṣ'],
    ['ස', 's'],
    ['හ', 'h'],
    ['ළ', 'ḷ'],
    ['ෆ', 'f'],
]
|
|
| |
|
|
# Dependent vowel signs, one entry per sign, laid out as
# [roman_prefix, roman_vowel, sinhala_sign]:
#   * roman_prefix  - text placed BEFORE the consonant's romanization
#                     (empty in every entry here),
#   * roman_vowel   - text placed AFTER the consonant's romanization,
#   * sinhala_sign  - the sign that follows the consonant in Sinhala script.
# The first entry is hal kirima (no vowel at all); the second has no sign
# and therefore describes a bare consonant, romanized with a trailing 'a'.
ro_combinations = [
    ['', '', '්'],
    ['', 'a', ''],
    ['', 'ā', 'ා'],
    ['', 'æ', 'ැ'],
    ['', 'ǣ', 'ෑ'],
    ['', 'i', 'ි'],
    ['', 'ī', 'ී'],
    ['', 'u', 'ු'],
    ['', 'ū', 'ූ'],
    ['', 'e', 'ෙ'],
    ['', 'ē', 'ේ'],
    ['', 'ai', 'ෛ'],
    ['', 'o', 'ො'],
    ['', 'ō', 'ෝ'],
    ['', 'ṛ', 'ෘ'],
    ['', 'ṝ', 'ෲ'],
    ['', 'au', 'ෞ'],
    ['', 'ḹ', 'ෳ'],
]
|
|
| |
|
|
def create_conso_combi(combinations, consonants):
    """Build the (sinhala, roman) table for every consonant + vowel-sign pair.

    Args:
        combinations: Sequence of ``[roman_prefix, roman_vowel, sinhala_sign]``
            entries.
        consonants: Sequence of ``[sinhala, roman]`` consonant entries.

    Returns:
        A list of ``(sinhala, roman)`` tuples. The Sinhala side is the
        consonant followed by the sign; the roman side is the prefix, the
        consonant's romanization and the vowel, in that order. Entries are
        grouped by combination (outer) and then by consonant (inner), in
        the order the inputs supply them.
    """
    return [
        (conso[0] + combi[2], combi[0] + conso[1] + combi[1])
        for combi in combinations
        for conso in consonants
    ]
|
|
# Every consonant combined with every vowel sign, computed once when the
# module is imported; sinhala_to_roman applies this table first.
ro_conso_combi = create_conso_combi(ro_combinations, ro_consonants)
|
|
| |
def replace_all(text, mapping):
    """Apply every ``(source, replacement)`` pair in *mapping* to *text*.

    Pairs are applied longest source first, so a multi-character source
    (e.g. consonant + vowel sign) wins over a shorter source that is a
    prefix of it. Pairs with equally long sources keep the order they have
    in *mapping*.

    Sources and replacements are treated as literal text, never as regular
    expressions or replacement templates, so characters such as ``.`` or
    ``\\`` in the tables cannot change what gets matched or inserted.

    Args:
        text: The string to rewrite.
        mapping: Iterable of two-item ``(source, replacement)`` sequences.

    Returns:
        The rewritten string.
    """
    # sorted() is stable, also with reverse=True, so ties keep table order.
    ordered = sorted(mapping, key=lambda pair: len(pair[0]), reverse=True)
    for source, replacement in ordered:
        if not source:
            # An empty source would insert the replacement between every
            # character of the text; that is never a meaningful table entry.
            continue
        text = text.replace(source, replacement)
    return text
|
|
| |
def sinhala_to_roman(text):
    """Transliterate Sinhala-script *text* into roman letters.

    Characters that appear in none of the module's tables are left
    unchanged.

    Args:
        text: The string to transliterate.

    Returns:
        The romanized string.
    """
    # Drop zero-width joiners (U+200D) before matching.
    # NOTE(review): presumably so that joined clusters still match the plain
    # consonant + sign keys in the tables -- confirm.
    text = text.replace("\u200D", "")

    # Consonants (with or without a vowel sign) go first, so that a bare
    # consonant picks up its trailing 'a' and signs are consumed together
    # with the consonant they belong to.
    text = replace_all(text, ro_conso_combi)

    # Whatever is left: independent vowels, anusvara, visarga and the like.
    return replace_all(text, ro_specials)
|
|