| import re
|
| from indextts.utils.front import TextNormalizer
|
| from indextts.text.hindi_phonemizer import hindi_to_phoneme
|
|
|
|
|
def test_hindi_not_through_english_phonemizer():
    """Check that Hindi text is normalized into the Hindi phonemizer's output.

    A Devanagari sentence is passed through ``TextNormalizer.normalize``.
    The test then asserts that:

    1. the normalized text contains at least one character in the
       U+0900-U+097F range, and
    2. the space-joined result of ``hindi_to_phoneme`` for the same sentence
       is non-empty and occurs inside the normalized text.
    """
    txt = "मुझे बहुत खुशी हो रही है क्योंकि आज मेरा सपना पूरा हो गया।"

    tn = TextNormalizer()
    tn.load()
    normalized = tn.normalize(txt)

    assert re.search(r"[\u0900-\u097F]", normalized), "Normalized Hindi should contain Devanagari script"

    tokens = hindi_to_phoneme(txt)
    joined = " ".join(tokens)

    # The empty string is a substring of every string, so without this guard
    # the containment check below would pass even when no tokens were produced.
    assert joined, "hindi_to_phoneme returned no tokens for Hindi input"

    # Containment also covers the exact-equality case for strings.
    assert joined in normalized, "Hindi must be routed to hindi_to_phoneme tokens"
|
|
|
|
|
def test_english_and_chinese_preserved():
    """Check that English and Chinese inputs keep their own script.

    One English sentence and one Chinese sentence are normalized with the
    same ``TextNormalizer`` instance. The English result must contain at
    least one ASCII letter, and the Chinese result must contain at least one
    character in the U+4E00-U+9FFF range.
    """
    normalizer = TextNormalizer()
    normalizer.load()

    english_sample = "This is a test of the emergency broadcast system."
    chinese_sample = "我爱你。"

    # Normalize both samples before asserting on either of them.
    english_out = normalizer.normalize(english_sample)
    chinese_out = normalizer.normalize(chinese_sample)

    latin_match = re.search(r"[A-Za-z]", english_out)
    assert latin_match

    cjk_match = re.search(r"[\u4e00-\u9fff]", chinese_out)
    assert cjk_match
|
|
|