import re

from viphoneme import vi2IPA
# Matches any run of one or more whitespace characters.
_whitespace_re = re.compile(r'\s+')
def lowercase(text):
    """Return ``text`` converted to lowercase using ``str.lower``."""
    return text.lower()
def collapse_whitespace(text):
    """Replace every run of whitespace in ``text`` with a single space.

    Leading and trailing whitespace runs are each reduced to one space;
    they are not stripped.
    """
    return _whitespace_re.sub(' ', text)
def vietnamese_cleaner(text):
    """Turn ``text`` into a whitespace-normalized phoneme string.

    The input is lowercased, passed through ``vi2IPA`` from the
    ``viphoneme`` package, and every run of whitespace in the result is
    then collapsed to a single space.

    Args:
        text: The input string to convert.

    Returns:
        The output of ``vi2IPA`` with whitespace runs collapsed.
    """
    # The leftover debug print of the raw input was removed so the cleaner
    # no longer writes every utterance to stdout.
    text = lowercase(text)  # PL_BERT_CASE
    phonemes = vi2IPA(text)
    return collapse_whitespace(phonemes)
# import MeCab
# import pykakasi
# mc = MeCab.Tagger("-Owakati")
# kks = pykakasi.kakasi()
# # Define a function to convert text to phonemes.
# def japanese_to_phonemes(text):
#     # Convert text to hiragana.
#     result = kks.convert(text)
#     hiragana = ''.join([item['hira'] for item in result])
#     # Convert hiragana to katakana.
#     katakana = kks.convert(hiragana)
#     katakana = ''.join([item['kana'] for item in katakana])
#     # Convert katakana to romaji (phonemes).
#     romaji = kks.convert(katakana)
#     romaji = ''.join([item['hepburn'] for item in romaji])
#     return romaji
# def japanese_cleaner(text):
#     cleaned_text = mc.parse(text)
#     phonemes = japanese_to_phonemes(cleaned_text)
#     return phonemes