"""Text cleaners that turn raw Vietnamese text into a phoneme string."""

import logging
import re

from viphoneme import vi2IPA

logger = logging.getLogger(__name__)

# Matches any run of one or more whitespace characters.
_whitespace_re = re.compile(r'\s+')


def lowercase(text: str) -> str:
    """Return *text* converted to lowercase."""
    return text.lower()


def collapse_whitespace(text: str) -> str:
    """Replace every run of whitespace in *text* with a single space.

    Leading and trailing whitespace is collapsed to one space, not stripped.
    """
    return _whitespace_re.sub(' ', text)


def vietnamese_cleaner(text: str) -> str:
    """Lowercase *text*, phonemize it with ``vi2IPA``, and collapse whitespace.

    Args:
        text: Raw input text.

    Returns:
        The output of ``vi2IPA`` with whitespace runs collapsed to single
        spaces (presumably an IPA phoneme string -- confirm against the
        ``viphoneme`` documentation).
    """
    # Previously an unconditional ``print(text, 'test1')``; kept as a
    # debug-level log record so library callers do not get stdout noise.
    logger.debug("vietnamese_cleaner input: %r", text)
    text = lowercase(text)
    # PL_BERT_CASE
    phonemes = vi2IPA(text)
    return collapse_whitespace(phonemes)


# Disabled Japanese cleaner, kept for reference.
#
# import MeCab
# import pykakasi
#
# mc = MeCab.Tagger("-Owakati")
# kks = pykakasi.kakasi()
#
# # define function to convert text to phonemes
# def japanese_to_phonemes(text):
#     # convert text to hiragana
#     result = kks.convert(text)
#     hiragana = ''.join([item['hira'] for item in result])
#     # convert hiragana to katakana
#     katakana = kks.convert(hiragana)
#     katakana = ''.join([item['kana'] for item in katakana])
#     # convert katakana to romaji (phonemes)
#     romaji = kks.convert(katakana)
#     romaji = ''.join([item['hepburn'] for item in romaji])
#     return romaji
#
# def japanese_cleaner(text):
#     cleaned_text = mc.parse(text)
#     phonemes = japanese_to_phonemes(cleaned_text)
#     return phonemes