# testSpeech/text/cleaners.py
# Uploaded by CongBang: "Upload folder using huggingface_hub"
# (commit e3c2b9c, verified).
from viphoneme import vi2IPA
import re
# Precompiled pattern matching any run of one or more whitespace characters;
# used by collapse_whitespace().
_whitespace_re = re.compile(r'\s+')
def lowercase(text):
    """Return ``text`` converted to lower case via its ``lower()`` method."""
    lowered = text.lower()
    return lowered
def collapse_whitespace(text):
    """Replace every run of whitespace in ``text`` with a single space.

    Leading and trailing whitespace is not stripped; each such run is
    reduced to one space like any other.
    """
    # Calling .sub on the precompiled module-level pattern is equivalent to
    # passing that pattern to re.sub.
    collapsed = _whitespace_re.sub(' ', text)
    return collapsed
def vietnamese_cleaner(text):
    """Lower-case ``text``, run it through ``vi2IPA``, and tidy whitespace.

    Args:
        text: Input text to convert (presumably Vietnamese, given the
            ``viphoneme`` dependency).

    Returns:
        The result of ``vi2IPA`` applied to the lower-cased text, with every
        run of whitespace collapsed to a single space.
    """
    # No debug output here: this function is a pure text transform and
    # should not write to stdout on every call.
    text = lowercase(text)  # PL_BERT_CASE
    phonemes = vi2IPA(text)
    return collapse_whitespace(phonemes)
# import MeCab
# import pykakasi
# mc = MeCab.Tagger("-Owakati")
# kks = pykakasi.kakasi()
# # define function to convert text to phonemes
# def japanese_to_phonemes(text):
# # convert text to hiragana
# result = kks.convert(text)
# hiragana = ''.join([item['hira'] for item in result])
# # convert hiragana to katakana
# katakana = kks.convert(hiragana)
# katakana = ''.join([item['kana'] for item in katakana])
# # convert katakana to romaji (phonemes)
# romaji = kks.convert(katakana)
# romaji = ''.join([item['hepburn'] for item in romaji])
# return romaji
# def japanese_cleaner(text):
# cleaned_text = mc.parse(text)
# phonemes = japanese_to_phonemes(cleaned_text)
# return phonemes