# Text normalization and phonemization utilities.
import os, re, regex
import langid
import uroman as ur
import jieba, zhconv
from num2words import num2words

jieba.set_dictionary(dictionary_path=os.path.join(os.path.dirname(__file__), "..", "infer", "text_norm", "jieba_dict.txt"))

from pypinyin import pinyin, lazy_pinyin, Style
from pypinyin.contrib.tone_convert import to_initials, to_finals_tone3
from pypinyin_dict.phrase_pinyin_data import large_pinyin  # alternative: cc_cedict
from .text_norm.txt2pinyin import _PAUSE_SYMBOL, get_phoneme_from_char_and_pinyin
from .text_norm.cn_tn import NSWNormalizer
from .text_norm.tokenizer import TextTokenizer, txt2phone

# Load the large phrase-pinyin dictionary for better polyphone disambiguation.
large_pinyin.load()
class TextNorm:
    def __init__(self, dtype="phone"):
        # Mandarin pinyin-syllable -> phoneme lexicon.
        lexicon_path = os.path.join(os.path.dirname(__file__), "..", "infer", "text_norm", "pinyin-lexicon-r.txt")
        with open(lexicon_path, "r", encoding="utf-8") as f:
            cmn_lexicon = [x.strip().split() for x in f.readlines()]
        self.cmn_dict = {x[0]: x[1:] for x in cmn_lexicon}

        # Restrict langid to the languages we can actually phonemize.
        langid.set_languages(['es', 'pt', 'zh', 'en', 'de', 'fr', 'it', 'ru', 'vi', 'id', 'th', 'ja', 'ko', 'ar'])
        langs = {"en": "en-us", "it": "it", "es": "es", "pt": "pt-br", "fr": "fr-fr", "de": "de",
                 "ru": "ru", "vi": "vi", "id": "id", "th": "th", "ja": "ja", "ko": "ko"}  # "zh"/"cmn" is handled via pinyin instead of espeak
        text_tokenizer = {}
        for k, v in langs.items():
            tokenizer = TextTokenizer(language=v, backend="espeak")
            lang = "zh" if k == "cmn" else k
            text_tokenizer[k] = (lang, tokenizer)
        self.text_tokenizer = text_tokenizer
        self.cn_tn = NSWNormalizer()
        self.dtype = dtype
    def detect_lang(self, text):
        # langid.classify returns a (lang, score) tuple, not a list of tuples.
        lang, _ = langid.classify(text)
        return lang
    def sil_type(self, time_s):
        """Map a silence duration in seconds to a pause marker (#1-#4)."""
        # Compare the raw duration; rounding to an integer would collapse the
        # sub-second thresholds and make "#1" unreachable.
        if time_s < 0.4:
            return ""
        elif time_s < 0.8:
            return "#1"
        elif time_s < 1.5:
            return "#2"
        elif time_s < 3.0:
            return "#3"
        else:
            return "#4"
    def add_sil_raw(self, sub_list, start_time, end_time, target_transcript):
        """Interleave pause markers between words and splice target_transcript
        in place of the words that fall inside [start_time, end_time]."""
        txt = []
        txt_list = [x["word"] for x in sub_list]
        sil = self.sil_type(sub_list[0]["start"])
        if len(sil) > 0:
            txt.append(sil)
        txt.append(txt_list[0])
        for i in range(1, len(sub_list)):
            if sub_list[i]["start"] >= start_time and sub_list[i]["end"] <= end_time:
                txt.append(target_transcript)
                target_transcript = ""  # only insert the transcript once
            else:
                sil = self.sil_type(sub_list[i]["start"] - sub_list[i - 1]["end"])
                if len(sil) > 0:
                    txt.append(sil)
                txt.append(txt_list[i])
        return ' '.join(txt)
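    # sub_list is expected to look like whisper-style word timestamps (field
    # names assumed from usage above), e.g.
    # [{"word": "hello", "start": 0.0, "end": 0.5}, {"word": "world", "start": 1.2, "end": 1.6}]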
    def add_sil(self, sub_list, start_time, end_time, target_transcript, src_lang, tar_lang):
        """Like add_sil_raw, but keeps (lang, text) pairs so source words and the
        spliced-in target transcript can carry different language tags."""
        txts = []
        txt_list = [x["word"] for x in sub_list]
        sil = self.sil_type(sub_list[0]["start"])
        if len(sil) > 0:
            txts.append([src_lang, sil])
        if sub_list[0]["start"] < start_time:
            txts.append([src_lang, txt_list[0]])
        for i in range(1, len(sub_list)):
            if sub_list[i]["start"] >= start_time and sub_list[i]["end"] <= end_time:
                txts.append([tar_lang, target_transcript])
                target_transcript = ""  # only insert the transcript once
            else:
                sil = self.sil_type(sub_list[i]["start"] - sub_list[i - 1]["end"])
                if len(sil) > 0:
                    txts.append([src_lang, sil])
                txts.append([src_lang, txt_list[i]])
        # Merge consecutive chunks that share the same language tag.
        target_txt = [txts[0]]
        for txt in txts[1:]:
            if txt[1] == "":
                continue
            if txt[0] != target_txt[-1][0]:
                target_txt.append([txt[0], ""])
            target_txt[-1][-1] += " " + txt[1]
        return target_txt
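    # Example (assumed shapes): with src_lang="en" and tar_lang="zh", the result
    # is a list of merged language-tagged chunks such as
    # [["en", "hello #1"], ["zh", " 你好"]].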
    def replace_numbers_with_words(self, sentence, lang="en"):
        sentence = re.sub(r'(\d+)', r' \1 ', sentence)  # add spaces around numbers

        def replace_with_words(match):
            num = match.group(0)
            try:
                return num2words(num, lang=lang)  # convert digits to words
            except Exception:
                return num  # in case num2words fails (e.g. unsupported lang)

        return re.sub(r'\b\d+\b', replace_with_words, sentence)
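    # Example (note the extra spaces introduced around digits):
    # replace_numbers_with_words("I have 3 cats") -> "I have  three  cats"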
    def get_prompt(self, sub_list, start_time, end_time, src_lang):
        """Collect the source-language words (with pause markers) that fall
        inside [start_time, end_time], as merged (lang, text) pairs."""
        txts = []
        txt_list = [x["word"] for x in sub_list]
        if start_time <= sub_list[0]["start"]:
            sil = self.sil_type(sub_list[0]["start"])
            if len(sil) > 0:
                txts.append([src_lang, sil])
            txts.append([src_lang, txt_list[0]])
        for i in range(1, len(sub_list)):
            if sub_list[i]["start"] >= start_time and sub_list[i]["end"] <= end_time:
                sil = self.sil_type(sub_list[i]["start"] - sub_list[i - 1]["end"])
                if len(sil) > 0:
                    txts.append([src_lang, sil])
                txts.append([src_lang, txt_list[i]])
        if not txts:  # no word fell inside the window
            return []
        # Merge consecutive chunks that share the same language tag.
        target_txt = [txts[0]]
        for txt in txts[1:]:
            if txt[1] == "":
                continue
            if txt[0] != target_txt[-1][0]:
                target_txt.append([txt[0], ""])
            target_txt[-1][-1] += " " + txt[1]
        return target_txt
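    # Example (assumed): returns the prompt words inside the window as merged
    # chunks, e.g. [["en", "#1 hello world"]]; [] when no word is in the window.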
    def txt2pinyin(self, text):
        """Convert mixed Chinese/Latin text (possibly containing #1-#4 pause
        markers) into a character list plus a pinyin-based phoneme list."""
        txts, phonemes = [], []
        texts = re.split(r"(#\d)", text)
        print("before norm: ", texts)
        for text in texts:
            if text in {'#1', '#2', '#3', '#4'}:
                txts.append(text)
                phonemes.append(text)
                continue
            text = self.cn_tn.normalize(text.strip())
            text_list = list(jieba.cut(text))
            print("jieba cut: ", text, text_list)
            for words in text_list:
                if words in _PAUSE_SYMBOL:
                    # Punctuation mapped to a pause symbol; attach it to the previous token.
                    phonemes.append(_PAUSE_SYMBOL[words])
                    if txts:
                        txts[-1] += words
                    else:
                        txts.append(words)
                elif re.search("[\u4e00-\u9fa5]+", words):
                    pinyin = lazy_pinyin(words, style=Style.TONE3, tone_sandhi=True, neutral_tone_with_five=True)
                    new_pinyin = []
                    for x in pinyin:
                        x = "".join(x)
                        if "#" not in x:
                            new_pinyin.append(x)
                        else:
                            # lazy_pinyin leaked a marker; keep the raw word instead.
                            phonemes.append(words)
                            continue
                    phoneme = get_phoneme_from_char_and_pinyin(words, new_pinyin)
                    phonemes += phoneme
                    txts += list(words)
                elif re.search(r"[a-zA-Z]", words) or re.search(r"#[1-4]", words):
                    phonemes.append(words.upper())
                    txts.append(words.upper())
        return txts, phonemes
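    # Example (assumed output shape): txt2pinyin("你好#1world") would yield roughly
    # txts = ['你', '好', '#1', 'WORLD'] and phonemes built from the pinyin
    # 'ni3'/'hao3' via get_phoneme_from_char_and_pinyin, plus '#1' and 'WORLD'.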
    def txt2pin_phns(self, text):
        """Convert a space-separated mix of pinyin syllables, pause markers and
        foreign words into a '|'-separated phone string with language tags."""
        # Ensure punctuation is separated from the preceding token by a space.
        text = re.sub(r'(?<! )([^\w\s])', r' \1', text)
        text = re.sub(r'\s+', ' ', text).strip()
        res_list = []
        for txt in text.split(" "):
            if txt in self.cmn_dict:
                # A known pinyin syllable: split into initial + tone3 final.
                res_list.append("(zh)")
                res_list.append(to_initials(txt, strict=False))
                res_list.append(to_finals_tone3(txt, neutral_tone_with_five=True))
            elif txt == '':
                continue
            elif txt in {"#1", "#2", "#3", "#4"} or not regex.search(r'\p{L}', txt[0]):
                # Pause markers and punctuation pass through; drop a trailing "_" first.
                if len(res_list) > 0 and res_list[-1] == "_":
                    res_list.pop()
                res_list += [txt]
                continue
            else:
                # Foreign word: phonemize to IPA with the per-language espeak backend.
                if len(res_list) > 0 and res_list[-1] == "_":
                    res_list.pop()
                lang = langid.classify(txt)[0]
                lang = lang if lang in self.text_tokenizer else "en"
                tokenizer = self.text_tokenizer[lang][1]
                ipa = tokenizer.backend.phonemize([txt], separator=tokenizer.separator, strip=True, njobs=1)
                phns = ipa[0] if ipa[0][0] == "(" else f"({lang})_" + ipa[0]
                res_list += phns.replace("_", "|_|").split("|")
            res_list.append("_")
        res = "|".join(res_list)
        res = re.sub(r'(\|_)+', '|_', res)  # collapse repeated separators
        return res
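    # Example (assumed lexicon contents): txt2pin_phns("ni3 hao3 #2 hello") would
    # produce roughly "(zh)|n|i3|_|(zh)|h|ao3|#2|(en)|_|həlˈoʊ|_"; the exact IPA
    # depends on the espeak backend.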
    def text2phn(self, sentence, lang=None):
        if not lang:
            lang = langid.classify(sentence)[0]
        if re.search("[\u4e00-\u9fa5]+", sentence):
            # Chinese (or mixed) text: go through pinyin, then mix IPA with pinyin phones.
            txts, phones = self.txt2pinyin(sentence)
            transcript_norm = " ".join(phones)
            phones = self.txt2pin_phns(transcript_norm)  # IPA mixed with pinyin
        else:
            # Non-Chinese text: spell out digits, then phonemize everything to IPA.
            transcript_norm = self.replace_numbers_with_words(sentence, lang=lang)
            lang = lang if lang in self.text_tokenizer else "en"
            phones = txt2phone(self.text_tokenizer[lang][1], transcript_norm.strip().replace(".", ",").replace("。", ","))
            phones = f"({lang})|" + phones if phones[0] != "(" else phones
        return phones
    def text2norm(self, sentence, lang=None):
        if not lang:
            lang = langid.classify(sentence)[0]
        if re.search("[\u4e00-\u9fa5]+", sentence):
            txts, phones = self.txt2pinyin(sentence)
            transcript_norm = " ".join(phones)
        else:
            # Spell out digits so the normalized transcript matches what gets phonemized.
            transcript_norm = self.replace_numbers_with_words(sentence, lang=lang)
        return (lang, transcript_norm)
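

# Minimal usage sketch, assuming the lexicon/dictionary files under
# ../infer/text_norm/ exist on disk and the espeak backend is installed
# (TextNorm() cannot be constructed otherwise).
if __name__ == "__main__":
    tn = TextNorm(dtype="phone")
    print(tn.detect_lang("hello world"))   # e.g. "en"
    print(tn.sil_type(1.0))                # "#2"
    print(tn.text2norm("I have 3 cats"))   # e.g. ("en", "I have  three  cats")
    print(tn.text2phn("你好#1world"))       # mixed pinyin/IPA phone string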