# formosan-tts/ipa/ipa.py
# txya900619's picture
# fix: "?" and "!" could not be parsed to IPA; add ignore_comma arg for future models
# fdeff58
import re
def can_form_string(x, symbol_dict):
    """Split ``x`` into a sequence of keys taken from ``symbol_dict``.

    The segmentation returned is the one a depth-first search would find
    first when trying keys in the dictionary's iteration order, so earlier
    keys win over later ones whenever both lead to a complete cover.

    Args:
        x: The string to segment.
        symbol_dict: Mapping whose keys are the allowed string pieces. Only
            the keys are used here; values are ignored.

    Returns:
        A tuple ``(ok, parts)``. ``ok`` is True when ``x`` can be written as
        a concatenation of keys, and ``parts`` is then the list of keys in
        order (empty for an empty ``x``). When no segmentation exists the
        result is ``(False, [])``.
    """
    length = len(x)

    # An empty key matches everywhere without consuming input, so it can
    # never contribute to a segmentation; skipping it avoids looping forever.
    keys = [key for key in symbol_dict if key]

    # coverable[i] is True when the suffix x[i:] can be fully segmented.
    # Filling it from the end replaces exponential backtracking (and deep
    # recursion on long inputs) with a single linear sweep.
    coverable = [False] * (length + 1)
    coverable[length] = True
    for start in range(length - 1, -1, -1):
        coverable[start] = any(
            x.startswith(key, start) and coverable[start + len(key)]
            for key in keys
        )

    if not coverable[0]:
        return False, []

    # Walk forward, always taking the first key (in dict order) that still
    # leaves a coverable remainder -- the same choice the DFS would settle on.
    matched_parts = []
    pos = 0
    while pos < length:
        for key in keys:
            if x.startswith(key, pos) and coverable[pos + len(key)]:
                matched_parts.append(key)
                pos += len(key)
                break
    return True, matched_parts
def text_to_ipa(text, lang_tag, g2p, ignore_comma=True):
    """Convert ``text`` to a space-separated IPA string for one language.

    The text is lowercased, the characters ``.``, ``?`` and ``!`` are
    removed, straight apostrophes are replaced by ``’``, and the result is
    split on whitespace. Each word is segmented into keys of the language's
    grapheme table and the mapped values are concatenated.

    Args:
        text: Input text.
        lang_tag: Key selecting the per-language table inside ``g2p``.
        g2p: Mapping from language tag to a mapping of grapheme string to
            IPA string.
        ignore_comma: When True a ``,`` contributes nothing to the output;
            when False it is kept as a literal ``,``.

    Returns:
        The IPA words joined by single spaces, after a final pass of
        character substitutions. Returns ``""`` if any word cannot be
        segmented with the available graphemes.

    Side effects:
        Prints the list of words, and prints the offending word when
        segmentation fails.
    """
    text = text.lower()
    text = re.sub(r"[.?!]", "", text)
    text = text.replace("'", "’")
    words = text.split()  # change in future
    print(words)

    if not words:
        # Nothing to convert; also avoids touching g2p[lang_tag] needlessly.
        return ""

    # Loop-invariant, so build it once. The comma entry overrides any ","
    # already present in the language table.
    extended_g2p = {**g2p[lang_tag], ",": "" if ignore_comma else ","}

    ipa = []
    for word in words:
        result, matched_parts = can_form_string(word, extended_g2p)
        if result is False:
            print(word)
            return ""
        # Look parts up in the same table used for segmentation: a matched
        # "," need not exist in g2p[lang_tag], where it would be a KeyError.
        ipa.append("".join(extended_g2p[part] for part in matched_parts))

    # Final character-level substitutions applied to the joined output.
    return (
        " ".join(ipa)
        .replace("g", "ɡ")
        .replace("ʦ", "t͡s")
        .replace("ʨ", "t͡ɕ")
        .replace("R", "ʀ")
        .replace("ʤ", "dʒ")
    )