|
|
import gradio as gr |
|
|
from collections.abc import Iterable |
|
|
from gruut import sentences |
|
|
from phonemap.g2pphonemizer import G2PPhonemizer |
|
|
from phonemap.charsiu import CharsiuPhonemizer |
|
|
from openphonemizer import OpenPhonemizer |
|
|
from cached_path import cached_path |
|
|
# OpenPhonemizer built with no arguments (library defaults).
opphonemizer = OpenPhonemizer()

# Second OpenPhonemizer built from the "autoreg" checkpoint; the value
# returned by cached_path is converted to str before being passed in.
opphonemizer_ar = OpenPhonemizer(
    str(cached_path('hf://openphonemizer/autoreg-ckpt/best_model.pt'))
)

# Module-level instances used by g2pen() and docharsiu() respectively.
g2p = G2PPhonemizer()
charsiu = CharsiuPhonemizer()
|
|
import phonemizer |
|
|
# Espeak backend shared by phonemizerfunc(): US English, punctuation kept,
# stress marks on, word-count mismatches ignored.
global_phonemizer = phonemizer.backend.EspeakBackend(
    language='en-us',
    preserve_punctuation=True,
    with_stress=True,
    words_mismatch='ignore',
)
|
|
from nltk import word_tokenize |
|
|
import nltk |
|
|
|
|
|
from dp.phonemizer import Phonemizer |
|
|
# DeepPhonemizer model used by dp().
# NOTE: this rebinds the module-level name `phonemizer`, which until this
# point referred to the imported `phonemizer` package. Anything that needs
# the package itself must use it before this line.
phonemizer = Phonemizer.from_checkpoint(
    str(
        cached_path(
            'https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt'
        )
    )
)
|
|
import re |
|
|
from num2words import num2words |
|
|
|
|
|
def replace_numbers_with_words(text):
    """Return *text* with every run of digits spelled out via ``num2words``.

    Each maximal digit run is converted exactly once, in place, in a single
    pass. The earlier find-then-``str.replace`` approach replaced every
    occurrence of a digit string anywhere in the text, so a short number
    corrupted longer ones that contained it ("1 and 10" -> "one and one0").

    Args:
        text: Input string, possibly containing digit sequences.

    Returns:
        The string with each digit run replaced by ``num2words(int(run))``.
        Text without digits is returned unchanged.
    """
    def _spell_out(match):
        # int() drops leading zeros, so "007" is read as 7.
        return num2words(int(match.group()))

    return re.sub(r'\d+', _spell_out, text)
|
|
|
|
|
# word_tokenize() needs the Punkt tokenizer data at runtime. Older NLTK
# releases load it from the 'punkt' package; newer releases (3.9+) look up
# 'punkt_tab' instead and raise LookupError if only 'punkt' is present.
# Fetch both so phonemizerfunc() works regardless of the installed version.
nltk.download('punkt')
nltk.download('punkt_tab')
|
|
|
|
|
def gruut(text):
    """Phonemize *text* with gruut's ``sentences`` pipeline (``en-us``).

    Every word's phonemes are collapsed into one string and the words are
    joined with single spaces.

    Args:
        text: Input text to phonemize.

    Returns:
        Space-separated phoneme strings, one per word that carries phonemes.
        Words whose ``phonemes`` attribute is neither a string nor an
        iterable (for example ``None``) are skipped.
    """
    phonemized = []
    for sent in sentences(text, lang='en-us'):
        for word in sent:
            phonemes = word.phonemes
            # Test for str first: a str is itself an Iterable, so checking
            # Iterable first made the str branch unreachable.
            if isinstance(phonemes, str):
                phonemized.append(phonemes)
            elif isinstance(phonemes, Iterable):
                phonemized.append(''.join(phonemes))
    return ' '.join(phonemized)
|
|
def g2pen(text):
    """Return ``g2p.phonemize(text)`` using the module-level G2PPhonemizer."""
    result = g2p.phonemize(text)
    return result
|
|
def docharsiu(text):
    """Return ``charsiu.phonemize(text)`` using the module-level CharsiuPhonemizer."""
    result = charsiu.phonemize(text)
    return result
|
|
def phonemizerfunc(text):
    """Phonemize *text* with the module-level espeak backend.

    The input is stripped of surrounding whitespace and of all double-quote
    characters, phonemized as a single-item batch, re-tokenized with NLTK's
    ``word_tokenize`` and joined back with single spaces.

    Args:
        text: Input text to phonemize.

    Returns:
        The tokenized phoneme string.
    """
    cleaned = text.strip().replace('"', '')
    # phonemize() takes a list and returns a list; only one item is sent.
    batch_result = global_phonemizer.phonemize([cleaned])
    tokens = word_tokenize(batch_result[0])
    return ' '.join(tokens)
|
|
def dp(text):
    """Phonemize *text* with the module-level DeepPhonemizer model.

    Digit runs are first spelled out with ``replace_numbers_with_words``;
    the result is passed to ``phonemizer`` with ``lang='en_us'``.
    """
    spelled_out = replace_numbers_with_words(text)
    return phonemizer(spelled_out, lang='en_us')
|
|
def run_openphonemizer(text):
    """Return the result of calling the module-level ``opphonemizer`` on *text*."""
    phonemes = opphonemizer(text)
    return phonemes
|
|
def opar(text):
    """Return the result of calling the module-level ``opphonemizer_ar`` on *text*."""
    phonemes = opphonemizer_ar(text)
    return phonemes
|
|
def run(t, r):
    """Dispatch text *t* to the phonemizer selected by label *r*.

    Args:
        t: Text to phonemize.
        r: Phonemizer label, compared verbatim against the known names.

    Returns:
        The selected phonemizer's output, or an error message string when
        *r* matches none of the known labels.
    """
    # Pick the handler first, then call it once at the bottom.
    if r == 'phonemizer':
        handler = phonemizerfunc
    elif r == 'openphonemizer':
        handler = run_openphonemizer
    elif r == 'gruut':
        handler = gruut
    elif r == 'deep_phonemizer':
        handler = dp
    elif r == 'g2p_en (buggy)':
        handler = g2pen
    elif r == 'openphonemizer autoregressive':
        handler = opar
    else:
        return f'Error: Phonemizer {r} not found, please try another Phonemizer and create a Community discussion on HF.'
    return handler(t)
|
|
# Gradio front end: a text box, a phonemizer selector, a button and a
# read-only output box. Clicking the button calls run(text, selection).
with gr.Blocks() as demo:
    gr.Markdown("# PhonemizerHub")
    t = gr.Textbox(
        label="Text",
        placeholder="Enter text...",
        interactive=True,
    )
    # These choice strings are compared verbatim inside run().
    r = gr.Radio(
        choices=[
            'phonemizer',
            'openphonemizer',
            'openphonemizer autoregressive',
            'gruut',
            'deep_phonemizer',
            'g2p_en (buggy)',
        ],
        value='openphonemizer',
        interactive=True,
        label='Select a phonemizer',
    )
    b = gr.Button("Phonemize")
    out = gr.Textbox(interactive=False, label="Phonemes")
    b.click(run, inputs=[t, r], outputs=[out])

# Enable request queuing, then start the app.
demo.queue().launch()