File size: 2,134 Bytes
from tts_infer.tts import TextToMel, MelToWav
from tts_infer.num_to_word_on_sent import normalize_nums
# from ai4bharat.transliteration import XlitEngine
import gradio as gr
# Device string handed to the TextToMel and MelToWav constructors in this file.
device = 'cpu'
def create_text_to_mel(glow_model_dir):
    """Build a ``TextToMel`` from the given model directory.

    Args:
        glow_model_dir: Directory passed through as ``glow_model_dir``.

    Returns:
        A ``TextToMel`` instance constructed with the module-level ``device``.
    """
    model = TextToMel(
        glow_model_dir=glow_model_dir,
        device=device,
    )
    return model
# Load both stages for each voice once at start-up so run_tts can reuse them.
# Female voice: text -> mel model, then mel -> wav model.
text_to_mel_female, mel_to_wav_female = (
    create_text_to_mel('checkpoints/glow/female'),
    MelToWav(hifi_model_dir='checkpoints/hifi/female', device=device),
)
# Male voice: same pair of stages, loaded from the male checkpoints.
text_to_mel_male, mel_to_wav_male = (
    create_text_to_mel('checkpoints/glow/male'),
    MelToWav(hifi_model_dir='checkpoints/hifi/male', device=device),
)
# def translit(text, lang):
# engine = XlitEngine(lang)
# words = [engine.translit_word(word, topk=1)[lang][0] for word in text.split()]
# updated_sent = ' '.join(words)
# return updated_sent
def run_tts(text, selected_voice):
    """Synthesize Punjabi speech for ``text`` with the chosen voice.

    Args:
        text: Input text; every danda ('।') is replaced by '.' before
            number normalization.
        selected_voice: "Male Voice" selects the male models; any other
            value (including no selection) selects the female models.

    Returns:
        A ``(sr, audio)`` tuple built from the two values returned by
        ``generate_wav`` (presumably sample rate and waveform -- confirm
        against tts_infer).
    """
    lang = "pa"  # Punjabi language code

    # Swap the danda for a full stop, then convert numbers to words in `lang`.
    # Transliteration of English words is currently disabled.
    normalized = normalize_nums(text.replace('।', '.'), lang)

    use_male = selected_voice == "Male Voice"
    mel_model = text_to_mel_male if use_male else text_to_mel_female
    wav_model = mel_to_wav_male if use_male else mel_to_wav_female

    mel = mel_model.generate_mel(normalized)
    audio, sr = wav_model.generate_wav(mel)
    return sr, audio
# iface = gr.Interface(
# fn=run_tts,
# inputs=[
# "textbox",
# gr.inputs.Dropdown(
# choices=["Male Voice", "Female Voice"],
# default="Female Voice",
# label="Select Voice"
# )
# ],
# outputs="audio",
# title="Text to Speech Punjabi Language"
# )
# iface.launch()
# Build the web UI: a text box and a voice selector feed run_tts, and the
# value it returns is rendered by the audio output component.
iface = gr.Interface(
    fn=run_tts,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Radio(
            choices=["Male Voice", "Female Voice"],
            # Preselect the voice run_tts falls back to when nothing is
            # chosen, so the UI state matches what is actually synthesized.
            value="Female Voice",
            label="Select Voice",
        ),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Text to Speech Punjabi Language",
)
iface.launch()
|