|
|
import tempfile |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
from neon_tts_plugin_coqui import CoquiTTS |
|
|
import whisper |
|
|
import requests |
|
|
import tempfile |
|
|
|
|
|
|
|
|
# Language codes offered in the UI: the keys of the Coqui plugin's language table.
LANGUAGES = list(CoquiTTS.langs)
# Language code used as the initial selection.
default_lang = "en"

# Heading text shown at the top of the page.
title = "Talk to (almost) anyone in the world"

# Both engines are created once at import time and reused by every call.
coquiTTS = CoquiTTS()
# NOTE: despite the variable name, this loads Whisper's "base" checkpoint.
model_med = whisper.load_model("base")
|
|
|
|
|
def tts(audio, language):
    """Speak the translation of a recording and return the resulting WAV path.

    The recording is passed to ``whisper_stt``; the translated text it
    returns is synthesised with the module-level ``coquiTTS`` engine.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.
        language: Language code forwarded to ``whisper_stt`` and to the
            synthesiser's ``speaker`` settings.

    Returns:
        Path of a temporary ``.wav`` file containing the synthesised speech,
        or ``None`` when no audio was supplied.
    """
    if not audio:
        # Nothing was recorded: skip both models instead of failing while
        # trying to load a missing input file.
        return None

    # Only the translated text is spoken; the transcript and the detected
    # language are not needed here.
    _, translated_text, _ = whisper_stt(audio, language)

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can open it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        coquiTTS.get_tts(translated_text, wav_file, speaker={"language": language})
    return wav_file.name
|
|
|
|
|
def whisper_stt(audio, language):
    """Transcribe and translate a recording with the shared Whisper model.

    Args:
        audio: Path of the audio file to load.
        language: Language code handed to the ``translate`` decoding pass.

    Returns:
        A ``(transcript, translation, detected_language)`` tuple: the text of
        the ``transcribe`` pass, the text of the ``translate`` pass, and the
        language with the highest detection probability.
    """
    print("Inside Whisper STT")

    # Load the samples and pad/trim them to the length Whisper decodes at once.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model_med.device)

    # Keep the language with the highest detection probability.
    _, probs = model_med.detect_language(mel)
    lang = max(probs, key=probs.get)
    print(f"Detected language: {lang}")

    # The transcription pass decodes in the detected language.
    options_transc = whisper.DecodingOptions(fp16=False, language=lang, task='transcribe')
    # NOTE(review): the translate pass receives the caller's target language
    # rather than the detected one. Whisper documents `language` as the
    # language of the audio and `translate` as X -> English, so this looks
    # like a deliberate trick to steer the output language -- confirm.
    options_transl = whisper.DecodingOptions(fp16=False, language=language, task='translate')

    result_transc = whisper.decode(model_med, mel, options_transc)
    result_transl = whisper.decode(model_med, mel, options_transl)

    print(f"transcript is : {result_transc.text}")
    print(f"translation is : {result_transl.text}")

    return result_transc.text, result_transl.text, lang
|
|
|
|
|
|
|
|
# NOTE(review): the nesting below was reconstructed from statement order; the
# original indentation was not available -- confirm the intended layout.
with gr.Blocks() as blocks:
    # Centered page heading.
    gr.Markdown(f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>")

    with gr.Row():
        with gr.Column():
            # Microphone recording, handed to the callback as a file path.
            in_audio = gr.Audio(
                source="microphone",
                type="filepath",
                label='Record your voice here',
            )
            # Language selector.
            radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
        # Read-only player for the synthesised result.
        audio = gr.Audio(label="Output", interactive=False)

    # Clicking Submit runs tts(recording, language) and plays what it returns.
    submit.click(fn=tts, inputs=[in_audio, radio], outputs=[audio])

blocks.launch()
|
|
|