Spaces:

Vikranth
/

talkaway

Runtime error

App Files Files Community

Vikranth committed on Oct 18, 2022

Commit

aa4b93b

1 Parent(s): a284ad0

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -19

app.py CHANGED Viewed

@@ -3,6 +3,9 @@ import tempfile
 import gradio as gr
 from neon_tts_plugin_coqui import CoquiTTS
 LANGUAGES = list(CoquiTTS.langs.keys())
@@ -10,37 +13,54 @@ default_lang = "en"
-title = "🐸💬 - NeonAI Coqui AI TTS Plugin"
-description = "🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production"
-info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
-badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
 coquiTTS = CoquiTTS()
-def tts(text: str, language: str):
-    print(text, language)
     # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         coquiTTS.get_tts(text, fp, speaker = {"language" : language})
         return fp.name
 with gr.Blocks() as blocks:
     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                 + title
                 + "</h1>")
-    gr.Markdown(description)
     with gr.Row():# equal_height=False
         with gr.Column():# variant="panel"
-            textbox = gr.Textbox(
-                label="Input",
-                value=CoquiTTS.langs[default_lang]["sentence"],
-                max_lines=3,
-            )
             radio = gr.Radio(
                 label="Language",
                 choices=LANGUAGES,
@@ -49,15 +69,11 @@ with gr.Blocks() as blocks:
             with gr.Row():# mobile_collapse=False
                 submit = gr.Button("Submit", variant="primary")
         audio = gr.Audio(label="Output", interactive=False)
-    gr.Markdown(info)
-    gr.Markdown("<center>"
-                +f'<img src={badge} alt="visitors badge"/>'
-                +"</center>")
     # actions
     submit.click(
         tts,
-        [textbox, radio],
         [audio],
     )
     radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, textbox)

 import gradio as gr
 from neon_tts_plugin_coqui import CoquiTTS
+import whisper
+import requests
+import tempfile
 LANGUAGES = list(CoquiTTS.langs.keys())
+title = "Talk to (almost) anyone"
 coquiTTS = CoquiTTS()
+def tts(audio, language):  # click handler: takes the recorded audio file path and the selected language, returns the path of a synthesized .wav
+    #print(text, language)
+    transcribe, text, lang = whisper_stt(audio,language)  # whisper_stt returns (transcript, translation, detected language); only the translation (`text`) is used below
     # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:  # delete=False keeps the file on disk after the with-block so its path can be returned
         coquiTTS.get_tts(text, fp, speaker = {"language" : language})  # synthesize `text` into the temp file using the selected language
         return fp.name
+def whisper_stt(audio,language):  # runs Whisper on an audio file; returns (transcript text, translation text, detected language)
+  print("Inside Whisper TTS")  # log label says "TTS", but this function performs speech-to-text
+  # load audio and pad/trim it to fit 30 seconds
+  audio = whisper.load_audio(audio)  # `audio` arrives as a file path and is rebound to the loaded audio data
+  audio = whisper.pad_or_trim(audio)
+  # make log-Mel spectrogram and move to the same device as the model
+  mel = whisper.log_mel_spectrogram(audio).to(model_med.device)  # NOTE(review): model_med is not defined in the visible part of this diff - confirm it is loaded (e.g. whisper.load_model) at module level
+  # detect the spoken language
+  _, probs = model_med.detect_language(mel)
+  lang = max(probs, key=probs.get)  # language key with the highest detection probability
+  print(f"Detected language: {max(probs, key=probs.get)}")  # recomputes the same maximum as `lang`
+  # decode the audio
+  options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
+  options_transl = whisper.DecodingOptions(fp16 = False, language=language, task='translate') #lang  NOTE(review): `language` is the caller-selected language, not the detected one; presumably Whisper's translate task only targets English - confirm this produces the intended output
+  result_transc = whisper.decode(model_med, mel, options_transc)
+  result_transl = whisper.decode(model_med, mel, options_transl)  # second decoding pass over the same mel, this time with the translate options
+  # print the recognized text
+  print(f"transcript is : {result_transc.text}")
+  print(f"translation is : {result_transl.text}")
+  return result_transc.text, result_transl.text, lang
 with gr.Blocks() as blocks:  # UI layout: title, microphone input + language selector, submit button, audio output
     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                 + title
                 + "</h1>")
     with gr.Row():# equal_height=False
         with gr.Column():# variant="panel"
+            in_audio = gr.Audio(source="microphone",type="filepath", label='Record your voice here')  # type="filepath": the handler receives a path to the recording
             radio = gr.Radio(
                 label="Language",
                 choices=LANGUAGES,  # NOTE(review): the remaining arguments and closing parenthesis of gr.Radio(...) are not visible in this diff view
             with gr.Row():# mobile_collapse=False
                 submit = gr.Button("Submit", variant="primary")
         audio = gr.Audio(label="Output", interactive=False)
     # actions
     submit.click(
         tts,
+        [in_audio, radio],
         [audio],
     )
     radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, textbox)  # NOTE(review): `textbox` is still referenced here, but this commit deletes the gr.Textbox definition - likely a NameError at startup; confirm and remove or retarget this handler