Vikranth committed on
Commit
aa4b93b
·
1 Parent(s): a284ad0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -19
app.py CHANGED
@@ -3,6 +3,9 @@ import tempfile
3
  import gradio as gr
4
 
5
  from neon_tts_plugin_coqui import CoquiTTS
 
 
 
6
 
7
 
8
  LANGUAGES = list(CoquiTTS.langs.keys())
@@ -10,37 +13,54 @@ default_lang = "en"
10
 
11
 
12
 
13
- title = "🐸💬 - NeonAI Coqui AI TTS Plugin"
14
- description = "🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production"
15
- info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
16
- badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
17
-
18
 
19
 
20
  coquiTTS = CoquiTTS()
21
 
22
 
23
def tts(text: str, language: str):
    """Synthesize ``text`` with the shared Coqui TTS engine.

    Args:
        text: Sentence to be spoken.
        language: Language code handed to the engine inside the speaker
            configuration.

    Returns:
        Filesystem path of the temporary ``.wav`` file the audio was
        written to.
    """
    print(text, language)
    speaker_config = {"language": language}
    # delete=False: the file must outlive the handle, because only its path
    # is returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        coquiTTS.get_tts(text, wav_file, speaker=speaker_config)
        output_path = wav_file.name
    return output_path
29
 
30
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  with gr.Blocks() as blocks:
33
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
34
  + title
35
  + "</h1>")
36
- gr.Markdown(description)
37
  with gr.Row():# equal_height=False
38
  with gr.Column():# variant="panel"
39
- textbox = gr.Textbox(
40
- label="Input",
41
- value=CoquiTTS.langs[default_lang]["sentence"],
42
- max_lines=3,
43
- )
44
  radio = gr.Radio(
45
  label="Language",
46
  choices=LANGUAGES,
@@ -49,15 +69,11 @@ with gr.Blocks() as blocks:
49
  with gr.Row():# mobile_collapse=False
50
  submit = gr.Button("Submit", variant="primary")
51
  audio = gr.Audio(label="Output", interactive=False)
52
- gr.Markdown(info)
53
- gr.Markdown("<center>"
54
- +f'<img src={badge} alt="visitors badge"/>'
55
- +"</center>")
56
 
57
  # actions
58
  submit.click(
59
  tts,
60
- [textbox, radio],
61
  [audio],
62
  )
63
  radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, textbox)
 
3
  import gradio as gr
4
 
5
  from neon_tts_plugin_coqui import CoquiTTS
6
+ import whisper
7
+ import requests
8
+ import tempfile
9
 
10
 
11
  LANGUAGES = list(CoquiTTS.langs.keys())
 
13
 
14
 
15
 
16
+ title = "Talk to (almost) anyone"
 
 
 
 
17
 
18
 
19
  coquiTTS = CoquiTTS()
20
 
21
 
22
def tts(audio, language):
    """Turn a voice recording into synthesized speech in ``language``.

    The recording is first run through ``whisper_stt``; the translated text
    it returns is then spoken by the shared Coqui TTS engine.

    Args:
        audio: Recording to process, forwarded unchanged to ``whisper_stt``
            (presumably a file path, given the microphone input is declared
            with ``type="filepath"`` -- confirm against the UI wiring).
        language: Language code selected in the UI; forwarded to
            ``whisper_stt`` and used as the TTS speaker language.

    Returns:
        Filesystem path of the temporary ``.wav`` file holding the
        synthesized audio.
    """
    # whisper_stt returns (transcript, translation, detected_language);
    # only the translation is spoken, so the other two are discarded.
    _, text, _ = whisper_stt(audio, language)
    # delete=False: the file must outlive the handle, because only its path
    # is returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(text, fp, speaker={"language": language})
        return fp.name
29
 
30
def whisper_stt(audio, language):
    """Transcribe and translate a recording with Whisper.

    Args:
        audio: Recording to load via ``whisper.load_audio``.
        language: Language code passed as ``language`` to the ``translate``
            decoding options.

    Returns:
        A ``(transcript, translation, detected_language)`` tuple: the text
        decoded with the ``transcribe`` task, the text decoded with the
        ``translate`` task, and the most probable language reported by the
        model's language detection.
    """
    # NOTE(review): ``model_med`` is not defined in the visible part of this
    # file -- presumably a module-level Whisper model; confirm it is loaded
    # before this function can run.
    print("Inside Whisper STT")

    # load audio and pad/trim it to fit 30 seconds
    samples = whisper.load_audio(audio)
    samples = whisper.pad_or_trim(samples)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(samples).to(model_med.device)

    # detect the spoken language
    _, probs = model_med.detect_language(mel)
    lang = max(probs, key=probs.get)
    print(f"Detected language: {lang}")

    # decode the audio
    options_transc = whisper.DecodingOptions(
        fp16=False, language=lang, task="transcribe"
    )
    # NOTE(review): the ``translate`` task is given the UI-selected language
    # rather than the detected one; Whisper's ``language`` option normally
    # names the spoken language -- confirm this is intentional.
    options_transl = whisper.DecodingOptions(
        fp16=False, language=language, task="translate"
    )
    result_transc = whisper.decode(model_med, mel, options_transc)
    result_transl = whisper.decode(model_med, mel, options_transl)

    # print the recognized text
    print(f"transcript is : {result_transc.text}")
    print(f"translation is : {result_transl.text}")

    return result_transc.text, result_transl.text, lang
55
+
56
 
57
  with gr.Blocks() as blocks:
58
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
59
  + title
60
  + "</h1>")
 
61
  with gr.Row():# equal_height=False
62
  with gr.Column():# variant="panel"
63
+ in_audio = gr.Audio(source="microphone",type="filepath", label='Record your voice here')
 
 
 
 
64
  radio = gr.Radio(
65
  label="Language",
66
  choices=LANGUAGES,
 
69
  with gr.Row():# mobile_collapse=False
70
  submit = gr.Button("Submit", variant="primary")
71
  audio = gr.Audio(label="Output", interactive=False)
 
 
 
 
72
 
73
  # actions
74
  submit.click(
75
  tts,
76
+ [in_audio, radio],
77
  [audio],
78
  )
79
  radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, textbox)