Spaces:

awacke1
/

TTS-STT-Blocks

Build error

App Files Files Community

awacke1 committed on Jun 5, 2022

Commit

1f705ff

1 Parent(s): 89c118f

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -73

app.py CHANGED Viewed

@@ -23,6 +23,43 @@ def get_db_firestore():
 db = get_db_firestore()
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 def transcribe(audio):
     text = asr(audio)["text"]
     return text
@@ -63,8 +100,19 @@ def selectall(text):
         doclist += r
     return doclist
-demo = gr.Blocks()
 with demo:
     #audio_file = gr.Audio(type="filepath")
     audio_file = gr.inputs.Audio(source="microphone", type="filepath")
@@ -72,91 +120,35 @@ with demo:
     label = gr.Label()
     saved = gr.Textbox()
     savedAll = gr.Textbox()
     b1 = gr.Button("Recognize Speech")
     b2 = gr.Button("Classify Sentiment")
     b3 = gr.Button("Save Speech to Text")
     b4 = gr.Button("Retrieve All")
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
     b2.click(text_to_sentiment, inputs=text, outputs=label)
     b3.click(upsert, inputs=text, outputs=saved)
     b4.click(selectall, inputs=text, outputs=savedAll)
 demo.launch(share=True)
-MODEL_NAMES = [
-    # "en/ek1/tacotron2",
-    "en/ljspeech/tacotron2-DDC",
-    # "en/ljspeech/tacotron2-DDC_ph",
-    # "en/ljspeech/glow-tts",
-    # "en/ljspeech/tacotron2-DCA",
-    # "en/ljspeech/speedy-speech-wn",
-    # "en/ljspeech/vits",
-    # "en/vctk/sc-glow-tts",
-    # "en/vctk/vits",
-    # "en/sam/tacotron-DDC",
-    # "es/mai/tacotron2-DDC",
-    "fr/mai/tacotron2-DDC",
-    "zh-CN/baker/tacotron2-DDC-GST",
-    "nl/mai/tacotron2-DDC",
-    "de/thorsten/tacotron2-DCA",
-    # "ja/kokoro/tacotron2-DDC",
-]
-MODELS = {}
-manager = ModelManager()
-for MODEL_NAME in MODEL_NAMES:
-    print(f"downloading {MODEL_NAME}")
-    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
-    vocoder_name: Optional[str] = model_item["default_vocoder"]
-    vocoder_path = None
-    vocoder_config_path = None
-    if vocoder_name is not None:
-        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
-    synthesizer = Synthesizer(
-        model_path, config_path, None, vocoder_path, vocoder_config_path,
-    )
-    MODELS[MODEL_NAME] = synthesizer
-def tts(text: str, model_name: str):
-    print(text, model_name)
-    synthesizer = MODELS.get(model_name, None)
-    if synthesizer is None:
-        raise NameError("model not found")
-    wavs = synthesizer.tts(text)
-    # output = (synthesizer.output_sample_rate, np.array(wavs))
-    # return output
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        synthesizer.save_wav(wavs, fp)
-        return fp.name
-iface = gr.Interface(
-    fn=tts,
-    inputs=[
-        gr.inputs.Textbox(
-            label="Input",
-            default="Hello, how are you?",
-        ),
-        gr.inputs.Radio(
-            label="Pick a TTS Model",
-            choices=MODEL_NAMES,
-        ),
-    ],
-    outputs=gr.outputs.Audio(label="Output"),
-    title="🐸💬 - Coqui TTS",
-    theme="huggingface",
-    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
-    article="more info at https://github.com/coqui-ai/TTS",
-)
-iface.launch()

 db = get_db_firestore()
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+MODEL_NAMES = [
+    # "en/ek1/tacotron2",
+    "en/ljspeech/tacotron2-DDC",
+    # "en/ljspeech/tacotron2-DDC_ph",
+    # "en/ljspeech/glow-tts",
+    # "en/ljspeech/tacotron2-DCA",
+    # "en/ljspeech/speedy-speech-wn",
+    # "en/ljspeech/vits",
+    # "en/vctk/sc-glow-tts",
+    # "en/vctk/vits",
+    # "en/sam/tacotron-DDC",
+    # "es/mai/tacotron2-DDC",
+    "fr/mai/tacotron2-DDC",
+    "zh-CN/baker/tacotron2-DDC-GST",
+    "nl/mai/tacotron2-DDC",
+    "de/thorsten/tacotron2-DCA",
+    # "ja/kokoro/tacotron2-DDC",
+]
+MODELS = {}
+manager = ModelManager()
+for MODEL_NAME in MODEL_NAMES:
+    print(f"downloading {MODEL_NAME}")
+    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
+    vocoder_name: Optional[str] = model_item["default_vocoder"]
+    vocoder_path = None
+    vocoder_config_path = None
+    if vocoder_name is not None:
+        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+    synthesizer = Synthesizer(
+        model_path, config_path, None, vocoder_path, vocoder_config_path,
+    )
+    MODELS[MODEL_NAME] = synthesizer
 def transcribe(audio):
     """Run ``audio`` through the module-level ``asr`` pipeline and return the ``"text"`` entry of its result."""
     return asr(audio)["text"]
         doclist += r
     return doclist
+def tts(text: str, model_name: str):
+    print(text, model_name)
+    synthesizer = MODELS.get(model_name, None)
+    if synthesizer is None:
+        raise NameError("model not found")
+    wavs = synthesizer.tts(text)
+    # output = (synthesizer.output_sample_rate, np.array(wavs))
+    # return output
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        synthesizer.save_wav(wavs, fp)
+        return fp.name
+demo = gr.Blocks()
 with demo:
     #audio_file = gr.Audio(type="filepath")
     audio_file = gr.inputs.Audio(source="microphone", type="filepath")
     label = gr.Label()
     saved = gr.Textbox()
     savedAll = gr.Textbox()
+    TTSchoice = gr.inputs.Radio( label="Pick a TTS Model", choices=MODEL_NAMES,   )
     b1 = gr.Button("Recognize Speech")
     b2 = gr.Button("Classify Sentiment")
     b3 = gr.Button("Save Speech to Text")
     b4 = gr.Button("Retrieve All")
+    b5 = gr.Button("Read It Back Aloud")
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
     b2.click(text_to_sentiment, inputs=text, outputs=label)
     b3.click(upsert, inputs=text, outputs=saved)
     b4.click(selectall, inputs=text, outputs=savedAll)
+    b5.click(tts,  inputs=text,TTSchoice, outputs=Audio(label="Output"))
 demo.launch(share=True)
+#iface = gr.Interface(
+#    fn=tts,
+#    inputs=[
+#        gr.inputs.Textbox( label="Input", default="Hello, how are you?",  ),
+#        gr.inputs.Radio( label="Pick a TTS Model", choices=MODEL_NAMES,   ),
+#    ],
+#    outputs=gr.outputs.Audio(label="Output"),
+#    title="🐸💬 - Coqui TTS",
+#    theme="huggingface",
+#    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
+#    article="more info at https://github.com/coqui-ai/TTS",
+#)
+#iface.launch()