Spaces:
Sleeping
Sleeping
Commit ·
16f5077
1
Parent(s): 7f6a0d4
updated app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,95 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
import gradio as gr
|
| 3 |
+
import os
|
| 4 |
+
import numpy as np
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
import tempfile
|
| 7 |
+
from model_inference import NetTALKWrapper
|
| 8 |
|
# Select the text-to-speech backend. Values recognized by this file:
#   "gtts"  - Google TTS via the gtts package (default)
#   "coqui" - Coqui TTS via the TTS package
# NOTE(review): any other value (including "none") falls through to the
# gTTS path in predict_and_speak() -- there is no explicit "none" branch.
TTS_BACKEND = os.environ.get("TTS_BACKEND", "gtts")

# Path to the serialized NetTALK model weights.
MODEL_PATH = "nettalk_model.pt"

# Load the model once at import time so every request reuses it
# (fast startup if the model file is already cached locally).
model = NetTALKWrapper(MODEL_PATH)
|
# optional: simple gTTS-based synth (works by speaking the phoneme string as text)
def synthesize_gtts(phoneme_text):
    """Synthesize *phoneme_text* by speaking it as plain text via gTTS.

    gTTS can only emit MP3, so an MP3 is saved first and then transcoded
    to WAV with pydub when available.

    Returns:
        Path to the resulting audio file: a WAV on successful conversion,
        otherwise the raw MP3 (Gradio accepts both formats).

    Raises:
        Whatever gTTS raises on network/quota failures.
    """
    from gtts import gTTS

    # Reserve the output path up front and close the handle immediately:
    # keeping it open leaks a file descriptor and blocks rewriting the
    # file on platforms (Windows) that lock open temp files.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    wav_path = tmp.name
    mp3_path = wav_path + ".mp3"

    gTTS(phoneme_text, lang="en").save(mp3_path)

    try:
        import pydub

        audio = pydub.AudioSegment.from_mp3(mp3_path)
        audio.export(wav_path, format="wav")
    except Exception:
        # pydub (or its ffmpeg dependency) unavailable: fall back to the
        # MP3, but drop the empty placeholder WAV so it doesn't leak.
        try:
            os.remove(wav_path)
        except OSError:
            pass
        return mp3_path

    # Conversion succeeded: the intermediate MP3 is no longer needed.
    try:
        os.remove(mp3_path)
    except OSError:
        pass
    return wav_path
| 34 |
+
# optional: Coqui TTS (phoneme-aware) - heavier but can take ARPAbet inputs
|
| 35 |
+
def synthesize_coqui(arpabet):
|
| 36 |
+
# This requires the `TTS` package and an appropriate model that accepts phoneme input.
|
| 37 |
+
try:
|
| 38 |
+
from TTS.api import TTS
|
| 39 |
+
except Exception as e:
|
| 40 |
+
raise RuntimeError("TTS package not installed or failed to import.") from e
|
| 41 |
+
|
| 42 |
+
# choose a model name you installed / that exists; example placeholder:
|
| 43 |
+
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
|
| 44 |
+
# Some TTS models accept `phoneme` argument or `phoneme_input=True`. Check the model docs.
|
| 45 |
+
wav = tts.tts(arpabet, speaker=None, phoneme_input=False)
|
| 46 |
+
# wav is a numpy array and sample rate accessible via tts.synthesizer.output_sample_rate
|
| 47 |
+
sr = tts.synthesizer.output_sample_rate if hasattr(tts.synthesizer, "output_sample_rate") else 22050
|
| 48 |
+
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 49 |
+
sf.write(tmp.name, wav, sr)
|
| 50 |
+
return tmp.name
|
| 51 |
+
|
| 52 |
+
def predict_and_speak(word):
|
| 53 |
+
if not word or not word.strip():
|
| 54 |
+
return "Please enter a word", None
|
| 55 |
+
|
| 56 |
+
phonemes = model.predict(word)
|
| 57 |
+
audio_path = None
|
| 58 |
+
|
| 59 |
+
# Try preferred backend
|
| 60 |
+
try:
|
| 61 |
+
if TTS_BACKEND == "coqui":
|
| 62 |
+
audio_path = synthesize_coqui(phonemes)
|
| 63 |
+
else:
|
| 64 |
+
audio_path = synthesize_gtts(phonemes)
|
| 65 |
+
except Exception as e:
|
| 66 |
+
# If synth fails, still return phonemes and a None audio
|
| 67 |
+
print("Synthesis failed:", e)
|
| 68 |
+
audio_path = None
|
| 69 |
+
|
| 70 |
+
# gr.Audio accepts: filename (wav/mp3), numpy array, or (np, sr)
|
| 71 |
+
return phonemes, audio_path
|
| 72 |
+
|
| 73 |
+
# ---- Gradio UI ----
|
| 74 |
+
css = """
|
| 75 |
+
body { background: linear-gradient(135deg,#0f172a,#020617); color: #e6eef8; }
|
| 76 |
+
.gradio-container { max-width: 900px; margin: auto; padding: 20px; }
|
| 77 |
+
"""
|
| 78 |
+
|
| 79 |
+
with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
|
| 80 |
+
gr.Markdown("# 🧠 NetTALK → ARPAbet demo")
|
| 81 |
+
gr.Markdown("Enter a word, get predicted ARPAbet phonemes and a synthesized audio preview.")
|
| 82 |
+
|
| 83 |
+
with gr.Row():
|
| 84 |
+
word_in = gr.Textbox(label="Enter word", placeholder="example: 'computer'", lines=1)
|
| 85 |
+
run_btn = gr.Button("Predict")
|
| 86 |
+
|
| 87 |
+
phoneme_out = gr.Textbox(label="Predicted ARPAbet Phonemes")
|
| 88 |
+
audio_out = gr.Audio(label="Synthesized audio (preview)")
|
| 89 |
+
|
| 90 |
+
run_btn.click(fn=predict_and_speak, inputs=[word_in], outputs=[phoneme_out, audio_out])
|
| 91 |
+
|
| 92 |
+
gr.Markdown("Tip: Replace `preprocess()` and `decode_to_arpabet()` in `model_inference.py` with your real model code.")
|
| 93 |
+
|
| 94 |
+
if __name__ == "__main__":
|
| 95 |
+
demo.launch()
|