Spaces:

harmonicsnail
/

Modern_TalkNET

Sleeping

File size: 1,548 Bytes

# app.py
import gradio as gr
import os
from model_inference import NetTALKWrapper

# Filename of the model state dict handed to NetTALKWrapper. Override it by
# setting the NETTALK_STATE_DICT environment variable to a different filename.
STATE_DICT = os.environ.get("NETTALK_STATE_DICT", "nettalk_state_dict.pt")

# Instantiate the model once at import time; predict_phonemes reuses this
# single module-level instance for every call.
try:
    model = NetTALKWrapper(state_dict_path=STATE_DICT)
except Exception as e:
    # Fail fast at startup with a readable message. Chaining with `from e`
    # records the original error as the direct cause, so the startup
    # traceback shows exactly what went wrong while loading.
    raise RuntimeError(f"Failed to load model: {e}") from e

def predict_phonemes(word: str):
    """Predict the phoneme string for a single word.

    Returns a 2-tuple ``(text, audio)``. ``text`` is the model's prediction,
    or a prompt message when ``word`` is missing, empty, or only whitespace.
    ``audio`` is always ``None`` because no speech is synthesized here.
    """
    if word and word.strip():
        # Audio slot stays empty for now (TTS can be added later).
        return model.predict_string(word), None
    return "Please enter a word", None

# Custom CSS handed to gr.Blocks below: caps the app container at 900px and
# centers it, and gives the page a dark gradient background with light text.
css = """
.gradio-container { max-width: 900px; margin: auto; }
body { background: linear-gradient(135deg,#071024,#081226); color: #e6eef8; }
"""

# Declare the UI. Components are created inside the Blocks context in the
# order they should appear, so keep the statement order below intact.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    gr.Markdown("# 🧠 NetTALK phoneme predictor")
    gr.Markdown("Enter a word and get ARPAbet phonemes predicted by the trained model.")
    # Input row: the word textbox together with its trigger button.
    with gr.Row():
        word = gr.Textbox(label="Enter word", placeholder="example: 'computer'", lines=1)
        btn = gr.Button("Predict")
    out_ph = gr.Textbox(label="Predicted ARPAbet Phonemes")
    # Placeholder for future audio output: created hidden (visible=False),
    # and predict_phonemes currently always returns None for it.
    out_audio = gr.Audio(label="Synthesized audio (optional)", visible=False)

    # Clicking the button calls predict_phonemes with the textbox value and
    # sends its two return values to out_ph and out_audio, in that order.
    btn.click(predict_phonemes, inputs=word, outputs=[out_ph, out_audio])

if __name__ == "__main__":
    # When run directly as a script, serve the app on all network
    # interfaces at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)