import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline

# Build the Hugging Face text-to-speech pipeline once, at import time, with
# inference pinned to the CPU.
tts = pipeline(
    task="text-to-speech",
    model="facebook/mms-tts-eng",
    device="cpu",
)

# Startup marker printed once the pipeline object has been constructed.
print("TTS model loaded successfully")

def generate_speech(text):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Args:
        text: The text to speak. ``None``, an empty string, or a
            whitespace-only string produces no audio.

    Returns:
        Path of a newly written WAV file, or ``None`` when there is nothing
        to synthesize. The file is created with ``delete=False`` and is not
        removed by this function.
    """
    # Guard against None as well as blank input so the call never fails on
    # ``None.strip()``.
    if text is None or not text.strip():
        return None

    output = tts(text)
    sample_rate = output["sampling_rate"]

    # The pipeline may hand back the waveform with a leading singleton batch
    # axis, e.g. (1, samples), whereas soundfile reads a 2-D array as
    # (frames, channels). Dropping singleton axes makes mono audio 1-D.
    audio = np.squeeze(np.asarray(output["audio"]))
    if audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        # Presumably channels-first multi-channel audio; soundfile expects
        # frames-first, so swap the axes.
        audio = audio.T

    # Write each result to its own file so overlapping requests cannot
    # overwrite one another's audio. The handle is closed before writing so
    # soundfile can reopen the path on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    sf.write(output_path, audio, sample_rate)

    return output_path


# Web UI: a single text box feeding generate_speech, whose returned file path
# is rendered by an audio component.
_prompt_box = gr.Textbox(
    label="Enter text",
    placeholder="Hello Subiksha, welcome to text to speech",
)
_speech_player = gr.Audio(type="filepath")

demo = gr.Interface(
    fn=generate_speech,
    inputs=_prompt_box,
    outputs=_speech_player,
    title="Real Text-to-Speech System",
    description="Enter text and get real human speech",
)

# Start the server bound to 0.0.0.0 on port 7860 with SSR mode turned off.
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)