TTS / app.py
BytesofSurajm's picture
Update app.py
8b8068e verified
import gradio as gr
import torch
import os
import soundfile as sf
from transformers import pipeline
# Model selection: try the GPU Unsloth Sesame CSM model first and fall back to
# the CPU-friendly MMS English model if anything in the primary path fails
# (missing `unsloth` package, unsupported hardware, download error, ...).
try:
    from unsloth import FastLanguageModel

    model_id = "unsloth/sesame-csm-tts"
    # NOTE(review): `model` and `tokenizer` are not referenced by the code
    # visible in this file; the pipeline below is given only the model id.
    # Confirm whether this separate load is still needed.
    model, tokenizer = FastLanguageModel.from_pretrained(model_name=model_id)
    tts_pipeline = pipeline("text-to-speech", model=model_id)
    print("✅ Using Unsloth Sesame CSM TTS on GPU")
except Exception as exc:
    # Deliberately broad: any failure of the primary path should trigger the
    # fallback. The cause is reported so the failure is not silently hidden.
    print("⚠️ GPU model unavailable or Unsloth not supported on CPU.")
    print(f"   Reason: {type(exc).__name__}: {exc}")
    model_id = "facebook/mms-tts-eng"
    tts_pipeline = pipeline("text-to-speech", model=model_id)
def generate_tts(text):
    """Synthesize speech for ``text`` and save it as a WAV file.

    Args:
        text: The text to speak. ``None`` and whitespace-only strings are
            treated as empty input.

    Returns:
        A ``(path, status)`` pair. ``path`` is the location of the written
        WAV file, or ``None`` when there was nothing to synthesize;
        ``status`` is a human-readable message for the UI.
    """
    # Treat None like blank input so the caller gets the friendly message
    # instead of an AttributeError from ``None.strip()``.
    if text is None or not text.strip():
        return None, "⚠️ Please enter some text."

    outputs = tts_pipeline(text)
    audio = outputs["audio"]
    # soundfile interprets 2-D data as (frames, channels). A waveform carrying
    # a leading singleton axis, shape (1, n), would therefore be written as a
    # single frame with n channels, so drop that axis to get plain mono data.
    # NOTE(review): some text-to-speech pipelines appear to return audio with
    # such a leading axis; confirm for the models used here.
    if getattr(audio, "ndim", 1) == 2 and audio.shape[0] == 1:
        audio = audio[0]

    # Ensure output directory exists
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "output.wav")

    # Save audio file
    sf.write(output_path, audio, outputs["sampling_rate"])
    return output_path, "✅ Audio generated successfully!"
# Gradio UI: one text box in; generated audio plus a status line out.
# Components are created in the same order as before (input first, then the
# two outputs) and handed to the Interface by name.
_text_input = gr.Textbox(label="Enter Text", placeholder="Type something...")
_result_widgets = [
    gr.Audio(label="Generated Speech"),
    gr.Textbox(label="Status"),
]

interface = gr.Interface(
    fn=generate_tts,
    inputs=_text_input,
    outputs=_result_widgets,
    title="🎙️ Sesame CSM TTS Demo",
    description="Generate speech using Sesame CSM TTS (GPU-supported)",
    allow_flagging="never",
)

# Start serving the app.
interface.launch()