Spaces:
Sleeping
Sleeping
File size: 1,484 Bytes
16b84f6 8b8068e 16b84f6 8b8068e 16b84f6 8b8068e 16b84f6 8b8068e 16b84f6 8b8068e 16b84f6 8b8068e 16b84f6 8b8068e 16b84f6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | import gradio as gr
import torch
import os
import soundfile as sf
from transformers import pipeline
# Prefer the Unsloth Sesame CSM model (GPU); if any step of that setup fails,
# fall back to a CPU-friendly TTS model so the app can still start.
try:
    from unsloth import FastLanguageModel

    model_id = "unsloth/sesame-csm-tts"
    # NOTE(review): `model` and `tokenizer` are not referenced again in the
    # visible code; presumably this load only verifies that Unsloth works in
    # this environment. Confirm whether it is still needed, since the pipeline
    # below is built from the model id rather than from these objects.
    model, tokenizer = FastLanguageModel.from_pretrained(model_name=model_id)
    tts_pipeline = pipeline("text-to-speech", model=model_id)
    print("✅ Using Unsloth Sesame CSM TTS on GPU")
except Exception as exc:
    # Deliberately broad: an import failure, a failed model load or a failed
    # pipeline construction should all lead to the fallback. The cause is
    # printed so that an unexpected error is not silently hidden behind the
    # generic warning.
    print("⚠️ GPU model unavailable or Unsloth not supported on CPU.")
    print(f"   Reason: {type(exc).__name__}: {exc}")
    model_id = "facebook/mms-tts-eng"
    tts_pipeline = pipeline("text-to-speech", model=model_id)
def generate_tts(text):
    """Synthesize *text* to a WAV file and return its path plus a status line.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only input
            are rejected without calling the TTS pipeline.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the WAV file that was written,
        or ``None`` when no usable text was supplied; ``status`` is a
        human-readable message for the UI.
    """
    # Guard against None as well as blank strings so the handler never
    # raises AttributeError on a missing value.
    if not text or not text.strip():
        return None, "⚠️ Please enter some text."

    # Imported here so the block does not depend on a file-level import.
    import numpy as np

    outputs = tts_pipeline(text)

    # soundfile expects 1-D mono data or a 2-D (frames, channels) array.
    # Presumably the pipeline may return a leading batch/channel axis such as
    # (1, n) - TODO confirm for the models in use. Squeezing drops size-1
    # axes; a remaining (channels, frames) layout is transposed.
    audio = np.atleast_1d(np.squeeze(np.asarray(outputs["audio"])))
    if audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        audio = audio.T

    # Ensure the output directory exists.
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)
    # NOTE(review): every call writes to the same file name, so overlapping
    # requests would overwrite each other's audio - confirm this is acceptable.
    output_path = os.path.join(output_dir, "output.wav")

    # Save the audio file.
    sf.write(output_path, audio, outputs["sampling_rate"])
    return output_path, "✅ Audio generated successfully!"
# Gradio UI: a single text box in; an audio player and a status line out.
_text_input = gr.Textbox(label="Enter Text", placeholder="Type something...")
_result_widgets = [
    gr.Audio(label="Generated Speech"),
    gr.Textbox(label="Status"),
]

interface = gr.Interface(
    fn=generate_tts,
    inputs=_text_input,
    outputs=_result_widgets,
    title="🎙️ Sesame CSM TTS Demo",
    description="Generate speech using Sesame CSM TTS (GPU-supported)",
    allow_flagging="never",
)

# Launch the interface.
interface.launch()
|