Spaces:
Sleeping
Sleeping
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
# Try the GPU Unsloth model first; fall back to a CPU-friendly TTS model when
# Unsloth cannot be imported or the model cannot be loaded.
try:
    from unsloth import FastLanguageModel

    model_id = "unsloth/sesame-csm-tts"
    # NOTE(review): `model` and `tokenizer` are not referenced anywhere else
    # in the visible code, and the pipeline below loads `model_id` again on
    # its own. Kept because other code may rely on these module-level names —
    # confirm and drop this call if it is redundant.
    model, tokenizer = FastLanguageModel.from_pretrained(model_name=model_id)
    tts_pipeline = pipeline("text-to-speech", model=model_id)
    print("✅ Using Unsloth Sesame CSM TTS on GPU")
except Exception as exc:
    # Deliberate best-effort fallback: any failure above switches to the
    # CPU model. Report the actual cause so a missing package, a bad model
    # id, or an out-of-memory error can be told apart in the logs.
    print("⚠️ GPU model unavailable or Unsloth not supported on CPU.")
    print(f"   Reason: {type(exc).__name__}: {exc}")
    model_id = "facebook/mms-tts-eng"
    tts_pipeline = pipeline("text-to-speech", model=model_id)
def generate_tts(text):
    """Synthesize speech for *text* and save it as a WAV file.

    Args:
        text: The text to speak. ``None``, empty, and whitespace-only input
            are rejected without running the model.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the WAV file that was
        written, or ``None`` when no text was supplied; ``status`` is a
        human-readable message for the UI.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if text is None or not text.strip():
        return None, "⚠️ Please enter some text."

    outputs = tts_pipeline(text)

    # The pipeline's audio array may carry a leading batch/channel axis
    # (e.g. shape (1, n_samples)); soundfile expects (frames,) or
    # (frames, channels). Drop singleton axes, and if the result is still
    # 2-D and looks channels-first, transpose it.
    # NOTE(review): the "fewer rows than columns" test is a heuristic for
    # channels-first layout — confirm against the models actually served.
    audio = np.atleast_1d(np.squeeze(np.asarray(outputs["audio"])))
    if audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        audio = audio.T

    # Ensure output directory exists
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)

    # Use a unique file per request so concurrent requests do not overwrite
    # each other's audio. Files are not cleaned up here.
    fd, output_path = tempfile.mkstemp(
        prefix="tts_", suffix=".wav", dir=output_dir
    )
    os.close(fd)  # soundfile reopens the file by path

    # Save audio file
    sf.write(output_path, audio, outputs["sampling_rate"])
    return output_path, "✅ Audio generated successfully!"
# Gradio UI: a single text box in; generated audio and a status message out.
interface = gr.Interface(
    fn=generate_tts,
    inputs=gr.Textbox(label="Enter Text", placeholder="Type something..."),
    outputs=[gr.Audio(label="Generated Speech"), gr.Textbox(label="Status")],
    title="🎙️ Sesame CSM TTS Demo",
    description="Generate speech using Sesame CSM TTS (GPU-supported)",
    allow_flagging="never",
)

# Launch the interface only when this file is run as a script, so importing
# the module (for tests or reuse of `interface`) does not start a web server.
if __name__ == "__main__":
    interface.launch()