# TTS_unreal / app.py
# BenjaminPittsley's picture
# Fix 503 crash: Add TOS agreement and auto-download sample voice
# 73a7287
import gradio as gr
from TTS.api import TTS
import os
import urllib.request
# Agree to the Coqui TOS up front so the model download does not wait for
# interactive confirmation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Initialize TTS (XTTS-v2). The model is downloaded on the first run.
# Use the GPU only when PyTorch imports cleanly and reports a CUDA device;
# any failure while probing falls back to CPU instead of aborting startup.
try:
    import torch

    use_gpu = torch.cuda.is_available()
except Exception as exc:  # best-effort probe: never fatal, but not silent
    print(f"CUDA probe failed ({exc!r}); falling back to CPU.")
    use_gpu = False

print(f"Initializing TTS with GPU={use_gpu}...")
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=use_gpu)
# Working folders: generated audio is written to output/, reference clips
# used for voice cloning are read from voices/.
os.makedirs("output", exist_ok=True)
os.makedirs("voices", exist_ok=True)

_SAMPLE_VOICE_URL = (
    "https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav"
)
_SAMPLE_VOICE_PATH = "voices/female_calm.wav"
_SAMPLE_DOWNLOAD_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging startup

# Ensure at least one voice exists. Only .wav files count as voices, so a
# placeholder file (e.g. .gitkeep) does not suppress the sample download.
if not any(name.lower().endswith(".wav") for name in os.listdir("voices")):
    print("No voices found. Downloading sample...")
    try:
        # Read the whole clip before touching the destination so a failed
        # transfer never leaves a truncated .wav behind in voices/.
        with urllib.request.urlopen(
            _SAMPLE_VOICE_URL, timeout=_SAMPLE_DOWNLOAD_TIMEOUT
        ) as response:
            sample_bytes = response.read()
        with open(_SAMPLE_VOICE_PATH, "wb") as sample_file:
            sample_file.write(sample_bytes)
        print("Downloaded female_calm.wav")
    except Exception as e:
        # Startup continues without a sample voice; generate_speech reports
        # the missing voice per request.
        print(f"Failed to download sample voice: {e}")
def generate_speech(text, voice_id):
    """Synthesize ``text`` with a cloned voice and return the audio file path.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        voice_id: Name (without the ``.wav`` extension) of a reference clip
            in the ``voices/`` folder. Directory components are discarded,
            so the lookup never leaves ``voices/``. If the clip is missing,
            the first available ``.wav`` (sorted by name) is used instead.

    Returns:
        A ``(path, status)`` tuple: ``("output/output.wav", "Success")`` when
        synthesis succeeds, otherwise ``(None, "<error message>")``.
    """
    if text is None or not str(text).strip():
        return None, "Error: No text provided."

    output_path = "output/output.wav"
    voices_dir = "voices"

    # voice_id comes from the public endpoint: keep only the final path
    # component so values like "../secret" cannot escape voices/.
    voice_name = os.path.basename(str(voice_id or "").strip())
    speaker_wav = f"{voices_dir}/{voice_name}.wav"

    if not os.path.isfile(speaker_wav):
        # Fall back to the first available voice. Only .wav files qualify,
        # and sorting makes the choice deterministic across runs.
        try:
            available = sorted(
                name
                for name in os.listdir(voices_dir)
                if name.lower().endswith(".wav")
            )
        except OSError:
            # A missing or unreadable voices/ folder means no voices.
            available = []
        if not available:
            return None, f"Error: Voice ID '{voice_id}' not found and no voices available."
        print(f"Voice '{voice_id}' not found. Falling back to '{available[0]}'")
        speaker_wav = f"{voices_dir}/{available[0]}"

    # Generate. Errors are reported through the status string rather than
    # raised, so the caller always receives a (path, status) pair.
    try:
        tts.tts_to_file(
            text=text,
            file_path=output_path,
            speaker_wav=speaker_wav,
            language="en",
        )
    except Exception as e:
        return None, f"Error generating speech: {str(e)}"
    return output_path, "Success"
# Gradio front end for generate_speech: two text fields in (the text to
# speak and the voice id), one audio file path plus a status string out.
_text_box = gr.Textbox(label="Text")
_voice_box = gr.Textbox(label="Voice ID (e.g. 'narrator', 'aribeth')")
_audio_out = gr.Audio(label="Generated Audio", type="filepath")
_status_out = gr.Textbox(label="Status")

iface = gr.Interface(
    fn=generate_speech,
    inputs=[_text_box, _voice_box],
    outputs=[_audio_out, _status_out],
    title="Unreal Engine TTS Backend (XTTS-v2)",
    description="API for DGG_001 Game. Send POST requests to /run/predict",
)

# Start serving the interface.
iface.launch()