"""Gradio front-end exposing Coqui XTTS-v2 voice cloning as a small HTTP API.

Reference clips live in ``voices/`` (one ``<voice_id>.wav`` per voice) and
generated audio is written to ``output/``.
"""

import os
import time
import urllib.request
import uuid
from typing import List, Optional, Tuple

import gradio as gr
from TTS.api import TTS

# Agree to Coqui TOS (must be set before the model is downloaded/loaded).
os.environ["COQUI_TOS_AGREED"] = "1"

VOICES_DIR = "voices"
OUTPUT_DIR = "output"
SAMPLE_VOICE_URL = (
    "https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav"
)
SAMPLE_VOICE_NAME = "female_calm.wav"
# Generated clips older than this are deleted on the next request so that
# per-request output files cannot fill the disk.
OUTPUT_MAX_AGE_SECONDS = 60 * 60

# Initialize TTS (XTTS-v2).
# This will download the model on the first run.
# We use gpu=True if CUDA is available, otherwise False.
use_gpu = True
try:
    import torch

    if not torch.cuda.is_available():
        use_gpu = False
except Exception:
    # Best effort: any failure while probing torch/CUDA means "run on CPU".
    # (Unlike a bare ``except:``, this lets Ctrl-C / SystemExit through.)
    use_gpu = False

print(f"Initializing TTS with GPU={use_gpu}...")
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=use_gpu)

# Working folders.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(VOICES_DIR, exist_ok=True)


def _remove_quietly(path: str) -> None:
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        # Missing file or a concurrent delete: nothing left to clean up.
        pass


def _list_voice_files() -> List[str]:
    """Return the sorted names of the regular ``.wav`` files in ``VOICES_DIR``."""
    try:
        names = os.listdir(VOICES_DIR)
    except OSError:
        return []
    return sorted(
        name
        for name in names
        if name.lower().endswith(".wav")
        and os.path.isfile(os.path.join(VOICES_DIR, name))
    )


def _ensure_sample_voice() -> None:
    """Download a sample voice when ``VOICES_DIR`` has no usable clip."""
    if _list_voice_files():
        return
    print("No voices found. Downloading sample...")
    destination = os.path.join(VOICES_DIR, SAMPLE_VOICE_NAME)
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated file that looks like a valid voice.
    partial = destination + ".part"
    try:
        urllib.request.urlretrieve(SAMPLE_VOICE_URL, partial)
        os.replace(partial, destination)
        print("Downloaded female_calm.wav")
    except Exception as e:
        # Start-up must not fail just because the sample is unreachable.
        print(f"Failed to download sample voice: {e}")
        _remove_quietly(partial)


def _resolve_speaker_wav(voice_id) -> Optional[str]:
    """Return the reference clip path for *voice_id*.

    Falls back to the first available voice (alphabetically) when the
    requested one is missing, and returns ``None`` when no voice exists.
    """
    name = "" if voice_id is None else str(voice_id).strip()
    # voice_id arrives from HTTP clients: accept plain file names only, so
    # values such as "../secret" cannot reach files outside VOICES_DIR.
    is_plain_name = (
        bool(name) and name == os.path.basename(name) and name not in (".", "..")
    )
    if is_plain_name:
        candidate = os.path.join(VOICES_DIR, f"{name}.wav")
        if os.path.isfile(candidate):
            return candidate

    available = _list_voice_files()
    if not available:
        return None
    print(f"Voice '{voice_id}' not found. Falling back to '{available[0]}'")
    return os.path.join(VOICES_DIR, available[0])


def _prune_outputs(max_age_seconds: float = OUTPUT_MAX_AGE_SECONDS) -> None:
    """Best-effort removal of generated clips older than *max_age_seconds*."""
    cutoff = time.time() - max_age_seconds
    try:
        entries = os.listdir(OUTPUT_DIR)
    except OSError:
        return
    for entry in entries:
        if not entry.lower().endswith(".wav"):
            continue
        path = os.path.join(OUTPUT_DIR, entry)
        try:
            if os.path.getmtime(path) < cutoff:
                os.remove(path)
        except OSError:
            # Another request may have removed the file already.
            continue


def generate_speech(text, voice_id) -> Tuple[Optional[str], str]:
    """Generate speech from *text* using a specific voice clone.

    Args:
        text: The text to synthesize.
        voice_id: The name (without extension) of the wav file in the
            'voices/' folder to clone. If it is missing, the first available
            voice is used instead.

    Returns:
        ``(output_path, "Success")`` on success, or ``(None, message)`` with
        a human-readable error message on failure.
    """
    if text is None or not str(text).strip():
        return None, "Error: No text provided."

    speaker_wav = _resolve_speaker_wav(voice_id)
    if speaker_wav is None:
        return None, (
            f"Error: Voice ID '{voice_id}' not found and no voices available."
        )

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    _prune_outputs()
    # One file per request: a shared "output.wav" would be overwritten by a
    # concurrent request before the first response has been served.
    output_path = os.path.join(OUTPUT_DIR, f"{uuid.uuid4().hex}.wav")

    try:
        tts.tts_to_file(
            text=text,
            file_path=output_path,
            speaker_wav=speaker_wav,
            language="en",
        )
    except Exception as e:
        # API boundary: report the failure to the client instead of raising,
        # and do not leave a partially written clip behind.
        _remove_quietly(output_path)
        return None, f"Error generating speech: {str(e)}"
    return output_path, "Success"


# Ensure at least one voice exists.
_ensure_sample_voice()

# Define Gradio Interface
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Textbox(label="Voice ID (e.g. 'narrator', 'aribeth')"),
    ],
    outputs=[
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Textbox(label="Status"),
    ],
    title="Unreal Engine TTS Backend (XTTS-v2)",
    description="API for DGG_001 Game. Send POST requests to /run/predict",
)

iface.launch()