# Spaces: Running
# app.py
import functools
import os
import tempfile

import gradio as gr
import torch
from TTS.api import TTS
# Patch torch.load for compatibility with older Coqui checkpoints.
# Keep a handle on the real loader so the wrapper can delegate to it.
old_torch_load = torch.load


@functools.wraps(old_torch_load)
def patched_torch_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` defaulting to False.

    All positional and keyword arguments are forwarded unchanged. Only a
    missing ``weights_only`` is filled in, so a caller that passes the flag
    explicitly still gets exactly what it asked for.

    SECURITY: ``weights_only=False`` enables full pickle deserialization,
    which can execute arbitrary code embedded in a checkpoint. Only load
    checkpoints from sources you trust.

    Returns:
        Whatever the original ``torch.load`` returns for the given arguments.
    """
    kwargs.setdefault("weights_only", False)
    return old_torch_load(*args, **kwargs)


# Install the wrapper globally so code that calls torch.load picks it up.
torch.load = patched_torch_load
# Accept Coqui TOS
# Set before the model is constructed below.
# NOTE(review): presumably read by the Coqui library when it fetches the
# model - confirm against the installed TTS version.
os.environ["COQUI_TOS_AGREED"] = "1"

# Ensure speakers folder exists (relative to the current working directory)
SPEAKER_DIR = "speakers"
os.makedirs(SPEAKER_DIR, exist_ok=True)

# Get device: use the GPU when CUDA is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model: loaded once at import time and shared by every request
MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
print("Loading model:", MODEL)
tts = TTS(MODEL).to(device)
# Supported languages: the codes offered in the UI dropdown and passed
# through as the `language` argument for synthesis.
LANGS = [
    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
    "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi",
]
def _default_speaker_path():
    """Return the alphabetically first WAV file in SPEAKER_DIR, or None."""
    try:
        names = os.listdir(SPEAKER_DIR)
    except FileNotFoundError:
        # The folder may have been removed after startup; treat as "no speakers".
        return None
    # os.listdir order is arbitrary, so pick deterministically.
    first = min((n for n in names if n.lower().endswith(".wav")), default=None)
    return os.path.join(SPEAKER_DIR, first) if first else None


def generate_audio(text, language, speaker_file):
    """Synthesize ``text`` in the voice of a reference speaker recording.

    Args:
        text: Text to speak. Ignored when empty or shorter than two
            non-whitespace-trimmed characters.
        language: Language code forwarded to the model.
        speaker_file: Path to an uploaded reference recording, or a falsy
            value to fall back to the first WAV file in ``SPEAKER_DIR``.

    Returns:
        Path to the generated WAV file, or None when the text is too short
        or no speaker reference is available.

    Raises:
        Any exception raised by ``tts.tts_to_file`` is propagated after the
        partially written output file has been removed.
    """
    if not text or len(text.strip()) < 2:
        return None

    # Resolve speaker path (either from upload or from speakers folder)
    speaker_path = speaker_file or _default_speaker_path()
    if not speaker_path:
        return None

    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            language=language,
            file_path=out_path,
        )
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        try:
            os.remove(out_path)
        except OSError:
            pass
        raise
    return out_path
# Gradio UI: three inputs (text, language, optional speaker reference) mapped
# positionally onto generate_audio's parameters; the returned file path is
# played back in the output audio component.
# NOTE(review): newer Gradio releases rename `allow_flagging` to
# `flagging_mode` - confirm against the pinned Gradio version.
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(lines=3, label="Text"),
        gr.Dropdown(LANGS, value="en", label="Language"),
        gr.Audio(label="Upload speaker reference (optional)", type="filepath"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated Speech"),
    title="XTTS-v2 Voice Cloning",
    description=f"Drop WAV files into `{SPEAKER_DIR}` folder for reusable speaker voices.",
    allow_flagging="never",
)
# Start the web server only when run as a script, listening on all
# interfaces on port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)