Spaces:
Running
Running
File size: 2,165 Bytes
e6a706e f8749a0 a279c11 65b0e67 7a10ff5 f8749a0 a279c11 e6a706e f8749a0 a279c11 e6a706e 7a10ff5 e6a706e a279c11 f8749a0 e6a706e f8749a0 e6a706e a279c11 e6a706e a279c11 7a10ff5 a279c11 7a10ff5 a279c11 e6a706e f8749a0 e6a706e a279c11 f8749a0 a279c11 f8749a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# app.py
import os
import tempfile
import torch
import gradio as gr
from TTS.api import TTS
# Patch torch.load for compatibility with older Coqui checkpoints:
# torch >= 2.6 defaults weights_only=True, which rejects the pickled
# config objects embedded inside XTTS checkpoints.
old_torch_load = torch.load


def patched_torch_load(*args, **kwargs):
    """torch.load wrapper that defaults ``weights_only`` to False.

    Uses ``setdefault`` (instead of overwriting) so a caller that explicitly
    passes ``weights_only`` — e.g. True for a trusted plain state_dict — is
    still honored; only the implicit default changes.
    """
    kwargs.setdefault("weights_only", False)
    return old_torch_load(*args, **kwargs)


# Install the wrapper globally so the TTS library's internal loads pick it up.
torch.load = patched_torch_load
# Accept the Coqui public-model license non-interactively (the XTTS
# download prompts for agreement otherwise).
os.environ["COQUI_TOS_AGREED"] = "1"

# Folder of reusable reference-voice WAVs; created up front so the
# fallback lookup in generate_audio never hits a missing directory.
SPEAKER_DIR = "speakers"
os.makedirs(SPEAKER_DIR, exist_ok=True)

# Prefer GPU when available; CPU inference works but is much slower.
device = "cuda" if torch.cuda.is_available() else "cpu"

# XTTS-v2: multilingual, zero-shot voice-cloning model.
MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
print("Loading model:", MODEL)
tts = TTS(MODEL).to(device)  # downloads the checkpoint on first run

# Language codes supported by XTTS-v2 (passed straight through to tts_to_file).
LANGS = [
    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
    "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"
]
def generate_audio(text, language, speaker_file):
    """Synthesize speech for *text* in *language*, cloning a reference voice.

    Args:
        text: Text to speak; ignored if empty/whitespace or shorter than
            2 characters after stripping.
        language: Language code accepted by XTTS-v2 (one of LANGS).
        speaker_file: Optional path to an uploaded reference WAV. If omitted,
            the alphabetically first WAV in SPEAKER_DIR is used.

    Returns:
        Path to the generated WAV file, or None when the text is too short
        or no speaker reference is available.
    """
    if not text or len(text.strip()) < 2:
        return None

    # Resolve speaker path (either from upload or from speakers folder).
    speaker_path = None
    if speaker_file:
        speaker_path = speaker_file
    else:
        # Fall back to a WAV from the speakers folder; sort so the choice
        # is deterministic (os.listdir order is arbitrary).
        files = sorted(
            f for f in os.listdir(SPEAKER_DIR) if f.lower().endswith(".wav")
        )
        if files:
            speaker_path = os.path.join(SPEAKER_DIR, files[0])

    if not speaker_path:
        return None

    # mkstemp instead of the deprecated, race-prone mktemp; close the fd so
    # tts_to_file can reopen the path on all platforms (Windows included).
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_path,
        language=language,
        file_path=out_path,
    )
    return out_path
# Gradio UI: text + language + optional reference audio -> generated WAV path.
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(lines=3, label="Text"),
        gr.Dropdown(LANGS, value="en", label="Language"),
        # type="filepath" hands generate_audio a path string, not raw samples.
        gr.Audio(label="Upload speaker reference (optional)", type="filepath")
    ],
    outputs=gr.Audio(type="filepath", label="Generated Speech"),
    title="XTTS-v2 Voice Cloning",
    description=f"Drop WAV files into `{SPEAKER_DIR}` folder for reusable speaker voices.",
    # NOTE(review): allow_flagging is deprecated in Gradio >= 4.x in favor of
    # flagging_mode — confirm against the pinned gradio version before upgrading.
    allow_flagging="never",
)
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the standard HF Spaces port).
    demo.launch(server_name="0.0.0.0", server_port=7860)
|