import gradio as gr
import tempfile
from TTS.api import TTS
# =========================================================
# LOAD XTTS
# =========================================================
# Identifier of the pretrained model handed to the TTS constructor.
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# The model is created once, at module import, and the resulting `tts`
# object is shared by the request handler defined below.
print("⏳ Loading XTTS v2 model...")
tts = TTS(_XTTS_MODEL_NAME)
print("✅ XTTS loaded")
# =========================================================
# GENERATE FUNCTION
# =========================================================
def generate_xtts(text, speaker_wav, language="en"):
    """Synthesize ``text`` using ``speaker_wav`` as the reference voice.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text
            produces no audio.
        speaker_wav: File path of the reference voice recording. ``None``
            produces no audio.
        language: Language code forwarded to ``tts.tts_to_file``.
            Defaults to ``"en"``.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when either input
        is missing.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if not text or not text.strip():
        return None
    if speaker_wav is None:
        return None

    # Only the unique file name is needed here. Closing the handle right away
    # avoids leaking a file descriptor per request and lets the TTS engine
    # reopen the path by name (an open NamedTemporaryFile cannot be reopened
    # on Windows). delete=False keeps the file on disk after the handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        output_path = temp_wav.name

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path
# =========================================================
# UI
# =========================================================
# Two inputs (text + reference voice file path) feed generate_xtts; its
# return value is rendered by the audio output component.
demo = gr.Interface(
    fn=generate_xtts,
    inputs=[
        gr.Textbox(
            label="Enter Text",
            placeholder="Hello, this is my XTTS project demo",
        ),
        gr.Audio(
            # "filepath" makes the handler receive a path string, which is
            # what generate_xtts forwards as speaker_wav.
            type="filepath",
            label="Reference Voice (6–10 sec WAV)",
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="XTTS v2 Voice Cloning Demo",
    description="Multilingual zero-shot TTS using Coqui XTTS-v2",
)

# Start the web UI when this module is executed.
demo.launch()