File size: 1,454 Bytes
e8bdfd6
 
b543d43
e8bdfd6
b543d43
 
 
 
e8bdfd6
b543d43
e8bdfd6
b543d43
e8bdfd6
 
b543d43
 
 
 
02c124a
 
e8bdfd6
b543d43
e8bdfd6
 
02c124a
b543d43
 
 
 
 
 
 
e8bdfd6
 
 
 
b543d43
 
 
e8bdfd6
b543d43
02c124a
 
b543d43
 
02c124a
b543d43
 
 
 
02c124a
b543d43
 
 
e8bdfd6
 
02c124a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import tempfile

import gradio as gr
from TTS.api import TTS

# =========================================================
# LOAD XTTS
# =========================================================
# Model identifier handed to the TTS loader (the XTTS v2 checkpoint).
_XTTS_MODEL_ID = "tts_models/multilingual/multi-dataset/xtts_v2"

print("⏳ Loading XTTS v2 model...")
# Built once at import time; generate_xtts reuses this module-level instance.
tts = TTS(_XTTS_MODEL_ID)
print("✅ XTTS loaded")


# =========================================================
# GENERATE FUNCTION
# =========================================================
def generate_xtts(text, speaker_wav, language="en"):
    """Synthesize ``text`` in the voice of ``speaker_wav`` and return a WAV path.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text
            produces no output.
        speaker_wav: Path to the reference voice recording (the UI passes a
            file path because its audio input uses ``type="filepath"``).
            ``None`` or an empty path produces no output.
        language: Language code forwarded to ``tts.tts_to_file``.
            Defaults to ``"en"``, the value that was previously hard-coded.

    Returns:
        Path of a newly created temporary ``.wav`` file containing the
        generated speech, or ``None`` when either input is missing.

    Raises:
        Exception: Anything raised by ``tts.tts_to_file`` is re-raised
            unchanged after the temporary output file has been removed.
    """
    # Guard against None as well as blank text; calling .strip() on None
    # would raise AttributeError.
    if not text or not text.strip():
        return None

    if not speaker_wav:
        return None

    # Reserve a unique output path, then close our handle right away so the
    # descriptor is not leaked and the synthesizer can reopen the file itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        output_path = temp_wav.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=output_path,
        )
    except Exception:
        # Do not leave a stray temp file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    return output_path


# =========================================================
# UI
# =========================================================
# Input widgets, listed in the positional order generate_xtts receives them.
_text_input = gr.Textbox(
    label="Enter Text",
    placeholder="Hello, this is my XTTS project demo",
)
# type="filepath" makes the reference clip arrive as a path string.
_voice_input = gr.Audio(
    type="filepath",
    label="Reference Voice (6–10 sec WAV)",
)
# Output widget; generate_xtts returns a path to a WAV file (or None).
_speech_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=generate_xtts,
    inputs=[_text_input, _voice_input],
    outputs=_speech_output,
    title="XTTS v2 Voice Cloning Demo",
    description="Multilingual zero-shot TTS using Coqui XTTS-v2",
)

# Start the web app as soon as this module is executed.
demo.launch()