File size: 2,703 Bytes
026659d
a11a83f
026659d
a11a83f
 
 
 
fd3011a
a11a83f
 
 
 
 
026659d
a11a83f
026659d
a11a83f
 
 
 
 
 
026659d
a11a83f
026659d
a11a83f
 
 
 
 
026659d
a11a83f
026659d
 
a11a83f
 
 
 
 
 
 
 
 
 
 
026659d
a11a83f
 
026659d
a11a83f
026659d
a11a83f
 
 
 
 
026659d
a11a83f
026659d
 
a11a83f
 
 
 
026659d
 
a11a83f
026659d
a11a83f
 
026659d
a11a83f
 
026659d
 
 
a11a83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
from TTS.api import TTS

# =========================
# Load Model (CPU / Zero GPU)
# =========================
# Tell whoever is watching the logs that start-up is busy loading the model.
print("Loading ai4bharat Indic TTS model (CPU)...")

# One synthesizer instance is created at import time and shared by every
# handler below. GPU use and the download progress bar are both disabled.
# NOTE(review): Coqui model ids usually look like "type/lang/dataset/model";
# confirm that this identifier resolves in the installed TTS version.
tts = TTS(model_name="ai4bharat/indic-tts-coqui-misc", gpu=False, progress_bar=False)

print("Model loaded successfully.")

# =========================
# TTS Function
# =========================
def text_to_speech(text):
    """Synthesize speech for *text* and return the path of the WAV file.

    The text is rendered with the module-level ``tts`` model using the
    language code ``"hi"``.

    Args:
        text: Text to speak. ``None``, an empty string, or whitespace-only
            input means there is nothing to synthesize.

    Returns:
        Path of a newly created WAV file, or ``None`` when *text* is blank.
    """
    if not text or not text.strip():
        return None

    # Function-scope import keeps this handler self-contained.
    import tempfile

    # Every call gets its own output file. A single fixed filename would be
    # shared by all callers, so overlapping requests could overwrite (or be
    # served) each other's audio. ``delete=False`` leaves the file on disk so
    # it can still be read after this function returns; nothing here removes
    # it afterwards.
    with tempfile.NamedTemporaryFile(
        prefix="tts_output_", suffix=".wav", delete=False
    ) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        file_path=output_path,
        language="hi",
    )

    return output_path


# =========================
# Fake Voice Clone Handler
# (Explains limitation clearly)
# =========================
def voice_clone(text, reference_audio):
    """Fallback handler for the voice-clone tab: plain TTS, no cloning.

    NOTE: ai4bharat/indic-tts-coqui-misc DOES NOT support voice cloning,
    so this function falls back to normal TTS.

    Args:
        text: Text to speak. ``None``, an empty string, or whitespace-only
            input means there is nothing to synthesize.
        reference_audio: Accepted so the signature matches the UI inputs,
            but never read.

    Returns:
        Path of a newly created WAV file, or ``None`` when *text* is blank.
    """
    if not text or not text.strip():
        return None

    # Function-scope import keeps this handler self-contained.
    import tempfile

    # Every call gets its own output file. A single fixed filename would be
    # shared by all callers, so overlapping requests could overwrite (or be
    # served) each other's audio. ``delete=False`` leaves the file on disk so
    # it can still be read after this function returns; nothing here removes
    # it afterwards.
    with tempfile.NamedTemporaryFile(
        prefix="clone_fallback_", suffix=".wav", delete=False
    ) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        file_path=output_path,
        language="hi",
    )

    return output_path


# =========================
# Gradio UI
# =========================
# Two-tab interface: plain text-to-speech, plus a "voice clone" tab that
# feeds the same kind of text input to the fallback handler.
with gr.Blocks(title="Hindi TTS (Zero GPU)") as demo:
    # Page header; also tells users up front that cloning is unavailable.
    gr.Markdown(
        """
        ## 🗣 Hindi Text to Speech (Zero GPU)

        **Model:** ai4bharat/indic-tts-coqui-misc  
        **Hardware:** CPU / Zero GPU  

        ⚠️ **Voice cloning is NOT supported by this model.**  
        Reference audio upload is shown only for UI completeness.
        """
    )

    # --- Tab 1: text in, synthesized audio out ---
    with gr.Tab("🔊 Text to Speech"):
        tts_text = gr.Textbox(lines=4, label="Hindi Text", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        tts_btn = gr.Button("Generate Voice")
        tts_audio = gr.Audio(label="Output Audio", type="filepath")

        # Button press -> text_to_speech(text) -> audio player.
        tts_btn.click(text_to_speech, tts_text, tts_audio)

    # --- Tab 2: same flow, with a reference-audio upload passed along ---
    with gr.Tab("🎙 Voice Clone (Fallback)"):
        clone_text = gr.Textbox(lines=4, label="Hindi Text", placeholder="यहाँ टेक्स्ट लिखें...")
        ref_audio = gr.Audio(type="filepath", label="Upload Reference Voice (Not Used)")
        clone_btn = gr.Button("Generate (TTS Fallback)")
        clone_audio = gr.Audio(label="Generated Audio", type="filepath")

        # Button press -> voice_clone(text, reference) -> audio player.
        clone_btn.click(voice_clone, [clone_text, ref_audio], clone_audio)

# Start the web server as soon as the interface has been assembled.
demo.launch()