12labs committed on
Commit
a11a83f
·
verified ·
1 Parent(s): fd3011a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -103
app.py CHANGED
@@ -1,126 +1,111 @@
1
  import gradio as gr
2
- import spaces
3
- import subprocess
4
- import uuid
5
- import os
6
- import shutil
7
 
8
# ================= CONFIG =================

# Model identifiers handed to the inference script.
BASE_MODEL = "vibevoice/VibeVoice-7B"
CHECKPOINT = "tarun7r/vibevoice-hindi-lora"

# Working directories: reference voices go in one, generated audio in the other.
VOICES_DIR = "demo/voices"
OUTPUT_DIR = "outputs"

# Create both directories up front so later copies/writes cannot fail on a
# missing parent directory.
for _required_dir in (VOICES_DIR, OUTPUT_DIR):
    os.makedirs(_required_dir, exist_ok=True)
del _required_dir
 
 
 
 
18
 
19
- # ================= GPU FUNCTION =================
20
 
21
@spaces.GPU
def generate_voice(text, voice_file, cfg_scale, seed):
    """Generate speech for ``text`` using the uploaded reference voice.

    The reference recording is copied into ``VOICES_DIR`` under a
    per-request speaker name, ``demo/inference_from_file.py`` is run as a
    subprocess, and the path of the generated WAV file is returned.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        voice_file: Filesystem path of the uploaded reference recording.
        cfg_scale: Value forwarded to the script as ``--cfg_scale``.
        seed: Value forwarded to the script as ``--seed``.

    Returns:
        Path of the output WAV file inside ``OUTPUT_DIR``.

    Raises:
        gr.Error: If ``text`` is empty, no reference voice was supplied, or
            the inference subprocess exits with a non-zero status.
    """
    import sys  # local import so this function does not rely on a file-level import

    if not text or not text.strip():
        raise gr.Error("❌ Hindi text empty hai")

    if voice_file is None:
        raise gr.Error("❌ Reference voice upload karo (WAV)")

    # One id per request: concurrent or back-to-back requests never share a
    # reference file or an output file.
    request_id = uuid.uuid4().hex
    # NOTE(review): assumes the inference script resolves ``--speaker_names``
    # to ``<VOICES_DIR>/<name>.wav`` (the original relied on the same lookup
    # with the fixed name "user_voice") -- confirm against the script.
    speaker_name = f"user_voice_{request_id}"
    speaker_path = os.path.join(VOICES_DIR, f"{speaker_name}.wav")
    out_file = os.path.join(OUTPUT_DIR, f"out_{request_id}.wav")

    cmd = [
        # Run the script with the interpreter that is running this app rather
        # than whatever "python" happens to resolve to on PATH.
        sys.executable, "demo/inference_from_file.py",
        "--model_path", BASE_MODEL,
        "--checkpoint_path", CHECKPOINT,
        "--speaker_names", speaker_name,
        "--txt", text,
        "--cfg_scale", str(cfg_scale),
        "--seed", str(seed),
        "--output_path", out_file,
    ]

    shutil.copy(voice_file, speaker_path)
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as err:
        # Keep the original failure attached for the server logs.
        raise gr.Error("❌ Generation failed (check logs / GPU)") from err
    finally:
        # Do not keep the user's voice sample on disk after the request.
        try:
            os.remove(speaker_path)
        except OSError:
            pass

    return out_file
56
 
57
- # ================= UI =================
58
 
59
# ================= UI =================

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown(
        """
        # 🇮🇳 Hindi Voice Cloning (VibeVoice)
        Upload a reference voice and generate **emotional Hindi speech**
        using the same voice.
        """
    )

    with gr.Row():
        # Left column: all inputs plus the trigger button.
        with gr.Column(scale=1):
            text = gr.Textbox(
                lines=6,
                label="📝 Hindi Text",
                placeholder="नमस्ते, आज हम आर्टिफिशियल इंटेलिजेंस के बारे में बात करेंगे...",
            )

            voice = gr.Audio(
                sources=["upload"],
                type="filepath",
                format="wav",
                label="🎙️ Reference Voice (WAV only)",
            )

            cfg = gr.Slider(
                minimum=0.8,
                maximum=2.0,
                value=1.3,
                step=0.1,
                label="🎭 Expression Strength (CFG Scale)",
            )

            seed = gr.Number(value=42, precision=0, label="🎲 Seed")

            btn = gr.Button("🚀 Generate Voice")

        # Right column: the generated audio.
        with gr.Column(scale=1):
            output = gr.Audio(type="filepath", label="🔊 Generated Audio")

    # Wire the button to the generation function; api_name=None is passed
    # through exactly as before.
    btn.click(
        fn=generate_voice,
        inputs=[text, voice, cfg, seed],
        outputs=output,
        api_name=None,
    )

    # Usage hints shown below the controls.
    gr.Markdown(
        """
        ### ℹ️ Tips
        - Use clean WAV (10–30 sec)
        - Emotion reference voice se aata hai
        - CFG 1.2–1.4 best hota hai
        - GPU required
        """
    )

# ================= LAUNCH =================

# NOTE(review): share=True requests a public share link and server_name
# "0.0.0.0" binds all interfaces -- confirm both are wanted where this runs.
demo.launch(share=True, server_name="0.0.0.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from TTS.api import TTS
 
 
 
 
3
 
4
# =========================
# Load Model (CPU / Zero GPU)
# =========================
# The model is created once at import time and reused by every request handler
# in this file; gpu=False is passed to the TTS constructor.
print("Loading ai4bharat Indic TTS model (CPU)...")

tts = TTS(model_name="ai4bharat/indic-tts-coqui-misc", gpu=False, progress_bar=False)

print("Model loaded successfully.")
 
16
 
17
+ # =========================
18
+ # TTS Function
19
+ # =========================
20
def text_to_speech(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is treated as "nothing to do".

    Returns:
        Path of the generated WAV file, or ``None`` when ``text`` is blank.
    """
    import tempfile  # local import so this function does not rely on a file-level import

    if not text or not text.strip():
        return None

    # Write each result to its own file. A single fixed name in the working
    # directory would be overwritten by every later request and requires the
    # working directory to be writable.
    # NOTE: the file is not deleted here because the caller still needs to
    # read it after this function returns.
    with tempfile.NamedTemporaryFile(
        prefix="tts_output_", suffix=".wav", delete=False
    ) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        file_path=output_path,
        language="hi",
    )

    return output_path
 
33
 
 
 
34
 
35
+ # =========================
36
+ # Fake Voice Clone Handler
37
+ # (Explains limitation clearly)
38
+ # =========================
39
def voice_clone(text, reference_audio):
    """Fallback handler for the "voice clone" tab: plain TTS, no cloning.

    NOTE:
    ai4bharat/indic-tts-coqui-misc
    DOES NOT support voice cloning.
    This function falls back to normal TTS.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is treated as "nothing to do".
        reference_audio: Accepted so the signature matches the UI inputs;
            it is intentionally not used.

    Returns:
        Path of the generated WAV file, or ``None`` when ``text`` is blank.
    """
    import tempfile  # local import so this function does not rely on a file-level import

    if not text or not text.strip():
        return None

    # Write each result to its own file. A single fixed name in the working
    # directory would be overwritten by every later request and requires the
    # working directory to be writable.
    # NOTE: the file is not deleted here because the caller still needs to
    # read it after this function returns.
    with tempfile.NamedTemporaryFile(
        prefix="clone_fallback_", suffix=".wav", delete=False
    ) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        file_path=output_path,
        language="hi",
    )

    return output_path
59
 
 
60
 
61
+ # =========================
62
+ # Gradio UI
63
+ # =========================
64
with gr.Blocks(title="Hindi TTS (Zero GPU)") as demo:
    # Page header, including the explicit notice that cloning is unsupported.
    gr.Markdown(
        """
        ## 🗣 Hindi Text to Speech (Zero GPU)

        **Model:** ai4bharat/indic-tts-coqui-misc
        **Hardware:** CPU / Zero GPU

        ⚠️ **Voice cloning is NOT supported by this model.**
        Reference audio upload is shown only for UI completeness.
        """
    )

    # Tab 1: plain text-to-speech.
    with gr.Tab("🔊 Text to Speech"):
        tts_text = gr.Textbox(
            label="Hindi Text",
            # Placeholder restored: the word "हिंदी" had been corrupted by
            # an encoding error.
            placeholder="यहाँ हिंदी टेक्स्ट लिखें...",
            lines=4,
        )
        tts_btn = gr.Button("Generate Voice")
        tts_audio = gr.Audio(type="filepath", label="Output Audio")

        tts_btn.click(
            fn=text_to_speech,
            inputs=tts_text,
            outputs=tts_audio,
        )

    # Tab 2: "voice clone" UI whose handler falls back to plain TTS; the
    # uploaded reference audio is passed to the handler but labelled unused.
    with gr.Tab("🎙 Voice Clone (Fallback)"):
        clone_text = gr.Textbox(
            label="Hindi Text",
            placeholder="यहाँ टेक्स्ट लिखें...",
            lines=4,
        )
        ref_audio = gr.Audio(
            label="Upload Reference Voice (Not Used)",
            type="filepath",
        )
        clone_btn = gr.Button("Generate (TTS Fallback)")
        clone_audio = gr.Audio(type="filepath", label="Generated Audio")

        clone_btn.click(
            fn=voice_clone,
            inputs=[clone_text, ref_audio],
            outputs=clone_audio,
        )

demo.launch()