Spaces:

12labs
/

ind

Runtime error

App Files Files Community

12labs committed on Jan 29

Commit

a11a83f

verified ·

1 Parent(s): fd3011a

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -103

app.py CHANGED Viewed

@@ -1,126 +1,111 @@
 import gradio as gr
-import spaces
-import subprocess
-import uuid
-import os
-import shutil
-# ================= CONFIG =================
-BASE_MODEL = "vibevoice/VibeVoice-7B"
-CHECKPOINT = "tarun7r/vibevoice-hindi-lora"
-VOICES_DIR = "demo/voices"
-OUTPUT_DIR = "outputs"
-os.makedirs(VOICES_DIR, exist_ok=True)
-os.makedirs(OUTPUT_DIR, exist_ok=True)
-# ================= GPU FUNCTION =================
-@spaces.GPU
-def generate_voice(text, voice_file, cfg_scale, seed):
-    if not text or not text.strip():
-        raise gr.Error("❌ Hindi text empty hai")
-    if voice_file is None:
-        raise gr.Error("❌ Reference voice upload karo (WAV)")
-    speaker_name = "user_voice"
-    speaker_path = os.path.join(VOICES_DIR, f"{speaker_name}.wav")
-    # overwrite previous voice
-    shutil.copy(voice_file, speaker_path)
-    out_file = os.path.join(
-        OUTPUT_DIR, f"out_{uuid.uuid4().hex}.wav"
-    )
-    cmd = [
-        "python", "demo/inference_from_file.py",
-        "--model_path", BASE_MODEL,
-        "--checkpoint_path", CHECKPOINT,
-        "--speaker_names", speaker_name,
-        "--txt", text,
-        "--cfg_scale", str(cfg_scale),
-        "--seed", str(seed),
-        "--output_path", out_file
-    ]
-    try:
-        subprocess.run(cmd, check=True)
-    except subprocess.CalledProcessError:
-        raise gr.Error("❌ Generation failed (check logs / GPU)")
-    return out_file
-# ================= UI =================
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # 🇮🇳 Hindi Voice Cloning (VibeVoice)
-        Upload a reference voice and generate **emotional Hindi speech**
-        using the same voice.
-        """
-    )
-    with gr.Row():
-        with gr.Column(scale=1):
-            text = gr.Textbox(
-                label="📝 Hindi Text",
-                placeholder="नमस्ते, आज हम आर्टिफिशियल इंटेलिजेंस के बारे में बात करेंगे...",
-                lines=6
-            )
-            voice = gr.Audio(
-                label="🎙️ Reference Voice (WAV only)",
-                type="filepath",
-                format="wav",
-                sources=["upload"]
-            )
-            cfg = gr.Slider(
-                0.8, 2.0,
-                value=1.3,
-                step=0.1,
-                label="🎭 Expression Strength (CFG Scale)"
-            )
-            seed = gr.Number(
-                value=42,
-                precision=0,
-                label="🎲 Seed"
-            )
-            btn = gr.Button("🚀 Generate Voice")
-        with gr.Column(scale=1):
-            output = gr.Audio(
-                label="🔊 Generated Audio",
-                type="filepath"
-            )
-    btn.click(
-        generate_voice,
-        inputs=[text, voice, cfg, seed],
-        outputs=output,
-        api_name=None
-    )
-    gr.Markdown(
-        """
-        ### ℹ️ Tips
-        - Use clean WAV (10–30 sec)
-        - Emotion reference voice se aata hai
-        - CFG 1.2–1.4 best hota hai
-        - GPU required
         """
     )
-# ================= LAUNCH =================
-demo.launch(
-    share=True,
-    server_name="0.0.0.0"
-)

 import gradio as gr
+from TTS.api import TTS
+# =========================
+# Load Model (CPU / Zero GPU)
+# =========================
+print("Loading ai4bharat Indic TTS model (CPU)...")
+tts = TTS(
+    model_name="ai4bharat/indic-tts-coqui-misc",
+    gpu=False,
+    progress_bar=False
+)
+print("Model loaded successfully.")
+# =========================
+# TTS Function
+# =========================
+def text_to_speech(text):
+    if not text or not text.strip():
+        return None
+    output_path = "tts_output.wav"
+    tts.tts_to_file(
+        text=text,
+        file_path=output_path,
+        language="hi"
+    )
+    return output_path
+# =========================
+# Fake Voice Clone Handler
+# (Explains limitation clearly)
+# =========================
+def voice_clone(text, reference_audio):
+    """
+    NOTE:
+    ai4bharat/indic-tts-coqui-misc
+    DOES NOT support voice cloning.
+    This function falls back to normal TTS.
+    """
+    if not text or not text.strip():
+        return None
+    output_path = "clone_fallback.wav"
+    tts.tts_to_file(
+        text=text,
+        file_path=output_path,
+        language="hi"
+    )
+    return output_path
+# =========================
+# Gradio UI
+# =========================
+with gr.Blocks(title="Hindi TTS (Zero GPU)") as demo:
     gr.Markdown(
         """
+        ## 🗣 Hindi Text to Speech (Zero GPU)
+        **Model:** ai4bharat/indic-tts-coqui-misc
+        **Hardware:** CPU / Zero GPU
+        ⚠️ **Voice cloning is NOT supported by this model.**
+        Reference audio upload is shown only for UI completeness.
         """
     )
+    with gr.Tab("🔊 Text to Speech"):
+        tts_text = gr.Textbox(
+            label="Hindi Text",
+            placeholder="यहाँ हिंदी टेक्स्ट लिखें...",
+            lines=4
+        )
+        tts_btn = gr.Button("Generate Voice")
+        tts_audio = gr.Audio(type="filepath", label="Output Audio")
+        tts_btn.click(
+            fn=text_to_speech,
+            inputs=tts_text,
+            outputs=tts_audio
+        )
+    with gr.Tab("🎙 Voice Clone (Fallback)"):
+        clone_text = gr.Textbox(
+            label="Hindi Text",
+            placeholder="यहाँ टेक्स्ट लिखें...",
+            lines=4
+        )
+        ref_audio = gr.Audio(
+            label="Upload Reference Voice (Not Used)",
+            type="filepath"
+        )
+        clone_btn = gr.Button("Generate (TTS Fallback)")
+        clone_audio = gr.Audio(type="filepath", label="Generated Audio")
+        clone_btn.click(
+            fn=voice_clone,
+            inputs=[clone_text, ref_audio],
+            outputs=clone_audio
+        )
+demo.launch()