callentrin_test

Sleeping

App Files Community

rishidahiya committed on Nov 11, 2025

Commit

2d146cc

verified ·

1 Parent(s): 80f2c88

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -28

app.py CHANGED Viewed

@@ -5,24 +5,40 @@ from vocoder import inference as vocoder_inference
 import librosa
 import soundfile as sf
 import numpy as np
-from io import BytesIO
 import os
 # Load models at startup
 print("Loading models...")
-encoder_inference.load_model("saved_models/encoder.pt")
-synthesizer = Synthesizer("saved_models/synthesizer.pt")
-vocoder_inference.load_model("saved_models/vocoder.pt")
-print("✓ Models loaded!")
 def clone_voice(voice_sample, text):
     """Clone voice and generate speech"""
     try:
         if voice_sample is None:
-            return None, "Error: No voice sample provided"
         if not text or len(text.strip()) == 0:
-            return None, "Error: No text provided"
         # Extract audio data and sample rate
         if isinstance(voice_sample, tuple):
@@ -53,39 +69,41 @@ def clone_voice(voice_sample, text):
         wav_generated = vocoder_inference.vocoder(mels[0])
         print(f"Generated audio: {wav_generated.shape}")
-        return (22050, (wav_generated * 32768).astype(np.int16)), "✓ Success!"
     except Exception as e:
         print(f"Error: {e}")
         import traceback
         traceback.print_exc()
-        return None, f"Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(title="Voice Cloning - Real-Time Test") as demo:
-    gr.Markdown("# 🎤 Voice Cloning Test")
-    gr.Markdown("Record your voice, enter text, and hear it synthesized in your voice!")
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### Step 1: Record Your Voice")
             voice_input = gr.Audio(
-                label="Record or Upload Voice Sample (5-10 seconds)",
                 type="numpy",
                 sources=["microphone", "upload"]
             )
-            gr.Markdown("### Step 2: Enter Text")
             text_input = gr.Textbox(
-                label="Text to Synthesize (Hindi or Kannada)",
                 placeholder="नमस्ते, यह एक परीक्षण है",
                 lines=3
             )
         with gr.Column():
-            gr.Markdown("### Step 3: Generated Speech")
-            audio_output = gr.Audio(label="Cloned Voice Output", type="numpy")
-            status_output = gr.Textbox(label="Status", interactive=False)
     clone_button = gr.Button("🎯 Clone Voice & Generate Speech", variant="primary", size="lg")
     clone_button.click(
@@ -95,18 +113,27 @@ with gr.Blocks(title="Voice Cloning - Real-Time Test") as demo:
     )
     gr.Markdown("""
-    ### Instructions:
-    1. **Record your voice** using the microphone (5-10 seconds in Hindi/Kannada) OR upload a WAV/OGG file
-    2. **Enter text** you want to generate in your voice (Hindi or Kannada)
     3. **Click "Clone Voice & Generate Speech"**
     4. **Wait** (10-30 seconds on CPU) and hear the result!
-    ### Tips:
-    - Clearer voice samples = better results
-    - Longer samples (10 seconds) = better voice cloning
-    - Same language as input voice works best
-    - Be patient - CPU processing takes time!
     """)
 if __name__ == "__main__":
-    demo.launch(share=True)

 import librosa
 import soundfile as sf
 import numpy as np
 import os
 # Load models at startup
 print("Loading models...")
+try:
+    encoder_inference.load_model("saved_models/encoder.pt")
+    print("✓ Encoder loaded!")
+except Exception as e:
+    print(f"Encoder load error: {e}")
+try:
+    synthesizer = Synthesizer("saved_models/synthesizer.pt")
+    print("✓ Synthesizer loaded!")
+except Exception as e:
+    print(f"Synthesizer load error: {e}")
+try:
+    vocoder_inference.load_model("saved_models/vocoder.pt")
+    print("✓ Vocoder loaded!")
+except Exception as e:
+    print(f"Vocoder load error: {e}")
+print("Ready for voice cloning!")
 def clone_voice(voice_sample, text):
     """Clone voice and generate speech"""
     try:
         if voice_sample is None:
+            return None, "❌ Error: No voice sample provided"
         if not text or len(text.strip()) == 0:
+            return None, "❌ Error: No text provided"
+        print(f"Processing: text='{text}', voice_sample={voice_sample}")
         # Extract audio data and sample rate
         if isinstance(voice_sample, tuple):
         wav_generated = vocoder_inference.vocoder(mels[0])
         print(f"Generated audio: {wav_generated.shape}")
+        return (22050, (wav_generated * 32768).astype(np.int16)), "✅ Success! Your voice has been cloned!"
     except Exception as e:
         print(f"Error: {e}")
         import traceback
         traceback.print_exc()
+        return None, f"❌ Error: {str(e)}"
 # Create Gradio interface
+with gr.Blocks(title="Voice Cloning - Real-Time Test", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎤 Real-Time Voice Cloning")
+    gr.Markdown("**Record your voice, enter text, and hear it synthesized in your voice!**")
+    gr.Markdown("---")
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 📝 Step 1: Record Your Voice")
+            gr.Markdown("Record 5-10 seconds of clear audio in **Hindi or Kannada**")
             voice_input = gr.Audio(
+                label="🎙️ Voice Sample (Microphone or Upload)",
                 type="numpy",
                 sources=["microphone", "upload"]
             )
+            gr.Markdown("### ✍️ Step 2: Enter Text")
             text_input = gr.Textbox(
+                label="📄 Text to Synthesize (Hindi or Kannada)",
                 placeholder="नमस्ते, यह एक परीक्षण है",
                 lines=3
             )
         with gr.Column():
+            gr.Markdown("### 🔊 Step 3: Generated Speech")
+            audio_output = gr.Audio(label="🎧 Cloned Voice Output", type="numpy")
+            status_output = gr.Textbox(label="📊 Status", interactive=False, lines=2)
     clone_button = gr.Button("🎯 Clone Voice & Generate Speech", variant="primary", size="lg")
     clone_button.click(
     )
     gr.Markdown("""
+    ---
+    ### 📋 Instructions:
+    1. **Record your voice** using the microphone (5-10 seconds) OR upload a WAV/OGG file
+       - Speak clearly in Hindi or Kannada
+       - Avoid background noise
+    2. **Enter text** you want to generate in your voice (same language as recording)
     3. **Click "Clone Voice & Generate Speech"**
     4. **Wait** (10-30 seconds on CPU) and hear the result!
+    ### 💡 Tips for Best Results:
+    - **Clear voice samples** = better results
+    - **10+ seconds** = better voice cloning accuracy
+    - **Same language** as input voice works best
+    - **Patience** - CPU processing takes time (GPU would be 2-3x faster)
+    - **Quality audio** - minimize background noise
+    ### ⚠️ Limitations:
+    - CPU processing is slower (~10-30 seconds per request)
+    - Long texts (500+ characters) may timeout
+    - Best results with 10+ second voice samples
     """)
 if __name__ == "__main__":
+    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)