Spaces:

crackuser
/

voiceclone-dev

Running

App Files Files Community

crackuser committed on Sep 10, 2025

Commit

19173b4

verified ·

1 Parent(s): fd59512

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -166

app.py CHANGED Viewed

@@ -1,185 +1,82 @@
 import gradio as gr
 import torch
-from TTS.api import TTS
-import os
-import tempfile
 import soundfile as sf
-# Set environment variable for Coqui TOS
-os.environ["COQUI_TOS_AGREED"] = "1"
-# Initialize device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Initialize TTS model
-try:
-    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
-    print("✅ XTTS v2 model loaded successfully!")
-except Exception as e:
-    print(f"❌ Error loading model: {e}")
-    tts = None
-def clone_voice(text, reference_audio):
     """
-    Clone voice using XTTS v2 model
     """
-    if not text or not text.strip():
-        return None, "❌ Please enter some text to convert!"
-    if not reference_audio:
-        return None, "❌ Please upload a reference audio file!"
-    if tts is None:
-        return None, "❌ TTS model not loaded properly!"
     try:
-        # Validate text length
-        if len(text) > 500:
-            return None, "❌ Text too long! Please keep it under 500 characters."
-        # Create temporary output file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            output_path = tmp_file.name
-        # Generate cloned voice
-        print(f"🎤 Cloning voice for text: {text[:50]}...")
-        tts.tts_to_file(
-            text=text,
-            speaker_wav=reference_audio,
-            language="en",
-            file_path=output_path
-        )
-        # Verify output file exists and has content
-        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
-            return output_path, f"✅ Voice cloning successful!\n🎵 Generated audio for: '{text[:100]}{'...' if len(text) > 100 else ''}'"
-        else:
-            return None, "❌ Failed to generate audio file!"
-    except Exception as e:
-        error_msg = str(e)
-        print(f"❌ Voice cloning error: {error_msg}")
-        if "CUDA" in error_msg:
-            return None, "❌ GPU memory error! Try with shorter text or restart the space."
-        elif "audio" in error_msg.lower():
-            return None, "❌ Audio processing error! Please upload a clear WAV or MP3 file."
-        else:
-            return None, f"❌ Error: {error_msg}"
 # Create Gradio interface
-def create_interface():
-    with gr.Blocks(
-        title="🎭 Voice Cloning Studio",
-        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green")
-    ) as demo:
-        # Header
-        gr.HTML("""
-        <div style="text-align: center; padding: 20px;">
-            <h1 style="color: #2E86AB; margin-bottom: 10px;">🎭 AI Voice Cloning Studio</h1>
-            <p style="color: #666; font-size: 18px;">Clone any voice with advanced AI technology</p>
-        </div>
-        """)
-        with gr.Row():
-            with gr.Column(scale=1):
-                # Input section
-                gr.HTML("<h3 style='color: #2E86AB;'>📤 Upload Reference Voice</h3>")
-                reference_audio = gr.Audio(
-                    label="Reference Audio (10+ seconds recommended)",
-                    type="filepath",
-                    sources=["upload"]
-                )
-                gr.HTML("<h3 style='color: #2E86AB;'>📝 Enter Text to Clone</h3>")
-                text_input = gr.Textbox(
-                    label="Text to Convert",
-                    placeholder="Enter the text you want to speak in the cloned voice...",
-                    lines=4,
-                    max_lines=6
-                )
-                clone_button = gr.Button(
-                    "🎤 Clone Voice",
-                    variant="primary",
-                    size="lg"
-                )
-            with gr.Column(scale=1):
-                # Output section
-                gr.HTML("<h3 style='color: #2E86AB;'>🎵 Cloned Voice Output</h3>")
-                audio_output = gr.Audio(
-                    label="Generated Audio",
-                    type="filepath"
-                )
-                status_output = gr.Textbox(
-                    label="Status",
-                    lines=3,
-                    interactive=False
-                )
-        # Examples section
-        gr.HTML("<h3 style='color: #2E86AB;'>💡 Example Texts</h3>")
-        examples = [
-            "Hello, this is a demonstration of AI voice cloning technology.",
-            "Welcome to the future of artificial intelligence and speech synthesis.",
-            "This voice was generated using advanced machine learning models.",
-            "Experience the power of AI-driven voice generation with natural speech patterns."
-        ]
-        gr.Examples(
-            examples=examples,
-            inputs=text_input,
-            label="Click to try these examples:"
-        )
-        # How it works
-        with gr.Accordion("🔍 How It Works", open=False):
-            gr.Markdown("""
-            ### The Technology
-            1. **🎤 Voice Upload**: Upload 10+ seconds of clear speech
-            2. **🧠 AI Analysis**: XTTS v2 model analyzes voice characteristics
-            3. **📝 Text Input**: Enter the text you want to convert
-            4. **🎵 Voice Synthesis**: Generate speech that matches the uploaded voice
-            ### Tips for Best Results
-            - Use high-quality, clear audio recordings
-            - Ensure 10+ seconds of continuous speech
-            - Avoid background noise and music
-            - Single speaker only in reference audio
-            ### Supported Languages
-            - English (primary)
-            - Spanish, French, German, Italian, Portuguese
-            - Chinese, Japanese, Korean
-            """)
-        # Event handlers
-        clone_button.click(
-            fn=clone_voice,
-            inputs=[text_input, reference_audio],
-            outputs=[audio_output, status_output],
-            show_progress=True
-        )
-        # Auto-generate on Enter
-        text_input.submit(
-            fn=clone_voice,
-            inputs=[text_input, reference_audio],
-            outputs=[audio_output, status_output],
-            show_progress=True
-        )
-    return demo
-# Launch the app
 if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False
-    )

 import gradio as gr
 import torch
+import numpy as np
 import soundfile as sf
+import tempfile
+import os
+def voice_clone_demo(reference_audio, input_text):
     """
+    Placeholder voice-cloning handler for the demo UI.
+
+    No speech synthesis is performed: after validating both inputs, the
+    reference audio is handed back unchanged together with a status line.
+
+    Args:
+        reference_audio: Reference recording supplied by the caller; any
+            falsy value is treated as "nothing uploaded".
+        input_text: Text the user wants spoken; empty or whitespace-only
+            text is rejected.
+
+    Returns:
+        An ``(audio, status)`` pair. ``audio`` is ``None`` when validation
+        fails or an exception is caught, otherwise it is ``reference_audio``
+        itself. ``status`` is a human-readable message.
     """
     try:
+        # Guard clauses: both inputs are required before anything else runs.
+        if not reference_audio:
+            return None, "❌ Please upload reference audio!"
+        if not input_text or not input_text.strip():
+            return None, "❌ Please enter text to convert!"
+        # Demo behaviour: echo the reference audio back instead of synthesizing.
+        # A real implementation would call a voice-cloning backend here.
+        # NOTE(review): the preview always appends "..." even when the text
+        # is shorter than 50 characters.
+        return reference_audio, f"✅ Demo: Would clone '{input_text[:50]}...' using uploaded voice"
+    # NOTE(review): broad catch; the guarded code above is unlikely to raise.
+    except Exception as e:
+        return None, f"❌ Error: {str(e)}"
 # Create Gradio interface
+# Build the UI at import time so `demo` exists as a module-level object.
+with gr.Blocks(
+    title="🎭 Voice Cloning Studio",
+    theme=gr.themes.Soft(primary_hue="blue")
+) as demo:
+    # Page header banner.
+    gr.HTML("""
+    <div style="text-align: center; padding: 20px;">
+        <h1 style="color: #2E86AB;">🎭 AI Voice Cloning Studio</h1>
+        <p style="color: #666; font-size: 18px;">Clone any voice with AI technology</p>
+    </div>
+    """)
+    # Two-column layout: inputs in the first column, results in the second.
+    with gr.Row():
+        with gr.Column():
+            gr.HTML("<h3>📤 Upload Reference Voice</h3>")
+            # type="filepath": the handler receives a path rather than raw samples.
+            reference_audio = gr.Audio(
+                label="Reference Audio (10+ seconds)",
+                type="filepath"
+            )
+            gr.HTML("<h3>📝 Enter Text</h3>")
+            text_input = gr.Textbox(
+                label="Text to Convert",
+                placeholder="Enter text to speak in the cloned voice...",
+                lines=4
+            )
+            clone_button = gr.Button("🎤 Clone Voice", variant="primary")
+        with gr.Column():
+            gr.HTML("<h3>🎵 Output</h3>")
+            audio_output = gr.Audio(label="Cloned Voice")
+            status_output = gr.Textbox(label="Status", interactive=False)
+    # Sample sentences offered as inputs for the text box.
+    examples = [
+        "Hello, this is a demonstration of voice cloning technology.",
+        "Welcome to the future of AI-powered speech synthesis.",
+        "This voice was generated using advanced machine learning."
+    ]
+    gr.Examples(
+        examples=examples,
+        inputs=text_input
+    )
+    # Wire the button to the handler; the input order must match the
+    # handler's (reference_audio, input_text) parameter order.
+    clone_button.click(
+        fn=voice_clone_demo,
+        inputs=[reference_audio, text_input],
+        outputs=[audio_output, status_output]
+    )
 if __name__ == "__main__":
+    demo.launch()