Spaces:

crackuser
/

voiceclone-dev

Sleeping

App Files Files Community

crackuser committed on Sep 10, 2025

Commit

986aa2a

verified ·

1 Parent(s): 1bdf03a

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -49

app.py CHANGED Viewed

@@ -1,65 +1,185 @@
 import gradio as gr
 import torch
-import torchaudio
-import numpy as np
-from transformers import AutoModel, AutoTokenizer
-import tempfile
 import os
-def clone_voice(reference_audio, input_text):
-    """Voice cloning function"""
     try:
-        # Your voice cloning logic here
-        # This is a basic template - replace with your actual model
-        # Load your model (replace with actual model loading)
-        # model = AutoModel.from_pretrained("your-model-name")
-        # Process the reference audio
-        if reference_audio is None:
-            return None, "Please upload reference audio"
-        # Simple echo for testing (replace with actual voice cloning)
-        # In a real implementation, you'd:
-        # 1. Process reference_audio to extract voice features
-        # 2. Generate speech from input_text using those features
-        # 3. Return the generated audio
-        # For now, return the reference audio as a test
-        return reference_audio, "Voice cloning completed (test mode)"
     except Exception as e:
-        return None, f"Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(title="Voice Cloning") as app:
-    gr.Markdown("# 🎭 AI Voice Cloning")
-    gr.Markdown("Upload reference audio and enter text to clone the voice.")
-    with gr.Row():
-        with gr.Column():
-            reference_audio = gr.Audio(
-                label="Reference Voice (10+ seconds)",
-                type="filepath"
-            )
-            input_text = gr.Textbox(
-                label="Text to Convert",
-                placeholder="Enter the text you want to speak in the cloned voice...",
-                lines=3
-            )
-            clone_btn = gr.Button("🎤 Clone Voice", variant="primary")
-        with gr.Column():
-            output_audio = gr.Audio(label="Cloned Voice Output")
-            status_text = gr.Textbox(label="Status", interactive=False)
-    # Connect the function
-    clone_btn.click(
-        fn=clone_voice,
-        inputs=[reference_audio, input_text],
-        outputs=[output_audio, status_text]
-    )
 # Launch the app
 if __name__ == "__main__":
-    app.launch()

 import gradio as gr
 import torch
+from TTS.api import TTS
 import os
+import tempfile
+import soundfile as sf
+# Mark the Coqui terms of service as agreed via the environment
+# (presumably so the model download does not stop at a prompt - confirm against Coqui docs)
+os.environ["COQUI_TOS_AGREED"] = "1"
+# Prefer the GPU when torch reports one, otherwise fall back to the CPU
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+print(f"Using device: {device}")
+# Load XTTS v2 once at import time; on any failure `tts` stays None so
+# clone_voice can report the problem instead of the app crashing on startup
+try:
+    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
+    tts = tts.to(device)
+    print("✅ XTTS v2 model loaded successfully!")
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    tts = None
+def _discard_file(path):
+    """Remove a leftover temporary file, ignoring an empty path or an OS-level failure."""
+    if not path:
+        return
+    try:
+        os.remove(path)
+    except OSError:
+        # Cleanup is best-effort: a failed delete must not mask the real error
+        pass
+def clone_voice(text, reference_audio, language="en"):
+    """
+    Clone voice using XTTS v2 model
+
+    Args:
+        text: Text to synthesize; must be non-blank and at most 500 characters.
+        reference_audio: Reference recording, forwarded to the model as `speaker_wav`.
+        language: Language code forwarded to the model; defaults to "en".
+
+    Returns:
+        An (audio_path, status_message) tuple. audio_path is the generated
+        .wav file on success and None on any validation or synthesis failure.
+        Exceptions raised during synthesis are caught and reported in the
+        status message rather than propagated.
+    """
+    if not text or not text.strip():
+        return None, "❌ Please enter some text to convert!"
+    if not reference_audio:
+        return None, "❌ Please upload a reference audio file!"
+    if tts is None:
+        return None, "❌ TTS model not loaded properly!"
+    # Validate text length
+    if len(text) > 500:
+        return None, "❌ Text too long! Please keep it under 500 characters."
+    # Tracked outside the try so every failure path can delete the temp file
+    output_path = None
     try:
+        # Create temporary output file; delete=False because the path is
+        # handed back to the caller after this function returns
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            output_path = tmp_file.name
+        # Generate cloned voice
+        print(f"🎤 Cloning voice for text: {text[:50]}...")
+        tts.tts_to_file(
+            text=text,
+            speaker_wav=reference_audio,
+            language=language,
+            file_path=output_path
+        )
+        # Verify output file exists and has content
+        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+            return output_path, f"✅ Voice cloning successful!\n🎵 Generated audio for: '{text[:100]}{'...' if len(text) > 100 else ''}'"
+        # Nothing usable was written: do not leave the empty temp file behind
+        _discard_file(output_path)
+        return None, "❌ Failed to generate audio file!"
     except Exception as e:
+        # Synthesis failed part-way: drop the temp file before reporting
+        _discard_file(output_path)
+        error_msg = str(e)
+        print(f"❌ Voice cloning error: {error_msg}")
+        if "CUDA" in error_msg:
+            return None, "❌ GPU memory error! Try with shorter text or restart the space."
+        elif "audio" in error_msg.lower():
+            return None, "❌ Audio processing error! Please upload a clear WAV or MP3 file."
+        else:
+            return None, f"❌ Error: {error_msg}"
 # Create Gradio interface
+def create_interface():
+    """Build the Voice Cloning Studio Blocks UI and return it without launching it."""
+    def _section_heading(title):
+        # Every section heading shares one inline style; only the title varies
+        return gr.HTML(f"<h3 style='color: #2E86AB;'>{title}</h3>")
+    studio_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="green")
+    with gr.Blocks(title="🎭 Voice Cloning Studio", theme=studio_theme) as demo:
+        # Page banner
+        gr.HTML("""
+        <div style="text-align: center; padding: 20px;">
+            <h1 style="color: #2E86AB; margin-bottom: 10px;">🎭 AI Voice Cloning Studio</h1>
+            <p style="color: #666; font-size: 18px;">Clone any voice with advanced AI technology</p>
+        </div>
+        """)
+        with gr.Row():
+            # Left column: reference clip, text prompt and the trigger button
+            with gr.Column(scale=1):
+                _section_heading("📤 Upload Reference Voice")
+                voice_sample = gr.Audio(
+                    label="Reference Audio (10+ seconds recommended)",
+                    type="filepath",
+                    sources=["upload"],
+                )
+                _section_heading("📝 Enter Text to Clone")
+                prompt_box = gr.Textbox(
+                    label="Text to Convert",
+                    placeholder="Enter the text you want to speak in the cloned voice...",
+                    lines=4,
+                    max_lines=6,
+                )
+                run_button = gr.Button("🎤 Clone Voice", variant="primary", size="lg")
+            # Right column: generated audio and the status message
+            with gr.Column(scale=1):
+                _section_heading("🎵 Cloned Voice Output")
+                result_audio = gr.Audio(label="Generated Audio", type="filepath")
+                status_box = gr.Textbox(label="Status", lines=3, interactive=False)
+        # Clickable sample sentences that fill the text prompt
+        _section_heading("💡 Example Texts")
+        gr.Examples(
+            examples=[
+                "Hello, this is a demonstration of AI voice cloning technology.",
+                "Welcome to the future of artificial intelligence and speech synthesis.",
+                "This voice was generated using advanced machine learning models.",
+                "Experience the power of AI-driven voice generation with natural speech patterns.",
+            ],
+            inputs=prompt_box,
+            label="Click to try these examples:",
+        )
+        # Collapsed help panel
+        with gr.Accordion("🔍 How It Works", open=False):
+            gr.Markdown("""
+            ### The Technology
+            1. **🎤 Voice Upload**: Upload 10+ seconds of clear speech
+            2. **🧠 AI Analysis**: XTTS v2 model analyzes voice characteristics
+            3. **📝 Text Input**: Enter the text you want to convert
+            4. **🎵 Voice Synthesis**: Generate speech that matches the uploaded voice
+            ### Tips for Best Results
+            - Use high-quality, clear audio recordings
+            - Ensure 10+ seconds of continuous speech
+            - Avoid background noise and music
+            - Single speaker only in reference audio
+            ### Supported Languages
+            - English (primary)
+            - Spanish, French, German, Italian, Portuguese
+            - Chinese, Japanese, Korean
+            """)
+        # The button click and pressing Enter in the textbox run the same handler
+        for register in (run_button.click, prompt_box.submit):
+            register(
+                fn=clone_voice,
+                inputs=[prompt_box, voice_sample],
+                outputs=[result_audio, status_box],
+                show_progress=True,
+            )
+    return demo
 # Launch the app
 if __name__ == "__main__":
+    # Listen on all interfaces at port 7860, with no public share link
+    launch_options = {
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "share": False,
+    }
+    demo = create_interface()
+    demo.launch(**launch_options)