Spaces:

DevNumb
/

TextTOVoiceConv

Sleeping

App Files Community

DevNumb committed on Dec 5, 2025

Commit

036af98

verified ·

1 Parent(s): 5c51f1f

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -530

app.py CHANGED Viewed

@@ -1,568 +1,99 @@
-import asyncio
-import platform
-import sys
-# Fix for asyncio warnings on Hugging Face Spaces
-if sys.platform.startswith("linux") or sys.platform.startswith("darwin"):
-    try:
-        # Clean up any existing loops
-        try:
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                loop.close()
-        except:
-            pass
-        # Set default policy
-        asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
-        # Create a new event loop
-        asyncio.set_event_loop(asyncio.new_event_loop())
-    except Exception:
-        pass  # Ignore any errors if this fails
-# Now import other modules
 import gradio as gr
-import numpy as np
 import tempfile
-import time
-import scipy.io.wavfile
 import warnings
 warnings.filterwarnings("ignore")
-# Clean white theme CSS with black text
 css = """
 <style>
-/* WHITE BACKGROUND THEME */
-body, .gradio-container {
     background: white !important;
-    color: #333333 !important;
-    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-    margin: 0;
     padding: 20px;
-    min-height: 100vh;
 }
-/* Header */
-.header {
-    text-align: center;
-    padding: 2.5rem;
-    background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%);
-    border-radius: 16px;
-    margin-bottom: 2rem;
-    color: white;
-    box-shadow: 0 4px 20px rgba(79, 70, 229, 0.2);
-}
-.header h1 {
-    font-size: 2.5em;
-    margin: 0 0 0.5rem 0;
-    font-weight: 700;
-    letter-spacing: -0.5px;
-}
-.header p {
-    font-size: 1.1em;
-    opacity: 0.95;
-    margin: 0;
-}
-/* Cards */
-.card {
-    background: white;
-    border: 1px solid #E5E7EB;
-    border-radius: 16px;
-    padding: 1.5rem;
-    margin-bottom: 1.5rem;
-    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
-}
-/* Text Input - BLACK TEXT ON WHITE */
 textarea {
     background: white !important;
-    border: 2px solid #D1D5DB !important;
-    border-radius: 12px !important;
-    color: #000000 !important;  /* Pure black text */
-    padding: 1rem !important;
     font-size: 16px !important;
-    font-family: 'SF Mono', Monaco, 'Courier New', monospace !important;
     width: 100% !important;
-    min-height: 120px !important;
-    line-height: 1.5 !important;
 }
-textarea:focus {
-    border-color: #4F46E5 !important;
-    box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1) !important;
-    outline: none !important;
-    color: #000000 !important;
-}
-textarea::placeholder {
-    color: #666666 !important;
-    opacity: 0.8 !important;
-}
-/* Buttons */
-.btn-primary {
-    background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%) !important;
-    border: none !important;
     color: white !important;
-    padding: 0.75rem 1.5rem !important;
-    border-radius: 10px !important;
-    font-weight: 600 !important;
-    font-size: 1rem !important;
-    cursor: pointer !important;
-    margin: 0.5rem !important;
-}
-.btn-primary:hover {
-    transform: translateY(-2px) !important;
-    box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3) !important;
-}
-.btn-secondary {
-    background: white !important;
-    border: 2px solid #D1D5DB !important;
-    color: #374151 !important;
-    padding: 0.75rem 1.5rem !important;
-    border-radius: 10px !important;
-    font-weight: 500 !important;
-    cursor: pointer !important;
-    margin: 0.5rem !important;
-}
-.btn-secondary:hover {
-    border-color: #4F46E5 !important;
-    color: #4F46E5 !important;
-    background: #F5F3FF !important;
-}
-/* Slider */
-input[type="range"] {
-    width: 100% !important;
-    height: 6px !important;
-    background: #E5E7EB !important;
-    border-radius: 10px !important;
-    outline: none !important;
-    margin: 1rem 0 !important;
-}
-input[type="range"]::-webkit-slider-thumb {
-    width: 20px !important;
-    height: 20px !important;
-    background: #4F46E5 !important;
-    border: 3px solid white !important;
-    border-radius: 50% !important;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2) !important;
-    cursor: pointer !important;
-}
-/* Audio Player */
-.audio-player {
-    background: #F9FAFB !important;
-    border-radius: 12px !important;
-    padding: 1.5rem !important;
-    margin-top: 1rem !important;
-    border: 1px solid #E5E7EB !important;
-}
-.audio-player audio {
-    width: 100% !important;
-    border-radius: 8px !important;
-}
-/* Stats */
-.stats-grid {
-    display: grid;
-    grid-template-columns: repeat(2, 1fr);
-    gap: 1rem;
-    margin-top: 1rem;
-}
-.stat-box {
-    background: white;
-    border: 1px solid #E5E7EB;
-    border-radius: 12px;
-    padding: 1rem;
-    text-align: center;
-}
-.stat-value {
-    font-size: 1.8em;
-    font-weight: 700;
-    color: #4F46E5 !important;
-    margin-bottom: 0.25rem;
-}
-.stat-label {
-    color: #6B7280;
-    font-size: 0.8em;
-    text-transform: uppercase;
-    letter-spacing: 0.5px;
-    font-weight: 600;
-}
-/* Status messages */
-.status-success {
-    background: #DCFCE7 !important;
-    border: 1px solid #86EFAC !important;
-    border-left: 4px solid #10B981 !important;
-    color: #065F46 !important;
-    padding: 1rem !important;
-    border-radius: 8px !important;
-    margin: 1rem 0 !important;
-}
-.status-error {
-    background: #FEE2E2 !important;
-    border: 1px solid #FCA5A5 !important;
-    border-left: 4px solid #EF4444 !important;
-    color: #991B1B !important;
-    padding: 1rem !important;
-    border-radius: 8px !important;
-    margin: 1rem 0 !important;
-}
-/* Footer */
-.footer {
-    text-align: center;
-    padding: 2rem;
-    margin-top: 2rem;
-    color: #6B7280;
-    border-top: 1px solid #E5E7EB;
-}
-/* Grid layout */
-.container {
-    max-width: 1000px;
-    margin: 0 auto;
-}
-.row {
-    display: flex;
-    gap: 1.5rem;
-    margin-bottom: 1.5rem;
-}
-.col {
-    flex: 1;
-}
-.col-2 {
-    flex: 2;
-}
-/* Markdown content - BLACK TEXT */
-.markdown {
-    color: #374151 !important;
-    line-height: 1.6 !important;
-}
-.markdown h1, .markdown h2, .markdown h3 {
-    color: #111827 !important;
-    font-weight: 600 !important;
-}
-.markdown p {
-    color: #4B5563 !important;
 }
 </style>
 """
-# Initialize stats
-class Stats:
-    def __init__(self):
-        self.generations = 0
-        self.characters = 0
-        self.start_time = time.time()
-    def add(self, text):
-        self.generations += 1
-        self.characters += len(text)
-    def get_stats(self):
-        uptime = time.time() - self.start_time
-        hours = int(uptime // 3600)
-        minutes = int((uptime % 3600) // 60)
-        return {
-            'generations': self.generations,
-            'characters': self.characters,
-            'avg_length': self.characters // max(self.generations, 1),
-            'uptime': f"{hours}h {minutes}m"
-        }
-stats = Stats()
-def create_speech_audio(text, speed=1.0):
-    """Create speech audio from text"""
-    if not text or not text.strip():
         return None
-    # Create audio based on text
-    duration = min(len(text) * 0.05, 5)  # Up to 5 seconds
-    sampling_rate = 24000
-    # Generate time array
-    t = np.linspace(0, duration, int(sampling_rate * duration))
-    # Create base tone
-    base_freq = 220
-    audio = np.zeros_like(t)
-    # Add harmonics based on text
-    for i, char in enumerate(text[:20]):
-        freq = base_freq + (ord(char) % 300)
-        amplitude = 0.5 / (i + 1)
-        audio += amplitude * np.sin(2 * np.pi * freq * t * (i + 1) / 10)
-    # Apply envelope
-    envelope = np.exp(-2 * t) * (1 - np.exp(-10 * t))
-    audio *= envelope
-    # Normalize
-    max_val = np.max(np.abs(audio))
-    if max_val > 0:
-        audio = audio / max_val * 0.8
-    # Adjust speed
-    if speed != 1.0:
-        from scipy import signal
-        new_length = int(len(audio) / speed)
-        audio = signal.resample(audio, new_length)
-    # Save to file
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
-        scipy.io.wavfile.write(f.name, sampling_rate, audio.astype(np.float32))
-        return f.name
-def generate_tts(text, speed=1.0, emotion="neutral"):
-    """Generate TTS with status message"""
-    if not text or not text.strip():
-        return None, "⚠️ Please enter some text first", get_stats_html()
-    # Update stats
-    stats.add(text)
     try:
-        audio_file = create_speech_audio(text, speed)
-        if audio_file:
-            duration = len(text) * 0.05 / speed
-            duration = min(duration, 5)
-            message = f"""
-            <div class="status-success">
-                <div style="font-weight: 600; margin-bottom: 0.5rem;">✅ Speech Generated Successfully!</div>
-                <div style="color: #065F46;">
-                    <strong>{len(text)} characters</strong> •
-                    <strong>{duration:.1f}s duration</strong> •
-                    Speed: <strong>{speed}x</strong>
-                </div>
-            </div>
-            """
-        else:
-            message = """
-            <div class="status-error">
-                <div style="font-weight: 600;">❌ Failed to generate audio</div>
-                <div>Please try again with different text.</div>
-            </div>
-            """
-        return audio_file, message, get_stats_html()
     except Exception as e:
-        error_msg = f"""
-        <div class="status-error">
-            <div style="font-weight: 600;">❌ Error occurred</div>
-            <div>{str(e)[:100]}</div>
-        </div>
-        """
-        return None, error_msg, get_stats_html()
-def get_stats_html():
-    """Generate HTML for statistics"""
-    data = stats.get_stats()
-    return f"""
-    <div class="stats-grid">
-        <div class="stat-box">
-            <div class="stat-value">{data['generations']}</div>
-            <div class="stat-label">Generations</div>
-        </div>
-        <div class="stat-box">
-            <div class="stat-value">{data['characters']}</div>
-            <div class="stat-label">Characters</div>
-        </div>
-        <div class="stat-box">
-            <div class="stat-value">{data['avg_length']}</div>
-            <div class="stat-label">Avg Length</div>
-        </div>
-        <div class="stat-box">
-            <div class="stat-value">{data['uptime']}</div>
-            <div class="stat-label">Uptime</div>
-        </div>
-    </div>
-    """
-# Create the interface
-with gr.Blocks() as demo:
-    # Add CSS
-    gr.HTML(css)
-    # Main container
-    with gr.Column(elem_id="container"):
-        # Header
-        gr.HTML("""
-        <div class="header">
-            <h1>🎵 VibeVoice TTS</h1>
-            <p>Text-to-Speech with Clean White Interface</p>
-        </div>
-        """)
-        # Main content row
-        with gr.Row(elem_classes="row"):
-            # Left column - Input
-            with gr.Column(scale=2, elem_classes="col-2"):
-                with gr.Column(elem_classes="card"):
-                    gr.Markdown("### 📝 Input Text")
-                    text_input = gr.Textbox(
-                        label="",
-                        placeholder="Type or paste your text here... (Black text on white background)",
-                        lines=5
-                    )
-                    gr.Markdown("### ⚙️ Settings")
-                    with gr.Row():
-                        emotion = gr.Dropdown(
-                            label="Voice Style",
-                            choices=["Neutral", "Happy", "Calm"],
-                            value="Neutral"
-                        )
-                        speed = gr.Slider(
-                            minimum=0.5,
-                            maximum=2.0,
-                            value=1.0,
-                            step=0.1,
-                            label="Speaking Speed"
-                        )
-                    with gr.Row():
-                        generate_btn = gr.Button(
-                            "✨ Generate Speech",
-                            variant="primary",
-                            elem_classes="btn-primary"
-                        )
-                        clear_btn = gr.Button(
-                            "Clear",
-                            variant="secondary",
-                            elem_classes="btn-secondary"
-                        )
-            # Right column - Output
-            with gr.Column(scale=1, elem_classes="col"):
-                with gr.Column(elem_classes="card"):
-                    gr.Markdown("### 🎧 Audio Output")
-                    with gr.Column(elem_classes="audio-player"):
-                        audio_output = gr.Audio(
-                            type="filepath",
-                            label=""
-                        )
-                    status_display = gr.HTML(
-                        """<div style="text-align: center; color: #6B7280; padding: 1rem;">
-                           Ready. Enter text and click Generate.
-                           </div>"""
-                    )
-        # Statistics
-        with gr.Column(elem_classes="card"):
-            gr.Markdown("### 📊 Statistics")
-            stats_display = gr.HTML(get_stats_html())
-        # Examples
-        with gr.Column(elem_classes="card"):
-            gr.Markdown("### 💡 Examples")
-            gr.Examples(
-                examples=[
-                    ["Hello! Welcome to the text-to-speech system."],
-                    ["The quick brown fox jumps over the lazy dog."],
-                    ["This is a test of the audio generation."],
-                    ["The weather is beautiful today."]
-                ],
-                inputs=text_input,
-                label="Click to try:"
-            )
-        # About section
-        with gr.Column(elem_classes="card"):
-            gr.Markdown("### ℹ️ About")
-            gr.Markdown("""
-            **VibeVoice TTS** converts text to audio.
-            **Features:**
-            - 🎵 Audio generation
-            - ⚡ Fast processing
-            - 🎭 Voice styles
-            - ⚙️ Speed control
-            **Note:** Text input shows **black text on white background**.
-            """)
-    # Event handlers
-    def process_text(text, speed_val, emotion_val):
-        """Process text to generate speech"""
-        if not text or not text.strip():
-            return None, """
-            <div class="status-error">
-                <div style="font-weight: 600;">⚠️ Please enter text</div>
-            </div>
-            """, get_stats_html()
-        return generate_tts(text, speed_val, emotion_val)
-    def clear_all():
-        """Clear all inputs"""
-        return "", None, """
-        <div style="text-align: center; color: #6B7280; padding: 1rem;">
-            Cleared. Ready for new text.
-        </div>
-        """, get_stats_html()
-    # Connect buttons
-    generate_btn.click(
-        fn=process_text,
-        inputs=[text_input, speed, emotion],
-        outputs=[audio_output, status_display, stats_display]
     )
-    clear_btn.click(
-        fn=clear_all,
-        inputs=[],
-        outputs=[text_input, audio_output, status_display, stats_display]
-    )
-# Launch the app - SIMPLIFIED for Gradio 3.x
 if __name__ == "__main__":
-    # Clean up any existing event loops before starting
-    try:
-        import asyncio
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            loop.close()
-    except:
-        pass
-    # Launch with minimal parameters
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        quiet=True,
-        debug=False
-    )

 import gradio as gr
 import tempfile
+import os
 import warnings
 warnings.filterwarnings("ignore")
+# CSS for white background with black text
 css = """
 <style>
+body {
     background: white !important;
     padding: 20px;
 }
 textarea {
     background: white !important;
+    color: black !important;
+    border: 2px solid #4CAF50 !important;
+    border-radius: 10px !important;
+    padding: 15px !important;
     font-size: 16px !important;
     width: 100% !important;
 }
+button {
+    background: #4CAF50 !important;
     color: white !important;
+    border: none !important;
+    padding: 10px 20px !important;
+    border-radius: 5px !important;
 }
 </style>
 """
+def text_to_speech_actual(text):
+    """Use actual TTS engine"""
+    if not text:
         return None
     try:
+        # Try using gTTS (Google Text-to-Speech) - works well and is free
+        from gtts import gTTS
+        import pygame
+        # Create temporary file
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+            temp_file = f.name
+        # Generate speech
+        tts = gTTS(text=text, lang='en', slow=False)
+        tts.save(temp_file)
+        return temp_file
     except Exception as e:
+        print(f"TTS Error: {e}")
+        return None
+# Simple interface
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# 🎵 Actual Text-to-Speech")
+    gr.Markdown("This uses real TTS to convert text to speech")
+    text_input = gr.Textbox(
+        label="Enter Text",
+        placeholder="Type your text here...",
+        lines=4
+    )
+    with gr.Row():
+        generate_btn = gr.Button("Generate Speech")
+        clear_btn = gr.Button("Clear")
+    audio_output = gr.Audio(type="filepath", label="Speech Output")
+    status = gr.Markdown("Ready...")
+    gr.Examples(
+        examples=[
+            ["Hello! This is actual text-to-speech conversion."],
+            ["Welcome to the speech synthesis system."],
+            ["The quick brown fox jumps over the lazy dog."]
+        ],
+        inputs=text_input
     )
+    def process(text):
+        audio = text_to_speech_actual(text)
+        if audio:
+            return audio, "✅ Speech generated successfully!"
+        return None, "❌ Failed to generate speech"
+    def clear():
+        return "", None, "Cleared"
+    generate_btn.click(process, text_input, [audio_output, status])
+    clear_btn.click(clear, [], [text_input, audio_output, status])
 if __name__ == "__main__":
+    demo.launch()