Spaces:

DevNumb
/

TextTOVoiceConv

Sleeping

App Files Files Community

DevNumb committed on Dec 5, 2025

Commit

0c0737d

verified ·

1 Parent(s): 4ab70db

Create app.py

Browse files

Files changed (1) hide show

app.py +779 -0

app.py ADDED Viewed

	@@ -0,0 +1,779 @@

+# Standard library
+import base64
+import functools
+import io
+import os
+import tempfile
+import time
+import warnings
+from datetime import datetime
+# Third-party
+import gradio as gr
+import numpy as np
+import plotly.graph_objects as go
+import scipy.io.wavfile
+import torch
+from PIL import Image
+from transformers import VibeVoiceStreamingForConditionalGenerationInference, AutoProcessor
+# Suppress every warning category for the whole process (applies to all
+# libraries imported above, not only this module).
+warnings.filterwarnings("ignore")
+# Stylesheet handed to gr.Blocks(css=...). The class names defined here
+# (.header-container, .glass-card, .glow-button, .secondary-button,
+# .fancy-textbox, .custom-slider, .audio-container, .stats-card, .tab-nav)
+# are the ones attached to components through `elem_classes` or used in the
+# HTML snippets further down in this file.
+# NOTE(review): only --primary-gradient is referenced via var() in this
+# stylesheet; the other custom properties (e.g. --glass-effect, which stores
+# a whole declaration) are declared but not used here — confirm whether they
+# are still needed.
+custom_css = """
+/* Main Theme Variables */
+:root {
+    --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    --secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+    --accent-color: #8a2be2;
+    --dark-bg: #0f172a;
+    --card-bg: rgba(255, 255, 255, 0.1);
+    --glass-effect: backdrop-filter: blur(10px);
+}
+/* Custom Scrollbar */
+::-webkit-scrollbar {
+    width: 10px;
+}
+::-webkit-scrollbar-track {
+    background: rgba(255, 255, 255, 0.1);
+    border-radius: 10px;
+}
+::-webkit-scrollbar-thumb {
+    background: var(--primary-gradient);
+    border-radius: 10px;
+}
+/* Header Animation */
+@keyframes float {
+    0%, 100% { transform: translateY(0px); }
+    50% { transform: translateY(-10px); }
+}
+@keyframes pulse-glow {
+    0%, 100% { box-shadow: 0 0 20px rgba(102, 126, 234, 0.5); }
+    50% { box-shadow: 0 0 40px rgba(102, 126, 234, 0.8); }
+}
+@keyframes shimmer {
+    0% { background-position: -200% center; }
+    100% { background-position: 200% center; }
+}
+/* Header Styles */
+.header-container {
+    text-align: center;
+    padding: 2rem;
+    background: var(--primary-gradient);
+    border-radius: 20px;
+    margin-bottom: 2rem;
+    position: relative;
+    overflow: hidden;
+}
+.header-container::before {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background: linear-gradient(45deg, transparent 30%, rgba(255,255,255,0.1) 50%, transparent 70%);
+    animation: shimmer 3s infinite linear;
+    background-size: 200% auto;
+}
+.header-title {
+    font-size: 3.5em !important;
+    background: linear-gradient(45deg, #fff, #f0f0f0);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    margin-bottom: 0.5rem !important;
+    font-weight: 800 !important;
+    text-shadow: 0 2px 10px rgba(0,0,0,0.2);
+    animation: float 3s ease-in-out infinite;
+}
+.header-subtitle {
+    font-size: 1.2em !important;
+    color: rgba(255, 255, 255, 0.9) !important;
+    margin-bottom: 1rem !important;
+}
+/* Card Styles */
+.glass-card {
+    background: rgba(255, 255, 255, 0.1) !important;
+    backdrop-filter: blur(10px) !important;
+    border: 1px solid rgba(255, 255, 255, 0.2) !important;
+    border-radius: 20px !important;
+    padding: 2rem !important;
+    transition: all 0.3s ease !important;
+}
+.glass-card:hover {
+    transform: translateY(-5px) !important;
+    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3) !important;
+}
+/* Button Styles */
+.glow-button {
+    background: var(--primary-gradient) !important;
+    border: none !important;
+    color: white !important;
+    padding: 1rem 2rem !important;
+    border-radius: 50px !important;
+    font-size: 1.1em !important;
+    font-weight: 600 !important;
+    transition: all 0.3s ease !important;
+    position: relative !important;
+    overflow: hidden !important;
+    animation: pulse-glow 2s infinite !important;
+}
+.glow-button:hover {
+    transform: scale(1.05) !important;
+    box-shadow: 0 10px 30px rgba(102, 126, 234, 0.6) !important;
+}
+.glow-button::after {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: -100%;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(90deg, transparent, rgba(255,255,255,0.2), transparent);
+    transition: 0.5s;
+}
+.glow-button:hover::after {
+    left: 100%;
+}
+.secondary-button {
+    background: rgba(255, 255, 255, 0.1) !important;
+    border: 2px solid rgba(255, 255, 255, 0.3) !important;
+    color: white !important;
+    padding: 0.8rem 1.5rem !important;
+    border-radius: 50px !important;
+    font-size: 1em !important;
+    transition: all 0.3s ease !important;
+}
+.secondary-button:hover {
+    background: rgba(255, 255, 255, 0.2) !important;
+    border-color: rgba(255, 255, 255, 0.5) !important;
+    transform: translateY(-2px) !important;
+}
+/* Input Styles */
+.fancy-textbox textarea {
+    background: rgba(255, 255, 255, 0.05) !important;
+    border: 2px solid rgba(255, 255, 255, 0.1) !important;
+    border-radius: 15px !important;
+    color: white !important;
+    font-size: 1.1em !important;
+    padding: 1.5rem !important;
+    transition: all 0.3s ease !important;
+}
+.fancy-textbox textarea:focus {
+    border-color: #667eea !important;
+    box-shadow: 0 0 20px rgba(102, 126, 234, 0.3) !important;
+    background: rgba(255, 255, 255, 0.08) !important;
+}
+/* Slider Styles */
+.custom-slider .gr-slider {
+    background: rgba(255, 255, 255, 0.1) !important;
+    height: 8px !important;
+    border-radius: 10px !important;
+}
+.custom-slider .gr-slider::-webkit-slider-thumb {
+    background: var(--primary-gradient) !important;
+    border: none !important;
+    width: 24px !important;
+    height: 24px !important;
+    border-radius: 50% !important;
+    box-shadow: 0 4px 10px rgba(0,0,0,0.3) !important;
+}
+/* Audio Player Styles */
+.audio-container {
+    background: rgba(255, 255, 255, 0.05) !important;
+    border-radius: 20px !important;
+    padding: 2rem !important;
+    border: 2px solid rgba(255, 255, 255, 0.1) !important;
+}
+/* Stats Card */
+.stats-card {
+    background: rgba(255, 255, 255, 0.08) !important;
+    padding: 1.5rem !important;
+    border-radius: 15px !important;
+    text-align: center !important;
+    transition: transform 0.3s ease !important;
+}
+.stats-card:hover {
+    transform: scale(1.05) !important;
+}
+.stats-value {
+    font-size: 2.5em !important;
+    font-weight: 700 !important;
+    background: var(--primary-gradient) !important;
+    -webkit-background-clip: text !important;
+    -webkit-text-fill-color: transparent !important;
+    margin-bottom: 0.5rem !important;
+}
+.stats-label {
+    color: rgba(255, 255, 255, 0.7) !important;
+    font-size: 0.9em !important;
+    text-transform: uppercase !important;
+    letter-spacing: 1px !important;
+}
+/* Progress Bar */
+.progress-container {
+    margin: 2rem 0;
+}
+.progress-bar {
+    height: 8px;
+    background: rgba(255, 255, 255, 0.1);
+    border-radius: 10px;
+    overflow: hidden;
+    position: relative;
+}
+.progress-fill {
+    height: 100%;
+    background: var(--primary-gradient);
+    width: 0%;
+    border-radius: 10px;
+    transition: width 0.3s ease;
+    position: relative;
+}
+.progress-fill::after {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background: linear-gradient(90deg, transparent, rgba(255,255,255,0.4), transparent);
+    animation: shimmer 2s infinite;
+}
+/* Tab Styles */
+.tab-nav {
+    background: rgba(255, 255, 255, 0.05) !important;
+    border-radius: 15px !important;
+    padding: 0.5rem !important;
+}
+.tab-nav button {
+    border-radius: 10px !important;
+    margin: 0 0.25rem !important;
+    transition: all 0.3s ease !important;
+}
+.tab-nav button.selected {
+    background: var(--primary-gradient) !important;
+}
+/* Notification */
+.notification {
+    position: fixed;
+    top: 20px;
+    right: 20px;
+    background: var(--primary-gradient);
+    color: white;
+    padding: 1rem 1.5rem;
+    border-radius: 10px;
+    box-shadow: 0 10px 30px rgba(0,0,0,0.3);
+    z-index: 1000;
+    animation: slideIn 0.3s ease;
+}
+@keyframes slideIn {
+    from { transform: translateX(100%); opacity: 0; }
+    to { transform: translateX(0); opacity: 1; }
+}
+"""
+# Initialize model and processor
+@gr.cache_resource
+def load_model():
+    print("🚀 Loading VibeVoice model...")
+    model = VibeVoiceStreamingForConditionalGenerationInference.from_pretrained(
+        "microsoft/VibeVoice-Realtime-0.5B",
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto"
+    )
+    processor = AutoProcessor.from_pretrained("microsoft/VibeVoice-Realtime-0.5B")
+    print("✅ Model loaded successfully!")
+    return model, processor
+model, processor = load_model()
+# Stats tracking
+class TTSStats:
+    def __init__(self):
+        self.total_generations = 0
+        self.total_chars = 0
+        self.start_time = time.time()
+    def add_generation(self, text):
+        self.total_generations += 1
+        self.total_chars += len(text)
+    def get_stats(self):
+        uptime = time.time() - self.start_time
+        hours, remainder = divmod(uptime, 3600)
+        minutes, seconds = divmod(remainder, 60)
+        return {
+            'total_generations': self.total_generations,
+            'total_chars': self.total_chars,
+            'avg_chars': self.total_chars / max(self.total_generations, 1),
+            'uptime': f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
+        }
+stats = TTSStats()
+def create_waveform_visualization(audio_data, sr=16000):
+    """Create a beautiful waveform visualization"""
+    if audio_data is None:
+        return None
+    # Sample the audio data for visualization
+    samples = audio_data[::10]  # Downsample for performance
+    x = np.arange(len(samples)) / (sr / 10)
+    fig = go.Figure()
+    # Add waveform trace with gradient fill
+    fig.add_trace(go.Scatter(
+        x=x,
+        y=samples,
+        fill='tozeroy',
+        mode='lines',
+        line=dict(
+            color='#667eea',
+            width=2,
+            shape='spline'
+        ),
+        fillcolor='rgba(102, 126, 234, 0.3)',
+        name='Waveform'
+    ))
+    # Add envelope trace
+    envelope = np.abs(samples)
+    fig.add_trace(go.Scatter(
+        x=x,
+        y=envelope,
+        mode='lines',
+        line=dict(
+            color='#764ba2',
+            width=1,
+            dash='dash'
+        ),
+        name='Envelope'
+    ))
+    fig.update_layout(
+        title="🎵 Audio Waveform",
+        plot_bgcolor='rgba(255, 255, 255, 0.05)',
+        paper_bgcolor='rgba(0, 0, 0, 0)',
+        font=dict(color='white'),
+        xaxis=dict(
+            title="Time (s)",
+            gridcolor='rgba(255, 255, 255, 0.1)',
+            zerolinecolor='rgba(255, 255, 255, 0.2)'
+        ),
+        yaxis=dict(
+            title="Amplitude",
+            gridcolor='rgba(255, 255, 255, 0.1)',
+            zerolinecolor='rgba(255, 255, 255, 0.2)'
+        ),
+        showlegend=True,
+        legend=dict(
+            bgcolor='rgba(255, 255, 255, 0.1)',
+            bordercolor='rgba(255, 255, 255, 0.2)'
+        ),
+        margin=dict(l=50, r=50, t=50, b=50)
+    )
+    return fig
+def generate_speech(text, voice_style="neutral", speed=1.0, temperature=0.7):
+    """
+    Generate speech from text with enhanced parameters
+    """
+    try:
+        if not text or text.strip() == "":
+            return None, None, "Please enter some text to convert to speech."
+        # Update stats
+        stats.add_generation(text)
+        # Add voice style prompt
+        style_prompts = {
+            "neutral": "",
+            "excited": "with excited and energetic voice",
+            "calm": "with calm and soothing voice",
+            "professional": "with professional and clear voice",
+            "storytelling": "with engaging storytelling voice"
+        }
+        prompt = f"{text} {style_prompts.get(voice_style, '')}".strip()
+        # Process input
+        inputs = processor(
+            text=prompt,
+            return_tensors="pt",
+            sampling_rate=16000,
+        )
+        device = next(model.parameters()).device
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+        # Generate with progress callback simulation
+        with torch.no_grad():
+            audio = model.generate(
+                **inputs,
+                temperature=temperature,
+                do_sample=True,
+                length_penalty=1.0,
+                repetition_penalty=2.0,
+            )
+        # Convert to numpy
+        audio_np = audio.cpu().numpy().squeeze()
+        # Apply speed adjustment
+        if speed != 1.0:
+            from scipy import signal
+            new_length = int(len(audio_np) / speed)
+            audio_np = signal.resample(audio_np, new_length)
+        # Normalize audio
+        max_val = np.max(np.abs(audio_np))
+        if max_val > 0:
+            audio_np = audio_np / max_val * 0.95
+        # Create temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            scipy.io.wavfile.write(tmp_file.name, 16000, audio_np.astype(np.float32))
+            # Create waveform visualization
+            waveform_fig = create_waveform_visualization(audio_np)
+            return tmp_file.name, waveform_fig, "✅ Speech generated successfully!"
+    except Exception as e:
+        print(f"Error: {e}")
+        return None, None, f"❌ Error: {str(e)}"
+def update_stats_display():
+    """Update the statistics display"""
+    stats_data = stats.get_stats()
+    return f"""
+    <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 1rem;">
+        <div class="stats-card">
+            <div class="stats-value">{stats_data['total_generations']}</div>
+            <div class="stats-label">Total Generations</div>
+        </div>
+        <div class="stats-card">
+            <div class="stats-value">{stats_data['total_chars']}</div>
+            <div class="stats-label">Characters Processed</div>
+        </div>
+        <div class="stats-card">
+            <div class="stats-value">{stats_data['avg_chars']:.0f}</div>
+            <div class="stats-label">Avg. Characters</div>
+        </div>
+        <div class="stats-card">
+            <div class="stats-value">{stats_data['uptime']}</div>
+            <div class="stats-label">System Uptime</div>
+        </div>
+    </div>
+    """
+# Create the main interface
+# Layout, top to bottom: header banner, a two-column row (input controls on
+# the left, audio/plot/status on the right), a tabbed bottom card
+# (statistics, examples, settings) and a footer. Event handlers are attached
+# after the layout, inside the same Blocks context.
+with gr.Blocks(
+    title="🎵 VibeVoice Pro - AI Text to Speech",
+    theme=gr.themes.Soft(
+        primary_hue="violet",
+        secondary_hue="purple",
+        neutral_hue="slate"
+    ),
+    css=custom_css
+) as demo:
+    # Header Section
+    with gr.Column(elem_classes="header-container"):
+        gr.HTML("""
+        <div style="text-align: center;">
+            <h1 class="header-title">🎵 VibeVoice Pro</h1>
+            <p class="header-subtitle">Transform Text into Natural, Expressive Speech</p>
+            <div style="display: flex; justify-content: center; gap: 0.5rem; margin-top: 1rem;">
+                <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; font-size: 0.9em;">
+                    🤖 Powered by Microsoft VibeVoice
+                </span>
+                <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; font-size: 0.9em;">
+                    ⚡ Real-time Generation
+                </span>
+                <span style="background: rgba(255,255,255,0.2); padding: 0.5rem 1rem; border-radius: 20px; font-size: 0.9em;">
+                    🎭 Multiple Voice Styles
+                </span>
+            </div>
+        </div>
+        """)
+    # Main Content
+    with gr.Row():
+        # Left Panel - Input Controls
+        with gr.Column(scale=1, elem_classes="glass-card"):
+            gr.Markdown("### 📝 Text Input")
+            # NOTE(review): the placeholder mentions a 1000-character maximum,
+            # but no length limit is set on this component — confirm whether
+            # one is enforced elsewhere.
+            text_input = gr.Textbox(
+                label="",
+                placeholder="✨ Enter your text here... (Maximum 1000 characters)",
+                lines=6,
+                max_lines=10,
+                elem_classes="fancy-textbox",
+                scale=2
+            )
+            gr.Markdown("### 🎭 Voice Settings")
+            with gr.Row():
+                # Choices mirror the keys of the style table in generate_speech.
+                voice_style = gr.Dropdown(
+                    label="Voice Style",
+                    choices=["neutral", "excited", "calm", "professional", "storytelling"],
+                    value="neutral",
+                    info="Select the emotional tone of the voice"
+                )
+            with gr.Row():
+                speed = gr.Slider(
+                    minimum=0.5,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.1,
+                    label="🎚️ Speaking Speed",
+                    info="Adjust the speaking rate",
+                    elem_classes="custom-slider"
+                )
+                temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.5,
+                    value=0.7,
+                    step=0.1,
+                    label="🔥 Temperature",
+                    info="Control creativity vs consistency",
+                    elem_classes="custom-slider"
+                )
+            # Action Buttons
+            with gr.Row():
+                generate_btn = gr.Button(
+                    "✨ Generate Speech",
+                    variant="primary",
+                    size="lg",
+                    elem_classes="glow-button",
+                    scale=2
+                )
+                clear_btn = gr.Button(
+                    "🗑️ Clear All",
+                    variant="secondary",
+                    elem_classes="secondary-button"
+                )
+            # Quick Actions
+            gr.Markdown("### ⚡ Quick Actions")
+            with gr.Row():
+                quick_test = gr.Button("🎯 Test Voice", size="sm", elem_classes="secondary-button")
+                quick_clear = gr.Button("📄 Clear Text", size="sm", elem_classes="secondary-button")
+        # Right Panel - Output Display
+        with gr.Column(scale=1, elem_classes="glass-card"):
+            gr.Markdown("### 🎧 Generated Audio")
+            with gr.Column(elem_classes="audio-container"):
+                # type="filepath" matches the WAV path returned by generate_speech.
+                audio_output = gr.Audio(
+                    label="",
+                    type="filepath",
+                    elem_id="audio_output",
+                    scale=1
+                )
+                # Visualizer
+                waveform_plot = gr.Plot(
+                    label="📊 Audio Waveform",
+                    show_label=True
+                )
+                # Status and Info
+                status_display = gr.HTML(
+                    value="<div style='text-align: center; color: rgba(255,255,255,0.7);'>Ready to generate speech...</div>"
+                )
+            # Download and Share
+            # NOTE(review): no click handler for either button is visible in
+            # this file — confirm they are wired elsewhere or remove them.
+            with gr.Row():
+                download_btn = gr.Button("💾 Download Audio", elem_classes="secondary-button")
+                share_btn = gr.Button("🔗 Generate Share Link", elem_classes="secondary-button")
+    # Bottom Section - Stats and Examples
+    with gr.Column(elem_classes="glass-card"):
+        with gr.Tabs(elem_classes="tab-nav"):
+            with gr.TabItem("📈 Statistics"):
+                # Rendered once at build time; refreshed via the button below.
+                stats_display = gr.HTML(
+                    value=update_stats_display()
+                )
+                refresh_stats = gr.Button("🔄 Refresh Stats", size="sm")
+            with gr.TabItem("💡 Examples"):
+                gr.Examples(
+                    examples=[
+                        ["Welcome to the future of text-to-speech technology! This is VibeVoice Pro, creating natural and expressive voices."],
+                        ["In a world where AI transforms everything, voice synthesis stands at the forefront of innovation and creativity."],
+                        ["The quick brown fox jumps over the lazy dog. This classic sentence tests all English phonemes."],
+                        ["Imagine a world where every written word can be heard in the most beautiful, human-like voice possible."],
+                        ["This is not just text-to-speech. This is emotion, expression, and personality in every syllable."]
+                    ],
+                    inputs=text_input,
+                    label="Click any example to try it",
+                    examples_per_page=5
+                )
+            with gr.TabItem("⚙️ Settings"):
+                gr.Markdown("### Advanced Settings")
+                # NOTE(review): these three checkboxes are not passed to any
+                # handler visible in this file — confirm they have an effect.
+                with gr.Row():
+                    auto_play = gr.Checkbox(label="Auto-play generated audio", value=True)
+                    show_waveform = gr.Checkbox(label="Show waveform visualization", value=True)
+                    save_history = gr.Checkbox(label="Save generation history", value=False)
+                gr.Markdown("### About")
+                gr.Markdown("""
+                **VibeVoice Pro** uses Microsoft's state-of-the-art VibeVoice model for high-quality speech synthesis.
+                - **Model**: VibeVoice-Realtime-0.5B
+                - **Max Input**: 1000 characters
+                - **Output Quality**: 16kHz, 32-bit float
+                - **Languages**: English (optimized)
+                ⚠️ **Note**: For best results, keep text under 500 characters.
+                """)
+    # Footer
+    # Placeholder links plus an inline script that writes the local time into
+    # #live-time once per second.
+    # NOTE(review): confirm that a <script> injected through gr.HTML is
+    # actually executed by the browser in the Gradio version in use.
+    gr.HTML("""
+    <div style="text-align: center; margin-top: 2rem; padding: 1.5rem; background: rgba(255,255,255,0.05); border-radius: 15px;">
+        <div style="display: flex; justify-content: center; gap: 2rem; margin-bottom: 1rem;">
+            <a href="#" style="color: rgba(255,255,255,0.7); text-decoration: none; transition: color 0.3s;">📖 Documentation</a>
+            <a href="#" style="color: rgba(255,255,255,0.7); text-decoration: none; transition: color 0.3s;">🐛 Report Issue</a>
+            <a href="#" style="color: rgba(255,255,255,0.7); text-decoration: none; transition: color 0.3s;">⭐ Star Project</a>
+            <a href="#" style="color: rgba(255,255,255,0.7); text-decoration: none; transition: color 0.3s;">🔄 API Access</a>
+        </div>
+        <p style="color: rgba(255,255,255,0.5); font-size: 0.9em;">
+            Made with ❤️ using Gradio & Transformers |
+            <span id="live-time" style="color: #667eea;"></span>
+        </p>
+    </div>
+    <script>
+        function updateTime() {
+            const now = new Date();
+            const timeString = now.toLocaleTimeString();
+            document.getElementById('live-time').textContent = timeString;
+        }
+        setInterval(updateTime, 1000);
+        updateTime();
+        // Add smooth scroll behavior
+        document.addEventListener('DOMContentLoaded', function() {
+            document.querySelectorAll('a[href^="#"]').forEach(anchor => {
+                anchor.addEventListener('click', function (e) {
+                    e.preventDefault();
+                    const target = document.querySelector(this.getAttribute('href'));
+                    if (target) {
+                        target.scrollIntoView({ behavior: 'smooth' });
+                    }
+                });
+            });
+        });
+    </script>
+    """)
+    # Event Handlers
+    def process_generation(text, voice_style, speed, temperature):
+        """Handle speech generation with visual feedback"""
+        if not text or text.strip() == "":
+            return None, None, "<div style='color: #ff6b6b; text-align: center;'>⚠️ Please enter some text first!</div>"
+        # Show processing message
+        yield None, None, "<div style='color: #667eea; text-align: center;'>⏳ Generating speech... Please wait.</div>"
+        # Generate speech
+        audio_path, waveform, status = generate_speech(text, voice_style, speed, temperature)
+        # Update stats display
+        stats_html = update_stats_display()
+        return audio_path, waveform, f"""
+        <div style="background: rgba(102, 126, 234, 0.1); padding: 1rem; border-radius: 10px; border-left: 4px solid #667eea;">
+            <div style="color: #667eea; font-weight: 600; margin-bottom: 0.5rem;">✅ Generation Complete!</div>
+            <div style="color: rgba(255,255,255,0.8);">
+                Generated {len(text)} characters | Voice: {voice_style.title()} | Speed: {speed}x
+            </div>
+        </div>
+        """
+    # Connect buttons
+    generate_btn.click(
+        fn=process_generation,
+        inputs=[text_input, voice_style, speed, temperature],
+        outputs=[audio_output, waveform_plot, status_display]
+    )
+    clear_btn.click(
+        fn=lambda: ["", None, None, 1.0, 0.7, "neutral", "<div style='color: rgba(255,255,255,0.7); text-align: center;'>Cleared. Ready for new input.</div>"],
+        inputs=[],
+        outputs=[text_input, audio_output, waveform_plot, speed, temperature, voice_style, status_display]
+    )
+    quick_test.click(
+        fn=lambda: "This is a test of the VibeVoice Pro text-to-speech system. How amazing is this?",
+        inputs=[],
+        outputs=[text_input]
+    )
+    quick_clear.click(
+        fn=lambda: "",
+        inputs=[],
+        outputs=[text_input]
+    )
+    refresh_stats.click(
+        fn=update_stats_display,
+        inputs=[],
+        outputs=[stats_display]
+    )
+    # Keyboard shortcuts
+    demo.load(
+        fn=lambda: gr.Info("💡 Tip: Press Ctrl+Enter to generate speech faster!"),
+        inputs=[],
+        outputs=[]
+    )
+if __name__ == "__main__":
+    demo.launch(
+        debug=True,
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        favicon_path=None
+    )