Spaces:

NeuralFalcon
/

anycoder-ee200fb6

Runtime error

App Files Files Community

NeuralFalcon committed on Dec 5, 2025

Commit

329d2b4

verified ·

1 Parent(s): fa03508

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +200 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,200 @@

+import gradio as gr
+import time
+import os
+from utils import generate_dummy_audio, MOCK_LOGS
+# -----------------------------------------------------------------------------
+# Model Inference Wrapper
+# -----------------------------------------------------------------------------
+def run_vibevoice(
+    text_prompt: str,
+    reference_audio: str,
+    speed: float,
+    temperature: float
+):
+    """
+    Wrapper function for VibeVoice inference.
+    Args:
+        text_prompt: The text to be spoken.
+        reference_audio: Path to the reference audio file for style cloning.
+        speed: Speaking rate.
+        temperature: Sampling temperature (creativity/variance).
+    """
+    # 1. Input Validation
+    if not text_prompt:
+        raise gr.Error("Please enter text to synthesize.")
+    if not reference_audio:
+        # VibeVoice usually requires a reference, but we can warn if missing
+        gr.Warning("No reference audio provided. Using default voice style.")
+    # 2. Progress Simulation (Replace this block with actual model inference)
+    # ------------------------------------------------------------------
+    # Actual implementation would look like:
+    # model = load_vibevoice_model()
+    # audio_array = model.inference(text_prompt, reference_audio, ...)
+    # return (sample_rate, audio_array), "Generation Successful"
+    # ------------------------------------------------------------------
+    progress = gr.Progress()
+    progress(0, desc="Initializing VibeVoice...")
+    time.sleep(0.5)
+    progress(0.3, desc="Analyzing Reference Audio Style...")
+    time.sleep(0.8)
+    progress(0.6, desc="Synthesizing Speech...")
+    time.sleep(0.8)
+    progress(0.9, desc="Finalizing Audio...")
+    time.sleep(0.3)
+    # Generate dummy audio for demonstration purposes
+    output_audio_path = generate_dummy_audio(duration=3)
+    log_message = (
+        f"✅ Generation Complete\n"
+        f"📝 Text length: {len(text_prompt)} chars\n"
+        f"🎚️ Speed: {speed}x | 🌡️ Temp: {temperature}\n"
+        f"🎤 Reference: {os.path.basename(reference_audio) if reference_audio else 'None'}"
+    )
+    return output_audio_path, log_message
+# -----------------------------------------------------------------------------
+# Custom Theme Definition
+# -----------------------------------------------------------------------------
+# Creating a professional Microsoft-inspired blue theme
+custom_theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="slate",
+    neutral_hue="slate",
+    font=gr.themes.GoogleFont("Segoe UI"),
+    text_size="lg",
+    radius_size="md"
+).set(
+    button_primary_background_fill="*primary_600",
+    button_primary_background_fill_hover="*primary_700",
+    block_title_text_weight="600",
+    block_shadow="*shadow_drop_lg"
+)
+# -----------------------------------------------------------------------------
+# Gradio 6 UI Layout
+# -----------------------------------------------------------------------------
+# Note: in Gradio 6, app-level options (theme, css, footer links) are passed to launch(), not gr.Blocks()
+with gr.Blocks() as demo:
+    # Header Section
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("# 🗣️ Microsoft VibeVoice")
+            gr.Markdown("### Zero-shot Text-to-Speech with Emotion & Style Transfer")
+    with gr.Row():
+        gr.Markdown(
+            "Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)",
+            elem_classes=["header-link"]
+        )
+    # Main Content
+    with gr.Row():
+        # Left Column: Inputs
+        with gr.Column(scale=1):
+            with gr.Group():
+                gr.Markdown("### 1. Input Text")
+                input_text = gr.Textbox(
+                    label="Text to Speech",
+                    placeholder="Enter the text you want VibeVoice to speak...",
+                    lines=4,
+                    max_lines=8,
+                    value="The quick brown fox jumps over the lazy dog, demonstrating the amazing capabilities of modern voice synthesis."
+                )
+            with gr.Group():
+                gr.Markdown("### 2. Voice Reference (The 'Vibe')")
+                ref_audio = gr.Audio(
+                    label="Reference Audio",
+                    sources=["upload", "microphone"],
+                    type="filepath",
+                    editable=True
+                )
+            with gr.Accordion("⚙️ Advanced Settings", open=False):
+                speed_slider = gr.Slider(
+                    minimum=0.5, maximum=2.0, value=1.0, step=0.1,
+                    label="Speaking Speed"
+                )
+                temp_slider = gr.Slider(
+                    minimum=0.1, maximum=1.0, value=0.7, step=0.1,
+                    label="Temperature (Variance)"
+                )
+            generate_btn = gr.Button("Generate Speech 🎵", variant="primary", size="lg")
+        # Right Column: Outputs
+        with gr.Column(scale=1):
+            gr.Markdown("### 3. Generated Result")
+            output_audio = gr.Audio(
+                label="Synthesized Audio",
+                interactive=False,
+                autoplay=False
+            )
+            with gr.Group():
+                gr.Markdown("#### Process Logs")
+                logs = gr.Textbox(
+                    label="Status",
+                    value="Ready to generate.",
+                    lines=5,
+                    interactive=False,
+                    show_copy_button=True
+                )
+    # -------------------------------------------------------------------------
+    # Event Listeners
+    # -------------------------------------------------------------------------
+    # Note: using api_visibility="public" (Gradio 6 standard)
+    generate_btn.click(
+        fn=run_vibevoice,
+        inputs=[input_text, ref_audio, speed_slider, temp_slider],
+        outputs=[output_audio, logs],
+        api_visibility="public"
+    )
+    # Example inputs to help users get started
+    gr.Examples(
+        examples=[
+            ["Hello! This is a test of the VibeVoice system.", None, 1.0, 0.7],
+            ["Dramatic reading requires a specific cadence and tone.", None, 0.8, 0.9],
+        ],
+        inputs=[input_text, ref_audio, speed_slider, temp_slider]
+    )
+# -----------------------------------------------------------------------------
+# App Launch
+# -----------------------------------------------------------------------------
+# Note: All app-level configs go here in Gradio 6
+if __name__ == "__main__":
+    demo.launch(
+        theme=custom_theme,
+        footer_links=[
+            {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
+            {"label": "VibeVoice Repo", "url": "https://github.com/microsoft/VibeVoice"}
+        ],
+        css="""
+        .header-link a {
+            text-decoration: none;
+            color: #666;
+            font-size: 0.9em;
+            font-weight: bold;
+        }
+        .header-link a:hover {
+            color: #2563eb;
+            text-decoration: underline;
+        }
+        """
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio>=6.0
+requests
+Pillow
+numpy
+scipy
+soundfile
+librosa