Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import time | |
| import os | |
| from utils import generate_dummy_audio, MOCK_LOGS | |
| # ----------------------------------------------------------------------------- | |
| # Model Inference Wrapper | |
| # ----------------------------------------------------------------------------- | |
def run_vibevoice(
    text_prompt: str,
    reference_audio: str,
    speed: float,
    temperature: float,
    progress: gr.Progress = gr.Progress(),
):
    """
    Wrapper function for VibeVoice inference (currently a simulated run).

    No model is invoked yet: the function validates its inputs, reports a few
    timed progress stages, and returns placeholder audio from
    ``generate_dummy_audio``.

    Args:
        text_prompt: The text to be spoken. Must contain non-whitespace text.
        reference_audio: Path to the reference audio file for style cloning.
            May be empty/None when the user supplied no reference.
        speed: Speaking rate (only echoed in the log for now).
        temperature: Sampling temperature (only echoed in the log for now).
        progress: Gradio progress tracker. Gradio detects it through its
            default value in the signature, so callers and the ``inputs``
            list of an event listener must not supply it.

    Returns:
        A ``(audio, log_message)`` tuple: the value returned by
        ``generate_dummy_audio(duration=3)`` (presumably a file path --
        confirm in ``utils``) and a multi-line status summary.

    Raises:
        gr.Error: If ``text_prompt`` is empty or whitespace-only.
    """
    # 1. Input Validation
    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please enter text to synthesize.")
    if not reference_audio:
        # VibeVoice usually requires a reference, but we only warn if missing.
        gr.Warning("No reference audio provided. Using default voice style.")

    # 2. Progress Simulation (replace this block with actual model inference)
    # ------------------------------------------------------------------
    # Actual implementation would look like:
    #   model = load_vibevoice_model()
    #   audio_array = model.inference(text_prompt, reference_audio, ...)
    #   return (sample_rate, audio_array), "Generation Successful"
    # ------------------------------------------------------------------
    # Each entry: (completed fraction, status text, simulated duration in s).
    simulated_stages = (
        (0, "Initializing VibeVoice...", 0.5),
        (0.3, "Analyzing Reference Audio Style...", 0.8),
        (0.6, "Synthesizing Speech...", 0.8),
        (0.9, "Finalizing Audio...", 0.3),
    )
    for fraction, description, delay in simulated_stages:
        progress(fraction, desc=description)
        time.sleep(delay)

    # Generate dummy audio for demonstration purposes
    output_audio_path = generate_dummy_audio(duration=3)

    reference_name = (
        os.path.basename(reference_audio) if reference_audio else "None"
    )
    log_message = (
        "✅ Generation Complete\n"
        f"📝 Text length: {len(text_prompt)} chars\n"
        f"🎚️ Speed: {speed}x | 🌡️ Temp: {temperature}\n"
        f"🎤 Reference: {reference_name}"
    )
    return output_audio_path, log_message
| # ----------------------------------------------------------------------------- | |
| # Custom Theme Definition | |
| # ----------------------------------------------------------------------------- | |
| # Creating a professional Microsoft-inspired blue theme | |
# Professional Microsoft-inspired blue theme, built on Gradio's Soft preset.
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    # "Segoe UI" is a system font and is not hosted on Google Fonts, so it is
    # referenced as a local font (plain string) instead of a GoogleFont. The
    # generic families after it keep a sans-serif look on machines that do
    # not have Segoe UI installed.
    font=["Segoe UI", "ui-sans-serif", "system-ui", "sans-serif"],
    text_size="lg",
    radius_size="md",
).set(
    # Overrides on top of the preset: darker primary button, bolder block
    # titles, and a larger drop shadow around blocks.
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
    block_shadow="*shadow_drop_lg",
)
| # ----------------------------------------------------------------------------- | |
| # Gradio 6 UI Layout | |
| # ----------------------------------------------------------------------------- | |
| # Note: No parameters in gr.Blocks() for Gradio 6 | |
# Two-column page: inputs (text, reference audio, advanced sliders, generate
# button) on the left; synthesized audio and a status log on the right.
# Theme and CSS are not passed here; they are supplied to demo.launch().
with gr.Blocks() as demo:
    # Header Section
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🗣️ Microsoft VibeVoice")
            gr.Markdown("### Zero-shot Text-to-Speech with Emotion & Style Transfer")
    with gr.Row():
        # "header-link" is the hook for the CSS rules passed at launch time.
        gr.Markdown(
            "Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)",
            elem_classes=["header-link"],
        )

    # Main Content
    with gr.Row():
        # Left Column: Inputs
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 1. Input Text")
                input_text = gr.Textbox(
                    label="Text to Speech",
                    placeholder="Enter the text you want VibeVoice to speak...",
                    lines=4,
                    max_lines=8,
                    value="The quick brown fox jumps over the lazy dog, demonstrating the amazing capabilities of modern voice synthesis.",
                )
            with gr.Group():
                gr.Markdown("### 2. Voice Reference (The 'Vibe')")
                # type="filepath" hands the handler a path string (or None
                # when nothing was uploaded or recorded).
                ref_audio = gr.Audio(
                    label="Reference Audio",
                    sources=["upload", "microphone"],
                    type="filepath",
                    editable=True,
                )
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speaking Speed",
                )
                temp_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature (Variance)",
                )
            generate_btn = gr.Button("Generate Speech 🎵", variant="primary", size="lg")

        # Right Column: Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 3. Generated Result")
            output_audio = gr.Audio(
                label="Synthesized Audio",
                interactive=False,
                autoplay=False,
            )
            with gr.Group():
                gr.Markdown("#### Process Logs")
                logs = gr.Textbox(
                    label="Status",
                    value="Ready to generate.",
                    lines=5,
                    interactive=False,
                    # Gradio 6 replaced show_copy_button=True with the
                    # `buttons` list.
                    buttons=["copy"],
                )

    # -------------------------------------------------------------------------
    # Event Listeners
    # -------------------------------------------------------------------------
    # Note: using api_visibility="public" (Gradio 6 standard)
    # The inputs order must match run_vibevoice's positional parameters.
    generate_btn.click(
        fn=run_vibevoice,
        inputs=[input_text, ref_audio, speed_slider, temp_slider],
        outputs=[output_audio, logs],
        api_visibility="public",
    )

    # Example inputs to help users get started (no reference audio supplied).
    gr.Examples(
        examples=[
            ["Hello! This is a test of the VibeVoice system.", None, 1.0, 0.7],
            ["Dramatic reading requires a specific cadence and tone.", None, 0.8, 0.9],
        ],
        inputs=[input_text, ref_audio, speed_slider, temp_slider],
    )
| # ----------------------------------------------------------------------------- | |
| # App Launch | |
| # ----------------------------------------------------------------------------- | |
| # Note: All app-level configs go here in Gradio 6 | |
if __name__ == "__main__":
    # Styling for the "Built with anycoder" link, which carries the
    # "header-link" element class in the layout.
    header_link_css = """
    .header-link a {
        text-decoration: none;
        color: #666;
        font-size: 0.9em;
        font-weight: bold;
    }
    .header-link a:hover {
        color: #2563eb;
        text-decoration: underline;
    }
    """

    # Extra links rendered in the page footer.
    page_footer_links = [
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "VibeVoice Repo", "url": "https://github.com/microsoft/VibeVoice"},
    ]

    # In Gradio 6 all app-level configuration (theme, CSS, footer) is passed
    # to launch() rather than to gr.Blocks().
    demo.launch(
        theme=custom_theme,
        footer_links=page_footer_links,
        css=header_link_css,
    )