# Source: Hugging Face Spaces file view (revision 329d2b4, file size 7,149 bytes).
# Space status when this snapshot was captured: Runtime error.
import gradio as gr
import time
import os
from utils import generate_dummy_audio, MOCK_LOGS
# -----------------------------------------------------------------------------
# Model Inference Wrapper
# -----------------------------------------------------------------------------
def run_vibevoice(
    text_prompt: str,
    reference_audio: str,
    speed: float,
    temperature: float,
    progress=gr.Progress(),
):
    """
    Wrapper function for VibeVoice inference (currently a simulated placeholder).

    Validates the inputs, reports staged progress while sleeping briefly to
    stand in for real work, then returns placeholder audio produced by
    ``generate_dummy_audio`` together with a human-readable log message.

    Args:
        text_prompt: The text to be spoken. Must contain non-whitespace text.
        reference_audio: Path to the reference audio file for style cloning.
            May be empty/None, in which case a warning is shown.
        speed: Speaking rate. Only echoed in the log by this placeholder.
        temperature: Sampling temperature (creativity/variance). Only echoed
            in the log by this placeholder.
        progress: Gradio progress tracker. Declared as a default-valued
            parameter because that is how Gradio recognises and injects a
            tracker for the running event; callers do not pass it.

    Returns:
        A ``(audio, log_message)`` pair: the value returned by
        ``generate_dummy_audio(duration=3)`` and the status text.

    Raises:
        gr.Error: If ``text_prompt`` is empty or only whitespace.
    """
    # 1. Input Validation
    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please enter text to synthesize.")
    if not reference_audio:
        # VibeVoice usually requires a reference, but we only warn if missing
        gr.Warning("No reference audio provided. Using default voice style.")

    # 2. Progress Simulation (replace this block with actual model inference)
    # ------------------------------------------------------------------
    # Actual implementation would look like:
    #   model = load_vibevoice_model()
    #   audio_array = model.inference(text_prompt, reference_audio, ...)
    #   return (sample_rate, audio_array), "Generation Successful"
    # ------------------------------------------------------------------
    # Each entry: (completed fraction, status text, simulated duration in s).
    stages = (
        (0, "Initializing VibeVoice...", 0.5),
        (0.3, "Analyzing Reference Audio Style...", 0.8),
        (0.6, "Synthesizing Speech...", 0.8),
        (0.9, "Finalizing Audio...", 0.3),
    )
    for fraction, description, delay in stages:
        progress(fraction, desc=description)
        time.sleep(delay)

    # Generate dummy audio for demonstration purposes
    output_audio_path = generate_dummy_audio(duration=3)

    reference_name = os.path.basename(reference_audio) if reference_audio else "None"
    log_message = (
        f"✅ Generation Complete\n"
        f"📝 Text length: {len(text_prompt)} chars\n"
        f"🎚️ Speed: {speed}x | 🌡️ Temp: {temperature}\n"
        f"🎤 Reference: {reference_name}"
    )
    return output_audio_path, log_message
# -----------------------------------------------------------------------------
# Custom Theme Definition
# -----------------------------------------------------------------------------
# Creating a professional Microsoft-inspired blue theme
# Soft base theme recoloured with blue/slate hues and slightly stronger
# primary buttons, block titles and shadows.
# "Segoe UI" is a Microsoft system font that Google Fonts does not host, so it
# is requested by plain name (a locally installed font) with generic fallbacks
# rather than through gr.themes.GoogleFont.
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    font=["Segoe UI", "system-ui", "sans-serif"],
    text_size="lg",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
    block_shadow="*shadow_drop_lg",
)
# -----------------------------------------------------------------------------
# Gradio 6 UI Layout
# -----------------------------------------------------------------------------
# Note: in Gradio 6, app-level options such as theme and css are passed to launch(), not to gr.Blocks()
# Two-column page: inputs (text, reference audio, advanced sliders, button) on
# the left; generated audio and a status log on the right. The button click is
# wired to run_vibevoice, and example rows pre-fill the same four inputs.
with gr.Blocks() as demo:
    # Header Section
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🗣️ Microsoft VibeVoice")
            gr.Markdown("### Zero-shot Text-to-Speech with Emotion & Style Transfer")
    with gr.Row():
        # Styled through the ".header-link" CSS rules supplied at launch.
        gr.Markdown(
            "Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)",
            elem_classes=["header-link"],
        )

    # Main Content
    with gr.Row():
        # Left Column: Inputs
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 1. Input Text")
                input_text = gr.Textbox(
                    label="Text to Speech",
                    placeholder="Enter the text you want VibeVoice to speak...",
                    lines=4,
                    max_lines=8,
                    value="The quick brown fox jumps over the lazy dog, demonstrating the amazing capabilities of modern voice synthesis.",
                )
            with gr.Group():
                gr.Markdown("### 2. Voice Reference (The 'Vibe')")
                # type="filepath" hands the handler a path string, which is
                # what run_vibevoice passes to os.path.basename.
                ref_audio = gr.Audio(
                    label="Reference Audio",
                    sources=["upload", "microphone"],
                    type="filepath",
                    editable=True,
                )
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                speed_slider = gr.Slider(
                    minimum=0.5, maximum=2.0, value=1.0, step=0.1,
                    label="Speaking Speed",
                )
                temp_slider = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.7, step=0.1,
                    label="Temperature (Variance)",
                )
            generate_btn = gr.Button("Generate Speech 🎵", variant="primary", size="lg")

        # Right Column: Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 3. Generated Result")
            output_audio = gr.Audio(
                label="Synthesized Audio",
                interactive=False,
                autoplay=False,
            )
            with gr.Group():
                gr.Markdown("#### Process Logs")
                # Gradio 6 replaced the show_copy_button flag with the
                # `buttons` list; "copy" keeps the copy-to-clipboard button.
                logs = gr.Textbox(
                    label="Status",
                    value="Ready to generate.",
                    lines=5,
                    interactive=False,
                    buttons=["copy"],
                )

    # -------------------------------------------------------------------------
    # Event Listeners
    # -------------------------------------------------------------------------
    # Note: using api_visibility="public" (Gradio 6 standard)
    generate_btn.click(
        fn=run_vibevoice,
        inputs=[input_text, ref_audio, speed_slider, temp_slider],
        outputs=[output_audio, logs],
        api_visibility="public",
    )

    # Example inputs to help users get started (no reference audio supplied)
    gr.Examples(
        examples=[
            ["Hello! This is a test of the VibeVoice system.", None, 1.0, 0.7],
            ["Dramatic reading requires a specific cadence and tone.", None, 0.8, 0.9],
        ],
        inputs=[input_text, ref_audio, speed_slider, temp_slider],
    )
# -----------------------------------------------------------------------------
# App Launch
# -----------------------------------------------------------------------------
# Note: All app-level configs go here in Gradio 6
if __name__ == "__main__":
    # Styles for the Markdown element tagged "header-link": a muted, bold,
    # slightly smaller link that turns blue and underlined on hover.
    header_link_css = (
        "\n"
        ".header-link a {\n"
        "text-decoration: none;\n"
        "color: #666;\n"
        "font-size: 0.9em;\n"
        "font-weight: bold;\n"
        "}\n"
        ".header-link a:hover {\n"
        "color: #2563eb;\n"
        "text-decoration: underline;\n"
        "}\n"
    )

    # Extra links passed to launch() for display in the app footer.
    footer_links = [
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "VibeVoice Repo", "url": "https://github.com/microsoft/VibeVoice"},
    ]

    # Theme and CSS are supplied at launch time rather than to gr.Blocks().
    demo.launch(
        theme=custom_theme,
        footer_links=footer_links,
        css=header_link_css,
    )