Spaces:

NeuralFalcon
/

anycoder-ee200fb6

Runtime error

App Files Files Community

anycoder-ee200fb6 / app.py

NeuralFalcon

Upload folder using huggingface_hub

329d2b4 verified about 2 months ago

raw

history blame contribute delete

7.15 kB

	import gradio as gr
	import time
	import os
	from utils import generate_dummy_audio, MOCK_LOGS

	# -----------------------------------------------------------------------------
	# Model Inference Wrapper
	# -----------------------------------------------------------------------------
	def run_vibevoice(
	    text_prompt: str,
	    reference_audio: str,
	    speed: float,
	    temperature: float,
	    progress=gr.Progress(),
	):
	    """Simulate a VibeVoice synthesis run and report what was requested.

	    This is a placeholder: it steps a progress bar through four timed
	    stages and returns demo audio from ``generate_dummy_audio`` instead of
	    running a real model.

	    Args:
	        text_prompt: The text to be spoken. Must contain at least one
	            non-whitespace character.
	        reference_audio: Path to the reference audio file for style
	            cloning. May be empty/None, in which case a warning is shown
	            and the log reports ``None`` as the reference.
	        speed: Speaking rate; only echoed in the log by this placeholder.
	        temperature: Sampling temperature; only echoed in the log by this
	            placeholder.
	        progress: Gradio progress tracker. It is declared as a default
	            argument because Gradio only hooks the tracker up to the UI
	            when it appears in the signature; callers do not pass it.

	    Returns:
	        A ``(audio, log_message)`` pair: whatever ``generate_dummy_audio``
	        returns (presumably a file path -- confirm against ``utils``) and a
	        multi-line status summary.

	    Raises:
	        gr.Error: If ``text_prompt`` is empty or only whitespace.
	    """
	    # 1. Input validation -- whitespace-only text has nothing to synthesize.
	    if not text_prompt or not text_prompt.strip():
	        raise gr.Error("Please enter text to synthesize.")

	    if not reference_audio:
	        # VibeVoice usually requires a reference, but we only warn if missing.
	        gr.Warning("No reference audio provided. Using default voice style.")

	    # 2. Progress simulation (replace this block with actual model inference)
	    # ------------------------------------------------------------------
	    # Actual implementation would look like:
	    #   model = load_vibevoice_model()
	    #   audio_array = model.inference(text_prompt, reference_audio, ...)
	    #   return (sample_rate, audio_array), "Generation Successful"
	    # ------------------------------------------------------------------
	    progress(0, desc="Initializing VibeVoice...")
	    time.sleep(0.5)

	    progress(0.3, desc="Analyzing Reference Audio Style...")
	    time.sleep(0.8)

	    progress(0.6, desc="Synthesizing Speech...")
	    time.sleep(0.8)

	    progress(0.9, desc="Finalizing Audio...")
	    time.sleep(0.3)

	    # Generate dummy audio for demonstration purposes.
	    output_audio_path = generate_dummy_audio(duration=3)

	    reference_name = os.path.basename(reference_audio) if reference_audio else "None"
	    log_message = (
	        f"✅ Generation Complete\n"
	        f"📝 Text length: {len(text_prompt)} chars\n"
	        f"🎚️ Speed: {speed}x | 🌡️ Temp: {temperature}\n"
	        f"🎤 Reference: {reference_name}"
	    )

	    return output_audio_path, log_message

	# -----------------------------------------------------------------------------
	# Custom Theme Definition
	# -----------------------------------------------------------------------------
	# Creating a professional Microsoft-inspired blue theme
	# "Segoe UI" is a system font that Google Fonts does not host, so it is
	# passed as a plain font name (with generic fallbacks for machines that do
	# not have it installed) rather than wrapped in gr.themes.GoogleFont, which
	# would request a stylesheet that does not exist.
	custom_theme = gr.themes.Soft(
	    primary_hue="blue",
	    secondary_hue="slate",
	    neutral_hue="slate",
	    font=["Segoe UI", "system-ui", "sans-serif"],
	    text_size="lg",
	    radius_size="md",
	).set(
	    # Overrides applied on top of the Soft preset.
	    button_primary_background_fill="*primary_600",
	    button_primary_background_fill_hover="*primary_700",
	    block_title_text_weight="600",
	    block_shadow="*shadow_drop_lg",
	)

	# -----------------------------------------------------------------------------
	# Gradio 6 UI Layout
	# -----------------------------------------------------------------------------
	# Note: No parameters in gr.Blocks() for Gradio 6
	with gr.Blocks() as demo:

	    # Header section: title and tagline.
	    with gr.Row():
	        with gr.Column(scale=1):
	            gr.Markdown("# 🗣️ Microsoft VibeVoice")
	            gr.Markdown("### Zero-shot Text-to-Speech with Emotion & Style Transfer")

	    # Attribution link; the "header-link" class is the hook for custom CSS.
	    with gr.Row():
	        gr.Markdown(
	            "Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)",
	            elem_classes=["header-link"],
	        )

	    # Main content: inputs on the left, results on the right.
	    with gr.Row():

	        # Left column: inputs
	        with gr.Column(scale=1):
	            with gr.Group():
	                gr.Markdown("### 1. Input Text")
	                input_text = gr.Textbox(
	                    label="Text to Speech",
	                    placeholder="Enter the text you want VibeVoice to speak...",
	                    lines=4,
	                    max_lines=8,
	                    value="The quick brown fox jumps over the lazy dog, demonstrating the amazing capabilities of modern voice synthesis.",
	                )

	            with gr.Group():
	                gr.Markdown("### 2. Voice Reference (The 'Vibe')")
	                # type="filepath" hands the callback a path string.
	                ref_audio = gr.Audio(
	                    label="Reference Audio",
	                    sources=["upload", "microphone"],
	                    type="filepath",
	                    editable=True,
	                )

	            with gr.Accordion("⚙️ Advanced Settings", open=False):
	                speed_slider = gr.Slider(
	                    minimum=0.5, maximum=2.0, value=1.0, step=0.1,
	                    label="Speaking Speed",
	                )
	                temp_slider = gr.Slider(
	                    minimum=0.1, maximum=1.0, value=0.7, step=0.1,
	                    label="Temperature (Variance)",
	                )

	            generate_btn = gr.Button("Generate Speech 🎵", variant="primary", size="lg")

	        # Right column: outputs
	        with gr.Column(scale=1):
	            gr.Markdown("### 3. Generated Result")
	            output_audio = gr.Audio(
	                label="Synthesized Audio",
	                interactive=False,
	                autoplay=False,
	            )

	            with gr.Group():
	                gr.Markdown("#### Process Logs")
	                # NOTE(review): Gradio 6 replaces show_copy_button=True with
	                # the `buttons` list -- confirm against the migration guide.
	                logs = gr.Textbox(
	                    label="Status",
	                    value="Ready to generate.",
	                    lines=5,
	                    interactive=False,
	                    buttons=["copy"],
	                )

	    # -------------------------------------------------------------------------
	    # Event listeners
	    # -------------------------------------------------------------------------
	    # Note: using api_visibility="public" (Gradio 6 standard)
	    generate_btn.click(
	        fn=run_vibevoice,
	        inputs=[input_text, ref_audio, speed_slider, temp_slider],
	        outputs=[output_audio, logs],
	        api_visibility="public",
	    )

	    # Example inputs to help users get started. Each row is
	    # [text, reference audio, speed, temperature], matching `inputs` below.
	    gr.Examples(
	        examples=[
	            ["Hello! This is a test of the VibeVoice system.", None, 1.0, 0.7],
	            ["Dramatic reading requires a specific cadence and tone.", None, 0.8, 0.9],
	        ],
	        inputs=[input_text, ref_audio, speed_slider, temp_slider],
	    )

	# -----------------------------------------------------------------------------
	# App Launch
	# -----------------------------------------------------------------------------
	# Note: All app-level configs go here in Gradio 6
	if __name__ == "__main__":
	    # Extra links passed to launch() as footer_links.
	    footer_entries = [
	        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
	        {"label": "VibeVoice Repo", "url": "https://github.com/microsoft/VibeVoice"},
	    ]

	    # Styles for links inside elements tagged with the "header-link" class.
	    # The string content is kept exactly as it was, whitespace included.
	    header_link_css = """
	.header-link a {
	text-decoration: none;
	color: #666;
	font-size: 0.9em;
	font-weight: bold;
	}
	.header-link a:hover {
	color: #2563eb;
	text-decoration: underline;
	}
	"""

	    # Theme, footer and CSS are all supplied at launch time.
	    demo.launch(
	        theme=custom_theme,
	        footer_links=footer_entries,
	        css=header_link_css,
	    )