Spaces:

timefractal
/

ACE-Step-Turbo-Music-Gen

Running

App Files Files Community

ACE-Step-Turbo-Music-Gen / app.py

$timefractal's picture$

timefractal

Fix: move pipe.to(cuda) inside @spaces.GPU for ZeroGPU compatibility

f0a4dff verified about 1 month ago

raw

history blame contribute delete

4.88 kB

	import spaces
	import gradio as gr
	import torch
	import numpy as np
	import tempfile
	import soundfile as sf
	from diffusers import AceStepPipeline

	# ─── Model Loading (CPU at module level for ZeroGPU) ─────────────────────
	# No device is requested here: the weights are loaded in bfloat16 at import
	# time, and the move to CUDA happens later inside generate_music().
	MODEL_ID = "ACE-Step/acestep-v15-xl-turbo-diffusers"
	pipe = AceStepPipeline.from_pretrained(
	    MODEL_ID,
	    torch_dtype=torch.bfloat16,
	)
	# Switch on tiling for the pipeline's VAE component.
	pipe.vae.enable_tiling()

	# ─── Inference ───────────────────────────────────────────────────────────

	@spaces.GPU(duration=180)
	def generate_music(prompt, lyrics, duration, seed, num_steps):
	    """Generate music from a text prompt and optional lyrics.

	    Args:
	        prompt: Text description of the music; must not be blank.
	        lyrics: Optional lyrics. Blank or ``None`` means no lyrics are passed
	            to the pipeline.
	        duration: Requested audio length in seconds.
	        seed: Integer seed for the CUDA random generator.
	        num_steps: Number of inference steps.

	    Returns:
	        Path of a temporary ``.wav`` file holding the generated audio.

	    Raises:
	        gr.Error: If the prompt is blank or no seed was supplied.
	    """
	    # Guard against None as well as whitespace-only text, so a missing value
	    # produces a readable UI error instead of an AttributeError/TypeError.
	    if not (prompt or "").strip():
	        raise gr.Error("Please enter a music prompt!")
	    if seed is None:
	        raise gr.Error("Please enter a seed!")

	    # Move to GPU inside @spaces.GPU where CUDA is available
	    pipe.to("cuda")

	    generator = torch.Generator(device="cuda").manual_seed(int(seed))

	    output = pipe(
	        prompt=prompt,
	        lyrics=lyrics if (lyrics or "").strip() else None,
	        audio_duration=float(duration),
	        num_inference_steps=int(num_steps),
	        generator=generator,
	    )

	    audio = output.audios[0]  # (channels, samples) @ 48kHz

	    # Convert to numpy for soundfile. The pipeline is loaded in bfloat16,
	    # which NumPy cannot represent, so upcast to float32 before converting.
	    if isinstance(audio, torch.Tensor):
	        audio = audio.detach().to(torch.float32).cpu().numpy()

	    # If stereo (2, samples), transpose for soundfile
	    if audio.ndim == 2:
	        audio = audio.T  # (samples, channels)

	    # Reserve a temp path and close the handle right away: delete=False keeps
	    # the file on disk, and soundfile then opens the path itself.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        wav_path = tmp.name
	    sf.write(wav_path, audio, samplerate=48000)

	    return wav_path

	# ─── UI ──────────────────────────────────────────────────────────────────

	# Markdown shown at the top of the page.
	DESCRIPTION = """
	# 🎵 ACE-Step Turbo — AI Music Generator

	Generate music from text descriptions and optional lyrics using ACE-Step v1.5 XL Turbo —
	a 5B-parameter flow-matching DiT model distilled for fast 8-step generation.

	### What's New
	This uses the brand-new diffusers-formatted Turbo model (released April 22, 2026) —
	guidance-distilled for speed without sacrificing quality. MIT licensed.
	"""

	# Example rows, one value per UI input: prompt, lyrics, duration, seed, steps.
	EXAMPLES = [
	    [
	        "An upbeat synthwave track with driving drums and a catchy lead synth melody",
	        (
	            "[Verse]\nNeon lights are calling me\nRunning through the city free\n"
	            "[Chorus]\nRide the wave tonight\nEverything feels right"
	        ),
	        30,
	        42,
	        8,
	    ],
	    [
	        "A peaceful acoustic guitar piece with gentle fingerpicking, nature ambience",
	        "",
	        20,
	        123,
	        8,
	    ],
	    [
	        "Heavy metal with distorted guitars, double bass drums, and aggressive vocals",
	        (
	            "[Verse]\nFire in the sky\nWe will never die\n"
	            "[Chorus]\nRise up, stand tall\nWe won't ever fall"
	        ),
	        30,
	        456,
	        8,
	    ],
	    [
	        "Lo-fi hip hop beats to relax/study to, warm vinyl crackle, mellow piano",
	        "",
	        30,
	        789,
	        8,
	    ],
	    [
	        "Epic orchestral film score with soaring strings, brass fanfare, and timpani",
	        "",
	        30,
	        321,
	        8,
	    ],
	    [
	        "Jazz trio — upright bass, piano, and brushed drums, smoky nightclub vibe",
	        "",
	        25,
	        654,
	        8,
	    ],
	]

	# Page layout: description, a row with text inputs and generation settings,
	# the audio player, clickable examples, and a footer with model details.
	with gr.Blocks(
	    title="ACE-Step Turbo — AI Music Generator",
	    theme=gr.themes.Soft(primary_hue="orange", secondary_hue="amber"),
	    css="footer { display: none !important; }",
	) as demo:
	    gr.Markdown(DESCRIPTION)

	    with gr.Row():
	        # Left column: free-text inputs.
	        with gr.Column(scale=2):
	            prompt = gr.Textbox(
	                label="🎶 Music Description",
	                placeholder="Describe the music style, instruments, mood, tempo...",
	                lines=3,
	            )
	            lyrics = gr.Textbox(
	                label="📝 Lyrics (Optional)",
	                placeholder="[Verse]\nYour lyrics here...\n[Chorus]\nChorus lyrics...",
	                lines=5,
	            )
	        # Right column: numeric settings and the trigger button.
	        with gr.Column(scale=1):
	            duration = gr.Slider(5, 60, value=30, step=5, label="⏱️ Duration (seconds)")
	            num_steps = gr.Slider(4, 16, value=8, step=1, label="🔄 Inference Steps")
	            seed = gr.Number(value=42, label="🎲 Seed", precision=0)
	            btn = gr.Button("🎵 Generate Music", variant="primary", size="lg")

	    audio_output = gr.Audio(label="Generated Music", type="filepath")

	    # One shared list so the button and the examples always feed
	    # generate_music() the same components in the same order as its signature.
	    generation_inputs = [prompt, lyrics, duration, seed, num_steps]

	    btn.click(
	        fn=generate_music,
	        inputs=generation_inputs,
	        outputs=audio_output,
	    )

	    gr.Examples(
	        examples=EXAMPLES,
	        inputs=generation_inputs,
	        outputs=audio_output,
	        fn=generate_music,
	        cache_examples=False,
	    )

	    # Plain "|" separators: a backslash before "|" is an invalid escape
	    # sequence in a normal (non-raw) string literal.
	    gr.Markdown("""
	---
	Model: [ACE-Step v1.5 XL Turbo](https://huggingface.co/ACE-Step/acestep-v15-xl-turbo-diffusers) | Architecture: 5B DiT, flow-matching, guidance-distilled | License: MIT | Audio: 48kHz stereo
	""")

	demo.launch()