| import spaces |
| import gradio as gr |
| import torch |
| import numpy as np |
| import tempfile |
| import soundfile as sf |
| from diffusers import AceStepPipeline |
|
|
| |
# Hub repository id of the diffusers-format checkpoint used by this app.
MODEL_ID = "ACE-Step/acestep-v15-xl-turbo-diffusers"

# Build the pipeline once at import time with bfloat16 weights; the request
# handler reuses this single module-level instance.
pipe = AceStepPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)

# Turn on tiling in the pipeline's VAE.
# NOTE(review): presumably to bound memory on longer clips; confirm.
pipe.vae.enable_tiling()
|
|
| |
|
|
@spaces.GPU(duration=180)
def generate_music(prompt, lyrics, duration, seed, num_steps):
    """Generate music from a text prompt and optional lyrics.

    Args:
        prompt: Text description of the music. Must contain at least one
            non-whitespace character.
        lyrics: Optional lyrics. Missing or blank lyrics are passed to the
            pipeline as ``None``.
        duration: Requested clip length in seconds, forwarded to the
            pipeline as ``audio_duration``.
        seed: Seed for the CUDA random generator. ``None`` (for example a
            cleared number field) selects a non-deterministic seed.
        num_steps: Number of inference steps.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is intentionally not deleted here because the caller needs it
        to exist after this function returns.

    Raises:
        gr.Error: If ``prompt`` is missing, empty, or only whitespace.
    """
    # A field may arrive as None rather than ""; treat both as empty so the
    # user gets the friendly error instead of an AttributeError.
    if not (prompt or "").strip():
        raise gr.Error("Please enter a music prompt!")

    # The pipeline is moved to the GPU inside the decorated call.
    # NOTE(review): presumably because the GPU is only attached while the
    # @spaces.GPU function runs; confirm.
    pipe.to("cuda")

    generator = torch.Generator(device="cuda")
    if seed is None:
        # No seed supplied: let torch pick one instead of failing on int(None).
        generator.seed()
    else:
        generator.manual_seed(int(seed))

    output = pipe(
        prompt=prompt,
        lyrics=lyrics if lyrics and lyrics.strip() else None,
        audio_duration=float(duration),
        num_inference_steps=int(num_steps),
        generator=generator,
    )
    audio = output.audios[0]

    if isinstance(audio, torch.Tensor):
        # The model runs in bfloat16, which NumPy cannot represent; upcast
        # floating-point tensors to float32 before converting.
        if audio.is_floating_point():
            audio = audio.float()
        audio = audio.cpu().numpy()

    # soundfile wants (frames, channels).
    # NOTE(review): assumes a 2-D result is (channels, samples); confirm
    # against the pipeline's output layout.
    if audio.ndim == 2:
        audio = audio.T

    # Reserve a unique path and close the handle right away so the descriptor
    # is not leaked and soundfile can open the file itself on any platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    # 48 kHz matches the sample rate advertised in the page footer.
    sf.write(wav_path, audio, samplerate=48000)
    return wav_path
|
|
| |
|
|
# Markdown shown at the top of the page (rendered via gr.Markdown).
DESCRIPTION = """
# 🎵 ACE-Step Turbo — AI Music Generator

Generate music from text descriptions and optional lyrics using **ACE-Step v1.5 XL Turbo** —
a 5B-parameter flow-matching DiT model distilled for fast 8-step generation.

### What's New
This uses the **brand-new diffusers-formatted Turbo model** (released April 22, 2026) —
guidance-distilled for speed without sacrificing quality. MIT licensed.
"""
|
|
# Preset inputs for the examples widget.
# Each row is [prompt, lyrics, duration, seed, num_steps], in the same order
# as the parameters of generate_music.
EXAMPLES = [
    [
        "An upbeat synthwave track with driving drums and a catchy lead synth melody",
        "[Verse]\nNeon lights are calling me\nRunning through the city free\n[Chorus]\nRide the wave tonight\nEverything feels right",
        30,
        42,
        8,
    ],
    [
        "A peaceful acoustic guitar piece with gentle fingerpicking, nature ambience",
        "",
        20,
        123,
        8,
    ],
    [
        "Heavy metal with distorted guitars, double bass drums, and aggressive vocals",
        "[Verse]\nFire in the sky\nWe will never die\n[Chorus]\nRise up, stand tall\nWe won't ever fall",
        30,
        456,
        8,
    ],
    [
        "Lo-fi hip hop beats to relax/study to, warm vinyl crackle, mellow piano",
        "",
        30,
        789,
        8,
    ],
    [
        "Epic orchestral film score with soaring strings, brass fanfare, and timpani",
        "",
        30,
        321,
        8,
    ],
    [
        "Jazz trio — upright bass, piano, and brushed drums, smoky nightclub vibe",
        "",
        25,
        654,
        8,
    ],
]
|
|
# Page layout: header, input controls, audio player, examples, footer.
with gr.Blocks(
    title="ACE-Step Turbo — AI Music Generator",
    theme=gr.themes.Soft(primary_hue="orange", secondary_hue="amber"),
    css="footer { display: none !important; }",
) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Wider column: the two free-text inputs.
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="🎶 Music Description",
                placeholder="Describe the music style, instruments, mood, tempo...",
                lines=3,
            )
            lyrics = gr.Textbox(
                label="📝 Lyrics (Optional)",
                placeholder="[Verse]\nYour lyrics here...\n[Chorus]\nChorus lyrics...",
                lines=5,
            )
        # Narrower column: numeric settings and the trigger button.
        with gr.Column(scale=1):
            duration = gr.Slider(
                minimum=5,
                maximum=60,
                value=30,
                step=5,
                label="⏱️ Duration (seconds)",
            )
            num_steps = gr.Slider(
                minimum=4,
                maximum=16,
                value=8,
                step=1,
                label="🔄 Inference Steps",
            )
            seed = gr.Number(value=42, label="🎲 Seed", precision=0)
            btn = gr.Button("🎵 Generate Music", variant="primary", size="lg")

    audio_output = gr.Audio(label="Generated Music", type="filepath")

    # Same order as the parameters of generate_music and the columns of
    # each EXAMPLES row.
    generation_inputs = [prompt, lyrics, duration, seed, num_steps]

    btn.click(
        fn=generate_music,
        inputs=generation_inputs,
        outputs=audio_output,
    )

    # Example outputs are not cached (cache_examples=False).
    gr.Examples(
        examples=EXAMPLES,
        inputs=generation_inputs,
        outputs=audio_output,
        fn=generate_music,
        cache_examples=False,
    )

    gr.Markdown("""
---
**Model:** [ACE-Step v1.5 XL Turbo](https://huggingface.co/ACE-Step/acestep-v15-xl-turbo-diffusers) | **Architecture:** 5B DiT, flow-matching, guidance-distilled | **License:** MIT | **Audio:** 48kHz stereo
""")

# Start the Gradio server when the module is executed.
demo.launch()
|
|