Buckets:
| import{s as It,o as Et,n as Gt}from"../chunks/scheduler.53228c21.js";import{S as Zt,i as Vt,e as r,s as o,c as f,h as Nt,a as l,d as n,b as s,f as A,g,j as d,k as S,l as a,m as i,n as h,t as _,o as y,p as v}from"../chunks/index.100fac89.js";import{C as Dt}from"../chunks/CopyLLMTxtMenu.af3e1493.js";import{D as oe}from"../chunks/Docstring.147b33f1.js";import{C as qt}from"../chunks/CodeBlock.0adb3827.js";import{E as zt}from"../chunks/ExampleCodeBlock.6be04f7a.js";import{H as Le,E as Lt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.b5eefd91.js";function Wt(be){let m,P="Examples:",x,M,b;return M=new qt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBzb3VuZGZpbGUlMjBhcyUyMHNmJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEFjZVN0ZXBQaXBlbGluZSUwQSUwQXBpcGUlMjAlM0QlMjBBY2VTdGVwUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkFDRS1TdGVwJTJGQWNlLVN0ZXAxLjUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwVGV4dC10by1tdXNpYyUyMGdlbmVyYXRpb24lMjB3aXRoJTIwbWV0YWRhdGElMEFhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEJTIyQSUyMGJlYXV0aWZ1bCUyMHBpYW5vJTIwcGllY2UlMjB3aXRoJTIwc29mdCUyMG1lbG9kaWVzJTIyJTJDJTBBJTIwJTIwJTIwJTIwbHlyaWNzJTNEJTIyJTVCdmVyc2UlNUQlNUNuU29mdCUyMG5vdGVzJTIwaW4lMjB0aGUlMjBtb3JuaW5nJTIwbGlnaHQlNUNuJTVCY2hvcnVzJTVEJTVDbk11c2ljJTIwZmlsbHMlMjB0aGUlMjBhaXIlMjB0b25pZ2h0JTIyJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fZHVyYXRpb24lM0QzMC4wJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDglMkMlMEElMjAlMjAlMjAlMjBicG0lM0QxMjAlMkMlMEElMjAlMjAlMjAlMjBrZXlzY2FsZSUzRCUyMkMlMjBtYWpvciUyMiUyQyUwQSUyMCUyMCUyMCUyMHRpbWVzaWduYXR1cmUlM0QlMjI0JTIyJTJDJTBBKS5hdWRpb3MlMEElMEElMjMlMjBTYXZlJTIwdGhlJTIwZ2VuZXJhdGVkJTIwYXVkaW8lMEFzZi53cml0ZSglMjJvdXRwdXQud2F2JTIyJTJDJTIwYXVkaW8lNUIwJTJDJTIwMCU1RC5jcHUoKS5udW1weSgpJTJDJTIwNDgwMDApJTBBJTBBJTIzJTIwUmVwYWludCUyMHRhc2slM0ElMjByZWdlbmVyYXRlJTIwYSUyMHNlY3Rpb24lMjBvZiUyMGV4aXN0aW5nJTIwc3RlcmVvJTIwNDhrSHolMjBhdWRpbyUwQXNyY19hdWRpbyUyQyUyMHNyJTIwJTNEJTIwc2YucmVhZCglMjJpbnB1dC53YXYlMjIpJTBBc3JjX2F1ZGlvJTIwJTNEJTIwdG9yY2guZnJvbV9udW1weShzcmNfYXVkaW8pLmZsb2F0KCkuVCUwQWF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0QlMjJFcGljJTIwcm9jayUyMGd1aXRhciUyMHNvbG8lMjIlMkMlMEElMjAlMjAlMjAlMjBseXJpY3MlM0QlMjIlMjIlMkMlMEElMjAlMjAlMjAlMjB0YXNrX3R5cGUlM0QlMjJyZXBhaW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwc3JjX2F1ZGlvJTNEc3JjX2F1ZGlvJTJDJTBBJTIwJTIwJTIwJTIwcmVwYWludGluZ19zdGFydCUzRDEwLjAlMkMlMEElMjAlMjAlMjAlMjByZXBhaW50aW5nX2VuZCUzRDIwLjAlMkMlMEEpLmF1ZGlvcyUwQSUwQSUyMyUyMENvdmVyJTIwdGFzayUyMHdpdGglMjByZWZlcmVuY2UlMjBhdWRpbyUyMGZvciUyMHRpbWJyZSUyMHRyYW5zZmVyJTBBcmVmX2F1ZGlvJTJDJTIwc3IlMjAlM0QlMjBzZi5yZWFkKCUyMnJlZmVyZW5jZS53YXYlMjIpJTBBcmVmX2F1ZGlvJTIwJTNEJTIwdG9yY2guZnJvbV9udW1weShyZWZfYXVkaW8pLmZsb2F0KCkuVCUwQWF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0QlMjJQb3AlMjBzb25nJTIwd2l0aCUyMGJyaWdodCUyMHZvY2FscyUyMiUyQyUwQSUyMCUyMCUyMCUyMGx5cmljcyUzRCUyMiU1QnZlcnNlJTVEJTVDbkhlbGxvJTIwd29ybGQlMjIlMkMlMEElMjAlMjAlMjAlMjB0YXNrX3R5cGUlM0QlMjJjb3ZlciUyMiUyQyUwQSUyMCUyMCUyMCUyMHJlZmVyZW5jZV9hdWRpbyUzRHJlZl9hdWRpbyUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2NvdmVyX3N0cmVuZ3RoJTNEMC44JTJDJTBBKS5hdWRpb3M=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> soundfile <span class="hljs-keyword">as</span> sf | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AceStepPipeline | |
| <span class="hljs-meta">>>> </span>pipe = AceStepPipeline.from_pretrained(<span class="hljs-string">"ACE-Step/Ace-Step1.5"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Text-to-music generation with metadata</span> | |
| <span class="hljs-meta">>>> </span>audio = pipe( | |
| <span class="hljs-meta">... </span> prompt=<span class="hljs-string">"A beautiful piano piece with soft melodies"</span>, | |
| <span class="hljs-meta">... </span> lyrics=<span class="hljs-string">"[verse]\\nSoft notes in the morning light\\n[chorus]\\nMusic fills the air tonight"</span>, | |
| <span class="hljs-meta">... </span> audio_duration=<span class="hljs-number">30.0</span>, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">8</span>, | |
| <span class="hljs-meta">... </span> bpm=<span class="hljs-number">120</span>, | |
| <span class="hljs-meta">... </span> keyscale=<span class="hljs-string">"C major"</span>, | |
| <span class="hljs-meta">... </span> timesignature=<span class="hljs-string">"4"</span>, | |
| <span class="hljs-meta">... </span>).audios | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Save the generated audio</span> | |
| <span class="hljs-meta">>>> </span>sf.write(<span class="hljs-string">"output.wav"</span>, audio[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>].cpu().numpy(), <span class="hljs-number">48000</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Repaint task: regenerate a section of existing stereo 48kHz audio</span> | |
| <span class="hljs-meta">>>> </span>src_audio, sr = sf.read(<span class="hljs-string">"input.wav"</span>) | |
| <span class="hljs-meta">>>> </span>src_audio = torch.from_numpy(src_audio).<span class="hljs-built_in">float</span>().T | |
| <span class="hljs-meta">>>> </span>audio = pipe( | |
| <span class="hljs-meta">... </span> prompt=<span class="hljs-string">"Epic rock guitar solo"</span>, | |
| <span class="hljs-meta">... </span> lyrics=<span class="hljs-string">""</span>, | |
| <span class="hljs-meta">... </span> task_type=<span class="hljs-string">"repaint"</span>, | |
| <span class="hljs-meta">... </span> src_audio=src_audio, | |
| <span class="hljs-meta">... </span> repainting_start=<span class="hljs-number">10.0</span>, | |
| <span class="hljs-meta">... </span> repainting_end=<span class="hljs-number">20.0</span>, | |
| <span class="hljs-meta">... </span>).audios | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Cover task with reference audio for timbre transfer</span> | |
| <span class="hljs-meta">>>> </span>ref_audio, sr = sf.read(<span class="hljs-string">"reference.wav"</span>) | |
| <span class="hljs-meta">>>> </span>ref_audio = torch.from_numpy(ref_audio).<span class="hljs-built_in">float</span>().T | |
| <span class="hljs-meta">>>> </span>audio = pipe( | |
| <span class="hljs-meta">... </span> prompt=<span class="hljs-string">"Pop song with bright vocals"</span>, | |
| <span class="hljs-meta">... </span> lyrics=<span class="hljs-string">"[verse]\\nHello world"</span>, | |
| <span class="hljs-meta">... </span> task_type=<span class="hljs-string">"cover"</span>, | |
| <span class="hljs-meta">... </span> reference_audio=ref_audio, | |
| <span class="hljs-meta">... </span> audio_cover_strength=<span class="hljs-number">0.8</span>, | |
| <span class="hljs-meta">... </span>).audios`,lang:"py",wrap:!1}}),{c(){m=r("p"),m.textContent=P,x=o(),f(M.$$.fragment)},l(u){m=l(u,"P",{"data-svelte-h":!0}),d(m)!=="svelte-kvfsh7"&&(m.textContent=P),x=s(u),g(M.$$.fragment,u)},m(u,T){i(u,m,T),i(u,x,T),h(M,u,T),b=!0},p:Gt,i(u){b||(_(M.$$.fragment,u),b=!0)},o(u){y(M.$$.fragment,u),b=!1},d(u){u&&(n(m),n(x)),v(M,u)}}}function Ht(be){let m,P,x,M,b,u,T,Te,B,dt='ACE-Step 1.5 was introduced in <a href="https://arxiv.org/abs/2602.00744" rel="nofollow">ACE-Step 1.5: Pushing the Boundaries of Open-Source Music Generation</a> by the ACE-Step Team (ACE Studio and StepFun). It is an open-source music foundation model that generates commercial-grade stereo music with lyrics from text prompts.',xe,q,ut='ACE-Step 1.5 generates variable-length stereo audio at 48 kHz (10 seconds to 10 minutes) from text prompts and optional lyrics. The full system pairs a Language Model planner with a Diffusion Transformer (DiT) synthesizer; this pipeline wraps the DiT half of that stack, and consists of three components: an <a href="/docs/diffusers/pr_13751/en/api/models/autoencoder_oobleck#diffusers.AutoencoderOobleck">AutoencoderOobleck</a> VAE that compresses waveforms into 25 Hz stereo latents, a Qwen3-based text encoder for prompt and lyric conditioning, and an <a href="/docs/diffusers/pr_13751/en/api/models/ace_step_transformer#diffusers.AceStepTransformer1DModel">AceStepTransformer1DModel</a> DiT that operates in the VAE latent space using flow matching.',we,I,mt="The model supports 50+ languages for lyrics — including English, Chinese, Japanese, Korean, French, German, Spanish, Italian, Portuguese, and Russian — and runs on consumer GPUs (under 4 GB of VRAM when offloaded).",je,E,ft='This pipeline was contributed by the <a href="https://github.com/ace-step" rel="nofollow">ACE-Step Team</a>. The original codebase can be found at <a href="https://github.com/ace-step/ACE-Step-1.5" rel="nofollow">ace-step/ACE-Step-1.5</a>.',Je,G,Ae,Z,gt="ACE-Step 1.5 ships three DiT checkpoints that share the same transformer architecture but differ in guidance behavior; the pipeline auto-detects turbo checkpoints from the loaded transformer config and ignores CFG guidance for those guidance-distilled weights.",Se,V,ht='<thead><tr><th>Variant</th> <th align="center">CFG</th> <th align="center">Default steps</th> <th align="center">Default <code>guidance_scale</code></th> <th align="center">Default <code>shift</code></th> <th>HF repo</th></tr></thead> <tbody><tr><td><code>turbo</code> (guidance-distilled)</td> <td align="center">off</td> <td align="center">8</td> <td align="center">ignored</td> <td align="center">3.0</td> <td><a href="https://huggingface.co/ACE-Step/Ace-Step1.5" rel="nofollow"><code>ACE-Step/Ace-Step1.5</code></a></td></tr> <tr><td><code>base</code></td> <td align="center">on</td> <td align="center">8</td> <td align="center">7.0</td> <td align="center">3.0</td> <td><a href="https://huggingface.co/ACE-Step/acestep-v15-base" rel="nofollow"><code>ACE-Step/acestep-v15-base</code></a></td></tr> <tr><td><code>sft</code></td> <td align="center">on</td> <td align="center">8</td> <td align="center">7.0</td> <td align="center">3.0</td> <td><a href="https://huggingface.co/ACE-Step/acestep-v15-sft" rel="nofollow"><code>ACE-Step/acestep-v15-sft</code></a></td></tr></tbody>',ke,N,_t="Base and SFT use the learned <code>null_condition_emb</code> for classifier-free guidance (APG, not vanilla CFG). Users commonly override <code>num_inference_steps</code> to 30–60 on base/sft for higher quality.",Ce,D,Ue,z,yt="When constructing a prompt, keep in mind:",$e,L,vt="<li>Descriptive prompt inputs work best; use adjectives to describe the music style, instruments, mood, and tempo.</li> <li>The prompt should describe the overall musical characteristics (e.g., “upbeat pop song with electric guitar and drums”).</li> <li>Lyrics should be structured with tags like <code>[verse]</code>, <code>[chorus]</code>, <code>[bridge]</code>, etc.</li>",Pe,W,Mt="During inference:",Be,H,bt="<li><code>num_inference_steps</code>, <code>guidance_scale</code>, and <code>shift</code> default to the values shown above. For turbo checkpoints, <code>guidance_scale > 1.0</code> is ignored with a warning because guidance is distilled into the weights.</li> <li>The <code>audio_duration</code> parameter controls the length of the generated music in seconds.</li> <li>The <code>vocal_language</code> parameter should match the language of the lyrics.</li> <li><code>pipe.sample_rate</code> and <code>pipe.latents_per_second</code> are sourced from the VAE config (48000 Hz and 25 fps for the released checkpoints).</li> <li>For audio-to-audio tasks, pass <code>src_audio</code> and <code>reference_audio</code> as preprocessed stereo tensors at <code>pipe.sample_rate</code>.</li> <li><code>flash</code> and <code>flash_hub</code> use FlashAttention’s native sliding-window support for ACE-Step’s self-attention and expect unpadded text batches. If a batched prompt contains padding, use <code>flash_varlen</code> or <code>flash_varlen_hub</code> instead. Single-prompt inference with <code>padding="longest"</code> is normally unpadded.</li>",qe,X,Ie,Q,Ee,c,R,We,se,Tt="Pipeline for text-to-music generation using ACE-Step 1.5.",He,ae,xt=`This model inherits from <a href="/docs/diffusers/pr_13751/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Xe,ie,wt=`The pipeline uses flow matching with a custom timestep schedule for the diffusion process. The turbo model variant | |
| uses 8 inference steps by default.`,Qe,re,jt="Supported task types:",Re,le,Jt="<li><code>"text2music"</code>: Generate music from text prompts and lyrics.</li> <li><code>"cover"</code>: Generate audio from source audio / semantic codes with timbre transfer from reference audio.</li> <li><code>"repaint"</code>: Regenerate a section of existing audio while keeping the rest.</li> <li><code>"extract"</code>: Extract a specific track (e.g., vocals, drums) from audio.</li> <li><code>"lego"</code>: Generate a specific track based on audio context.</li> <li><code>"complete"</code>: Complete an input audio with additional tracks.</li>",Fe,w,F,Ye,ce,At="The call function to the pipeline for music generation.",Oe,k,Ke,C,Y,et,pe,St="Validate user-facing arguments before we start allocating noise tensors.",tt,j,O,nt,de,kt="Encode text prompts and lyrics into embeddings.",ot,ue,Ct=`Text prompts are encoded through the full text encoder model to produce contextual hidden states. Lyrics are | |
| only passed through the text encoder’s embedding layer (token lookup), since the lyric encoder in the condition | |
| encoder handles the contextual encoding.`,st,U,K,at,me,Ut="Prepare initial noise latents for the flow matching process.",it,J,ee,rt,fe,$t="Process reference audio into acoustic latents for the timbre encoder.",lt,ge,Pt=`The reference audio is repeated/cropped to 30 seconds (3 segments of 10 seconds each from front, middle, and | |
| back), encoded through the VAE, and then transposed for the timbre encoder.`,ct,$,te,pt,he,Bt="Prepare source latents for text-to-music and audio-to-audio tasks.",Ge,ne,Ze,Me,Ve;return b=new Dt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),T=new Le({props:{title:"ACE-Step 1.5",local:"ace-step-15",headingTag:"h1"}}),G=new Le({props:{title:"Variants",local:"variants",headingTag:"h2"}}),D=new Le({props:{title:"Tips",local:"tips",headingTag:"h2"}}),X=new qt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBzb3VuZGZpbGUlMjBhcyUyMHNmJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEFjZVN0ZXBQaXBlbGluZSUwQSUwQXBpcGUlMjAlM0QlMjBBY2VTdGVwUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkFDRS1TdGVwJTJGQWNlLVN0ZXAxLjUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBYXVkaW8lMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRCUyMkElMjBiZWF1dGlmdWwlMjBwaWFubyUyMHBpZWNlJTIwd2l0aCUyMHNvZnQlMjBtZWxvZGllcyUyMGFuZCUyMGdlbnRsZSUyMHJoeXRobSUyMiUyQyUwQSUyMCUyMCUyMCUyMGx5cmljcyUzRCUyMiU1QnZlcnNlJTVEJTVDblNvZnQlMjBub3RlcyUyMGluJTIwdGhlJTIwbW9ybmluZyUyMGxpZ2h0JTVDbkRhbmNpbmclMjB0aHJvdWdoJTIwdGhlJTIwYWlyJTIwc28lMjBicmlnaHQlNUNuJTVCY2hvcnVzJTVEJTVDbk11c2ljJTIwZmlsbHMlMjB0aGUlMjBhaXIlMjB0b25pZ2h0JTVDbkV2ZXJ5JTIwbm90ZSUyMGZlZWxzJTIwanVzdCUyMHJpZ2h0JTIyJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fZHVyYXRpb24lM0QzMC4wJTJDJTBBKS5hdWRpb3MlMEElMEFzZi53cml0ZSglMjJvdXRwdXQud2F2JTIyJTJDJTIwYXVkaW8lNUIwJTVELlQuY3B1KCkuZmxvYXQoKS5udW1weSgpJTJDJTIwcGlwZS5zYW1wbGVfcmF0ZSk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> soundfile <span class="hljs-keyword">as</span> sf | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AceStepPipeline | |
| pipe = AceStepPipeline.from_pretrained(<span class="hljs-string">"ACE-Step/Ace-Step1.5"</span>, torch_dtype=torch.bfloat16) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| audio = pipe( | |
| prompt=<span class="hljs-string">"A beautiful piano piece with soft melodies and gentle rhythm"</span>, | |
| lyrics=<span class="hljs-string">"[verse]\\nSoft notes in the morning light\\nDancing through the air so bright\\n[chorus]\\nMusic fills the air tonight\\nEvery note feels just right"</span>, | |
| audio_duration=<span class="hljs-number">30.0</span>, | |
| ).audios | |
| sf.write(<span class="hljs-string">"output.wav"</span>, audio[<span class="hljs-number">0</span>].T.cpu().<span class="hljs-built_in">float</span>().numpy(), pipe.sample_rate)`,lang:"python",wrap:!1}}),Q=new Le({props:{title:"AceStepPipeline",local:"diffusers.AceStepPipeline",headingTag:"h2"}}),R=new oe({props:{name:"class diffusers.AceStepPipeline",anchor:"diffusers.AceStepPipeline",parameters:[{name:"vae",val:": AutoencoderOobleck"},{name:"text_encoder",val:": PreTrainedModel"},{name:"tokenizer",val:": TokenizersBackend"},{name:"transformer",val:": AceStepTransformer1DModel"},{name:"condition_encoder",val:": AceStepConditionEncoder"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"audio_tokenizer",val:": typing.Optional[diffusers.pipelines.ace_step.modeling_ace_step.AceStepAudioTokenizer] = None"},{name:"audio_token_detokenizer",val:": typing.Optional[diffusers.pipelines.ace_step.modeling_ace_step.AceStepAudioTokenDetokenizer] = None"}],parametersDescription:[{anchor:"diffusers.AceStepPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_13751/en/api/models/autoencoder_oobleck#diffusers.AutoencoderOobleck">AutoencoderOobleck</a>) — | |
| Variational Auto-Encoder (VAE) model to encode and decode audio waveforms to and from latent | |
| representations.`,name:"vae"},{anchor:"diffusers.AceStepPipeline.text_encoder",description:`<strong>text_encoder</strong> (<a href="https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModel" rel="nofollow">AutoModel</a>) — | |
| Text encoder model (e.g., Qwen3-Embedding-0.6B) for encoding text prompts and lyrics.`,name:"text_encoder"},{anchor:"diffusers.AceStepPipeline.tokenizer",description:`<strong>tokenizer</strong> (<a href="https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoTokenizer" rel="nofollow">AutoTokenizer</a>) — | |
| Tokenizer for the text encoder.`,name:"tokenizer"},{anchor:"diffusers.AceStepPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_13751/en/api/models/ace_step_transformer#diffusers.AceStepTransformer1DModel">AceStepTransformer1DModel</a>) — | |
| The Diffusion Transformer (DiT) model for denoising audio latents.`,name:"transformer"},{anchor:"diffusers.AceStepPipeline.condition_encoder",description:`<strong>condition_encoder</strong> (<code>AceStepConditionEncoder</code>) — | |
| Condition encoder that combines text, lyric, and timbre embeddings for cross-attention.`,name:"condition_encoder"},{anchor:"diffusers.AceStepPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_13751/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) — | |
| Flow-matching Euler scheduler. ACE-Step feeds the DiT timesteps in <code>[0, 1]</code>, so the scheduler is configured | |
| with <code>num_train_timesteps=1</code> and <code>shift=1.0</code> — the pipeline computes its shifted / turbo sigma schedule | |
| itself and passes it via <code>set_timesteps(sigmas=...)</code>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L130"}}),F=new oe({props:{name:"__call__",anchor:"diffusers.AceStepPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"lyrics",val:": typing.Union[str, typing.List[str]] = ''"},{name:"audio_duration",val:": float = 60.0"},{name:"vocal_language",val:": typing.Union[str, typing.List[str]] = 'en'"},{name:"num_inference_steps",val:": int = 8"},{name:"guidance_scale",val:": float = 7.0"},{name:"shift",val:": float = 3.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pt'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": typing.Optional[int] = 1"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[..., dict]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ('latents',)"},{name:"instruction",val:": typing.Optional[str] = None"},{name:"max_text_length",val:": int = 256"},{name:"max_lyric_length",val:": int = 2048"},{name:"bpm",val:": typing.Optional[int] = None"},{name:"keyscale",val:": typing.Optional[str] = None"},{name:"timesignature",val:": typing.Optional[str] = None"},{name:"task_type",val:": str = 'text2music'"},{name:"track_name",val:": typing.Optional[str] = None"},{name:"complete_track_classes",val:": typing.Optional[typing.List[str]] = None"},{name:"src_audio",val:": typing.Optional[torch.Tensor] = None"},{name:"reference_audio",val:": typing.Optional[torch.Tensor] = None"},{name:"audio_codes",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"repainting_start",val:": typing.Optional[float] = None"},{name:"repainting_end",val:": typing.Optional[float] = None"},{name:"audio_cover_strength",val:": float = 1.0"},{name:"cfg_interval_start",val:": float = 0.0"},{name:"cfg_interval_end",val:": float = 1.0"},{name:"timesteps",val:": typing.Optional[typing.List[float]] = None"}],parametersDescription:[{anchor:"diffusers.AceStepPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide music generation. Describes the style, genre, instruments, etc.`,name:"prompt"},{anchor:"diffusers.AceStepPipeline.__call__.lyrics",description:`<strong>lyrics</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>, defaults to <code>""</code>) — | |
| The lyrics text for the music. Supports structured lyrics with tags like <code>[verse]</code>, <code>[chorus]</code>, etc.`,name:"lyrics"},{anchor:"diffusers.AceStepPipeline.__call__.audio_duration",description:`<strong>audio_duration</strong> (<code>float</code>, <em>optional</em>, defaults to 60.0) — | |
| Duration of the generated audio in seconds.`,name:"audio_duration"},{anchor:"diffusers.AceStepPipeline.__call__.vocal_language",description:`<strong>vocal_language</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>, defaults to <code>"en"</code>) — | |
| Language code for the lyrics (e.g., <code>"en"</code>, <code>"zh"</code>, <code>"ja"</code>).`,name:"vocal_language"},{anchor:"diffusers.AceStepPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of denoising steps. The turbo model is designed for 8 steps.`,name:"num_inference_steps"},{anchor:"diffusers.AceStepPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.0) — | |
| Guidance scale for classifier-free guidance. A value of 1.0 disables CFG.`,name:"guidance_scale"},{anchor:"diffusers.AceStepPipeline.__call__.shift",description:`<strong>shift</strong> (<code>float</code>, <em>optional</em>, defaults to 3.0) — | |
| Shift parameter for the timestep schedule (1.0, 2.0, or 3.0).`,name:"shift"},{anchor:"diffusers.AceStepPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A generator to make generation deterministic.`,name:"generator"},{anchor:"diffusers.AceStepPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noise latents of shape <code>(batch_size, latent_length, acoustic_dim)</code>.`,name:"latents"},{anchor:"diffusers.AceStepPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pt"</code>) — | |
| Output format. <code>"pt"</code> for PyTorch tensor, <code>"np"</code> for NumPy array, <code>"latent"</code> for raw latents.`,name:"output_type"},{anchor:"diffusers.AceStepPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to return an <code>AudioPipelineOutput</code> or a plain tuple.`,name:"return_dict"},{anchor:"diffusers.AceStepPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function called every <code>callback_steps</code> steps with <code>(step, timestep, latents)</code>.`,name:"callback"},{anchor:"diffusers.AceStepPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Frequency of the callback function.`,name:"callback_steps"},{anchor:"diffusers.AceStepPipeline.__call__.instruction",description:`<strong>instruction</strong> (<code>str</code>, <em>optional</em>) — | |
| Custom instruction text for the generation task. If not provided, it is auto-generated based on | |
| <code>task_type</code>.`,name:"instruction"},{anchor:"diffusers.AceStepPipeline.__call__.max_text_length",description:`<strong>max_text_length</strong> (<code>int</code>, <em>optional</em>, defaults to 256) — | |
| Maximum token length for text prompt encoding.`,name:"max_text_length"},{anchor:"diffusers.AceStepPipeline.__call__.max_lyric_length",description:`<strong>max_lyric_length</strong> (<code>int</code>, <em>optional</em>, defaults to 2048) — | |
| Maximum token length for lyrics encoding.`,name:"max_lyric_length"},{anchor:"diffusers.AceStepPipeline.__call__.bpm",description:`<strong>bpm</strong> (<code>int</code>, <em>optional</em>) — | |
| BPM (beats per minute) for music metadata. If <code>None</code>, the model estimates it.`,name:"bpm"},{anchor:"diffusers.AceStepPipeline.__call__.keyscale",description:`<strong>keyscale</strong> (<code>str</code>, <em>optional</em>) — | |
| Musical key (e.g., <code>"C major"</code>, <code>"A minor"</code>). If <code>None</code>, the model estimates it.`,name:"keyscale"},{anchor:"diffusers.AceStepPipeline.__call__.timesignature",description:`<strong>timesignature</strong> (<code>str</code>, <em>optional</em>) — | |
| Time signature (e.g., <code>"4"</code> for 4/4, <code>"3"</code> for 3/4). If <code>None</code>, the model estimates it.`,name:"timesignature"},{anchor:"diffusers.AceStepPipeline.__call__.task_type",description:`<strong>task_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"text2music"</code>) — | |
| The generation task type. One of <code>"text2music"</code>, <code>"cover"</code>, <code>"repaint"</code>, <code>"extract"</code>, <code>"lego"</code>, | |
| <code>"complete"</code>.`,name:"task_type"},{anchor:"diffusers.AceStepPipeline.__call__.track_name",description:`<strong>track_name</strong> (<code>str</code>, <em>optional</em>) — | |
| Track name for <code>"extract"</code> or <code>"lego"</code> tasks (e.g., <code>"vocals"</code>, <code>"drums"</code>).`,name:"track_name"},{anchor:"diffusers.AceStepPipeline.__call__.complete_track_classes",description:`<strong>complete_track_classes</strong> (<code>List[str]</code>, <em>optional</em>) — | |
| Track classes for the <code>"complete"</code> task.`,name:"complete_track_classes"},{anchor:"diffusers.AceStepPipeline.__call__.src_audio",description:`<strong>src_audio</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Source audio tensor of shape <code>[channels, samples]</code> at 48kHz for audio-to-audio tasks (repaint, lego, | |
| cover, extract, complete). The audio is encoded through the VAE to produce source latents.`,name:"src_audio"},{anchor:"diffusers.AceStepPipeline.__call__.reference_audio",description:`<strong>reference_audio</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Reference audio tensor of shape <code>[channels, samples]</code> at 48kHz for timbre conditioning. Used to extract | |
| timbre features for style transfer.`,name:"reference_audio"},{anchor:"diffusers.AceStepPipeline.__call__.audio_codes",description:`<strong>audio_codes</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| Audio semantic code strings (e.g. <code>"<|audio_code_123|><|audio_code_456|>..."</code>). When provided, the task | |
| is automatically switched to <code>"cover"</code> mode and the registered ACE-Step audio tokenizer / detokenizer | |
| modules decode the 5 Hz codes into 25 Hz acoustic conditioning.`,name:"audio_codes"},{anchor:"diffusers.AceStepPipeline.__call__.repainting_start",description:`<strong>repainting_start</strong> (<code>float</code>, <em>optional</em>) — | |
| Start time in seconds for the repaint region (for <code>"repaint"</code> and <code>"lego"</code> tasks).`,name:"repainting_start"},{anchor:"diffusers.AceStepPipeline.__call__.repainting_end",description:`<strong>repainting_end</strong> (<code>float</code>, <em>optional</em>) — | |
| End time in seconds for the repaint region. Use <code>-1</code> or <code>None</code> for until end.`,name:"repainting_end"},{anchor:"diffusers.AceStepPipeline.__call__.audio_cover_strength",description:`<strong>audio_cover_strength</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) — | |
| Strength of audio cover blending (0.0 to 1.0). When < 1.0, blends cover-conditioned and | |
| text-only-conditioned outputs. Lower values produce more style transfer effect.`,name:"audio_cover_strength"},{anchor:"diffusers.AceStepPipeline.__call__.cfg_interval_start",description:`<strong>cfg_interval_start</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Start ratio (0.0-1.0) of the timestep range where CFG is applied.`,name:"cfg_interval_start"},{anchor:"diffusers.AceStepPipeline.__call__.cfg_interval_end",description:`<strong>cfg_interval_end</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) — | |
| End ratio (0.0-1.0) of the timestep range where CFG is applied.`,name:"cfg_interval_end"},{anchor:"diffusers.AceStepPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom timestep schedule. If provided, overrides <code>num_inference_steps</code> and <code>shift</code>.`,name:"timesteps"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L777",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, an <code>AudioPipelineOutput</code> is returned, otherwise a tuple with the generated | |
| audio.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_13751/en/api/pipelines/audioldm2#diffusers.AudioPipelineOutput" | |
| >AudioPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),k=new zt({props:{anchor:"diffusers.AceStepPipeline.__call__.example",$$slots:{default:[Wt]},$$scope:{ctx:be}}}),Y=new oe({props:{name:"check_inputs",anchor:"diffusers.AceStepPipeline.check_inputs",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"lyrics",val:": typing.Union[str, typing.List[str]]"},{name:"task_type",val:": str"},{name:"num_inference_steps",val:": int"},{name:"guidance_scale",val:": float"},{name:"shift",val:": float"},{name:"audio_cover_strength",val:": float"},{name:"cfg_interval_start",val:": float"},{name:"cfg_interval_end",val:": float"},{name:"repainting_start",val:": typing.Optional[float]"},{name:"repainting_end",val:": typing.Optional[float]"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L225"}}),O=new oe({props:{name:"encode_prompt",anchor:"diffusers.AceStepPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"lyrics",val:": typing.Union[str, typing.List[str]]"},{name:"device",val:": device"},{name:"vocal_language",val:": typing.Union[str, typing.List[str]] = 'en'"},{name:"audio_duration",val:": float = 60.0"},{name:"instruction",val:": typing.Optional[str] = None"},{name:"bpm",val:": typing.Optional[int] = None"},{name:"keyscale",val:": typing.Optional[str] = None"},{name:"timesignature",val:": typing.Optional[str] = None"},{name:"max_text_length",val:": int = 256"},{name:"max_lyric_length",val:": int = 2048"}],parametersDescription:[{anchor:"diffusers.AceStepPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>) — | |
| Text caption(s) describing the music.`,name:"prompt"},{anchor:"diffusers.AceStepPipeline.encode_prompt.lyrics",description:`<strong>lyrics</strong> (<code>str</code> or <code>List[str]</code>) — | |
| Lyric text(s).`,name:"lyrics"},{anchor:"diffusers.AceStepPipeline.encode_prompt.device",description:`<strong>device</strong> (<code>torch.device</code>) — | |
| Device for tensors.`,name:"device"},{anchor:"diffusers.AceStepPipeline.encode_prompt.vocal_language",description:`<strong>vocal_language</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>, defaults to <code>"en"</code>) — | |
| Language code(s) for lyrics.`,name:"vocal_language"},{anchor:"diffusers.AceStepPipeline.encode_prompt.audio_duration",description:`<strong>audio_duration</strong> (<code>float</code>, <em>optional</em>, defaults to 60.0) — | |
| Duration of the audio in seconds.`,name:"audio_duration"},{anchor:"diffusers.AceStepPipeline.encode_prompt.instruction",description:`<strong>instruction</strong> (<code>str</code>, <em>optional</em>) — | |
| Instruction text for generation.`,name:"instruction"},{anchor:"diffusers.AceStepPipeline.encode_prompt.bpm",description:`<strong>bpm</strong> (<code>int</code>, <em>optional</em>) — | |
| BPM (beats per minute) for metadata.`,name:"bpm"},{anchor:"diffusers.AceStepPipeline.encode_prompt.keyscale",description:`<strong>keyscale</strong> (<code>str</code>, <em>optional</em>) — | |
| Musical key (e.g., <code>"C major"</code>).`,name:"keyscale"},{anchor:"diffusers.AceStepPipeline.encode_prompt.timesignature",description:`<strong>timesignature</strong> (<code>str</code>, <em>optional</em>) — | |
| Time signature (e.g., <code>"4"</code> for 4/4).`,name:"timesignature"},{anchor:"diffusers.AceStepPipeline.encode_prompt.max_text_length",description:`<strong>max_text_length</strong> (<code>int</code>, <em>optional</em>, defaults to 256) — | |
| Maximum token length for text prompts.`,name:"max_text_length"},{anchor:"diffusers.AceStepPipeline.encode_prompt.max_lyric_length",description:`<strong>max_lyric_length</strong> (<code>int</code>, <em>optional</em>, defaults to 2048) — | |
| Maximum token length for lyrics.`,name:"max_lyric_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L394",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Tuple of <code>(text_hidden_states, text_attention_mask, lyric_hidden_states, lyric_attention_mask)</code>.</p> | |
| `}}),K=new oe({props:{name:"prepare_latents",anchor:"diffusers.AceStepPipeline.prepare_latents",parameters:[{name:"batch_size",val:": int"},{name:"audio_duration",val:": float"},{name:"dtype",val:": dtype"},{name:"device",val:": device"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"}],parametersDescription:[{anchor:"diffusers.AceStepPipeline.prepare_latents.batch_size",description:"<strong>batch_size</strong> (<code>int</code>) — Number of samples to generate.",name:"batch_size"},{anchor:"diffusers.AceStepPipeline.prepare_latents.audio_duration",description:"<strong>audio_duration</strong> (<code>float</code>) — Duration of audio in seconds.",name:"audio_duration"},{anchor:"diffusers.AceStepPipeline.prepare_latents.dtype",description:"<strong>dtype</strong> (<code>torch.dtype</code>) — Data type for the latents.",name:"dtype"},{anchor:"diffusers.AceStepPipeline.prepare_latents.device",description:"<strong>device</strong> (<code>torch.device</code>) — Device for the latents.",name:"device"},{anchor:"diffusers.AceStepPipeline.prepare_latents.generator",description:"<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — Random number generator(s).",name:"generator"},{anchor:"diffusers.AceStepPipeline.prepare_latents.latents",description:"<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Pre-generated latents.",name:"latents"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L499",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Noise latents of shape <code>(batch_size, latent_length, acoustic_dim)</code>.</p> | |
| `}}),ee=new oe({props:{name:"prepare_reference_audio_latents",anchor:"diffusers.AceStepPipeline.prepare_reference_audio_latents",parameters:[{name:"reference_audio",val:": Tensor"},{name:"batch_size",val:": int"},{name:"device",val:": device"},{name:"dtype",val:": dtype"}],parametersDescription:[{anchor:"diffusers.AceStepPipeline.prepare_reference_audio_latents.reference_audio",description:`<strong>reference_audio</strong> (<code>torch.Tensor</code>) — Reference audio tensor of shape <code>[channels, samples]</code> at | |
| <code>self.sample_rate</code>.`,name:"reference_audio"},{anchor:"diffusers.AceStepPipeline.prepare_reference_audio_latents.batch_size",description:"<strong>batch_size</strong> (<code>int</code>) — Batch size.",name:"batch_size"},{anchor:"diffusers.AceStepPipeline.prepare_reference_audio_latents.device",description:"<strong>device</strong> (<code>torch.device</code>) — Target device.",name:"device"},{anchor:"diffusers.AceStepPipeline.prepare_reference_audio_latents.dtype",description:"<strong>dtype</strong> (<code>torch.dtype</code>) — Target dtype.",name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L573",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Tuple of <code>(refer_audio_acoustic, refer_audio_order_mask)</code>.</p> | |
| `}}),te=new oe({props:{name:"prepare_src_latents",anchor:"diffusers.AceStepPipeline.prepare_src_latents",parameters:[{name:"device",val:": device"},{name:"dtype",val:": dtype"},{name:"batch_size",val:": int = 1"},{name:"src_audio",val:": typing.Optional[torch.Tensor] = None"},{name:"audio_codes",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"latent_length",val:": typing.Optional[int] = None"},{name:"task_type",val:": str = 'text2music'"}],parametersDescription:[{anchor:"diffusers.AceStepPipeline.prepare_src_latents.src_audio",description:`<strong>src_audio</strong> (<code>torch.Tensor</code>, <em>optional</em>) — Source audio tensor of shape <code>[channels, samples]</code> at | |
| <code>self.sample_rate</code>.`,name:"src_audio"},{anchor:"diffusers.AceStepPipeline.prepare_src_latents.audio_codes",description:"<strong>audio_codes</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — Audio semantic code strings.",name:"audio_codes"},{anchor:"diffusers.AceStepPipeline.prepare_src_latents.latent_length",description:"<strong>latent_length</strong> (<code>int</code>, <em>optional</em>) — Target latent length when no source audio or audio codes are given.",name:"latent_length"},{anchor:"diffusers.AceStepPipeline.prepare_src_latents.device",description:"<strong>device</strong> (<code>torch.device</code>) — Target device.",name:"device"},{anchor:"diffusers.AceStepPipeline.prepare_src_latents.dtype",description:"<strong>dtype</strong> (<code>torch.dtype</code>) — Target dtype.",name:"dtype"},{anchor:"diffusers.AceStepPipeline.prepare_src_latents.batch_size",description:"<strong>batch_size</strong> (<code>int</code>) — Batch size.",name:"batch_size"},{anchor:"diffusers.AceStepPipeline.prepare_src_latents.task_type",description:"<strong>task_type</strong> (<code>str</code>) — Current task type.",name:"task_type"}],source:"https://github.com/huggingface/diffusers/blob/vr_13751/src/diffusers/pipelines/ace_step/pipeline_ace_step.py#L626",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Tuple of <code>(src_latents, latent_length)</code> where <code>src_latents</code> has shape <code>[batch, T, D]</code>.</p> | |
| `}}),ne=new Lt({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/ace_step.md"}}),{c(){m=r("meta"),P=o(),x=r("p"),M=o(),f(b.$$.fragment),u=o(),f(T.$$.fragment),Te=o(),B=r("p"),B.innerHTML=dt,xe=o(),q=r("p"),q.innerHTML=ut,we=o(),I=r("p"),I.textContent=mt,je=o(),E=r("p"),E.innerHTML=ft,Je=o(),f(G.$$.fragment),Ae=o(),Z=r("p"),Z.textContent=gt,Se=o(),V=r("table"),V.innerHTML=ht,ke=o(),N=r("p"),N.innerHTML=_t,Ce=o(),f(D.$$.fragment),Ue=o(),z=r("p"),z.textContent=yt,$e=o(),L=r("ul"),L.innerHTML=vt,Pe=o(),W=r("p"),W.textContent=Mt,Be=o(),H=r("ul"),H.innerHTML=bt,qe=o(),f(X.$$.fragment),Ie=o(),f(Q.$$.fragment),Ee=o(),c=r("div"),f(R.$$.fragment),We=o(),se=r("p"),se.textContent=Tt,He=o(),ae=r("p"),ae.innerHTML=xt,Xe=o(),ie=r("p"),ie.textContent=wt,Qe=o(),re=r("p"),re.textContent=jt,Re=o(),le=r("ul"),le.innerHTML=Jt,Fe=o(),w=r("div"),f(F.$$.fragment),Ye=o(),ce=r("p"),ce.textContent=At,Oe=o(),f(k.$$.fragment),Ke=o(),C=r("div"),f(Y.$$.fragment),et=o(),pe=r("p"),pe.textContent=St,tt=o(),j=r("div"),f(O.$$.fragment),nt=o(),de=r("p"),de.textContent=kt,ot=o(),ue=r("p"),ue.textContent=Ct,st=o(),U=r("div"),f(K.$$.fragment),at=o(),me=r("p"),me.textContent=Ut,it=o(),J=r("div"),f(ee.$$.fragment),rt=o(),fe=r("p"),fe.textContent=$t,lt=o(),ge=r("p"),ge.textContent=Pt,ct=o(),$=r("div"),f(te.$$.fragment),pt=o(),he=r("p"),he.textContent=Bt,Ge=o(),f(ne.$$.fragment),Ze=o(),Me=r("p"),this.h()},l(e){const t=Nt("svelte-u9bgzb",document.head);m=l(t,"META",{name:!0,content:!0}),t.forEach(n),P=s(e),x=l(e,"P",{}),A(x).forEach(n),M=s(e),g(b.$$.fragment,e),u=s(e),g(T.$$.fragment,e),Te=s(e),B=l(e,"P",{"data-svelte-h":!0}),d(B)!=="svelte-1xmbfz5"&&(B.innerHTML=dt),xe=s(e),q=l(e,"P",{"data-svelte-h":!0}),d(q)!=="svelte-861w11"&&(q.innerHTML=ut),we=s(e),I=l(e,"P",{"data-svelte-h":!0}),d(I)!=="svelte-1jpmgzg"&&(I.textContent=mt),je=s(e),E=l(e,"P",{"data-svelte-h":!0}),d(E)!=="svelte-nix58r"&&(E.innerHTML=ft),Je=s(e),g(G.$$.fragment,e),Ae=s(e),Z=l(e,"P",{"data-svelte-h":!0}),d(Z)!=="svelte-1n2ep4u"&&(Z.textContent=gt),Se=s(e),V=l(e,"TABLE",{"data-svelte-h":!0}),d(V)!=="svelte-172gkcx"&&(V.innerHTML=ht),ke=s(e),N=l(e,"P",{"data-svelte-h":!0}),d(N)!=="svelte-14cgle6"&&(N.innerHTML=_t),Ce=s(e),g(D.$$.fragment,e),Ue=s(e),z=l(e,"P",{"data-svelte-h":!0}),d(z)!=="svelte-1dvtu0c"&&(z.textContent=yt),$e=s(e),L=l(e,"UL",{"data-svelte-h":!0}),d(L)!=="svelte-igfv1y"&&(L.innerHTML=vt),Pe=s(e),W=l(e,"P",{"data-svelte-h":!0}),d(W)!=="svelte-1g0t9wk"&&(W.textContent=Mt),Be=s(e),H=l(e,"UL",{"data-svelte-h":!0}),d(H)!=="svelte-zawy12"&&(H.innerHTML=bt),qe=s(e),g(X.$$.fragment,e),Ie=s(e),g(Q.$$.fragment,e),Ee=s(e),c=l(e,"DIV",{class:!0});var p=A(c);g(R.$$.fragment,p),We=s(p),se=l(p,"P",{"data-svelte-h":!0}),d(se)!=="svelte-19idt02"&&(se.textContent=Tt),He=s(p),ae=l(p,"P",{"data-svelte-h":!0}),d(ae)!=="svelte-d0rmi5"&&(ae.innerHTML=xt),Xe=s(p),ie=l(p,"P",{"data-svelte-h":!0}),d(ie)!=="svelte-xc6yjp"&&(ie.textContent=wt),Qe=s(p),re=l(p,"P",{"data-svelte-h":!0}),d(re)!=="svelte-1w6yaag"&&(re.textContent=jt),Re=s(p),le=l(p,"UL",{"data-svelte-h":!0}),d(le)!=="svelte-1piw6h1"&&(le.innerHTML=Jt),Fe=s(p),w=l(p,"DIV",{class:!0});var _e=A(w);g(F.$$.fragment,_e),Ye=s(_e),ce=l(_e,"P",{"data-svelte-h":!0}),d(ce)!=="svelte-1liw69f"&&(ce.textContent=At),Oe=s(_e),g(k.$$.fragment,_e),_e.forEach(n),Ke=s(p),C=l(p,"DIV",{class:!0});var Ne=A(C);g(Y.$$.fragment,Ne),et=s(Ne),pe=l(Ne,"P",{"data-svelte-h":!0}),d(pe)!=="svelte-1i5h6w3"&&(pe.textContent=St),Ne.forEach(n),tt=s(p),j=l(p,"DIV",{class:!0});var ye=A(j);g(O.$$.fragment,ye),nt=s(ye),de=l(ye,"P",{"data-svelte-h":!0}),d(de)!=="svelte-14s2s2t"&&(de.textContent=kt),ot=s(ye),ue=l(ye,"P",{"data-svelte-h":!0}),d(ue)!=="svelte-l7jx7"&&(ue.textContent=Ct),ye.forEach(n),st=s(p),U=l(p,"DIV",{class:!0});var De=A(U);g(K.$$.fragment,De),at=s(De),me=l(De,"P",{"data-svelte-h":!0}),d(me)!=="svelte-1kcgwsa"&&(me.textContent=Ut),De.forEach(n),it=s(p),J=l(p,"DIV",{class:!0});var ve=A(J);g(ee.$$.fragment,ve),rt=s(ve),fe=l(ve,"P",{"data-svelte-h":!0}),d(fe)!=="svelte-583sdx"&&(fe.textContent=$t),lt=s(ve),ge=l(ve,"P",{"data-svelte-h":!0}),d(ge)!=="svelte-17nvcr3"&&(ge.textContent=Pt),ve.forEach(n),ct=s(p),$=l(p,"DIV",{class:!0});var ze=A($);g(te.$$.fragment,ze),pt=s(ze),he=l(ze,"P",{"data-svelte-h":!0}),d(he)!=="svelte-wb5sxz"&&(he.textContent=Bt),ze.forEach(n),p.forEach(n),Ge=s(e),g(ne.$$.fragment,e),Ze=s(e),Me=l(e,"P",{}),A(Me).forEach(n),this.h()},h(){S(m,"name","hf:doc:metadata"),S(m,"content",Xt),S(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S(c,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){a(document.head,m),i(e,P,t),i(e,x,t),i(e,M,t),h(b,e,t),i(e,u,t),h(T,e,t),i(e,Te,t),i(e,B,t),i(e,xe,t),i(e,q,t),i(e,we,t),i(e,I,t),i(e,je,t),i(e,E,t),i(e,Je,t),h(G,e,t),i(e,Ae,t),i(e,Z,t),i(e,Se,t),i(e,V,t),i(e,ke,t),i(e,N,t),i(e,Ce,t),h(D,e,t),i(e,Ue,t),i(e,z,t),i(e,$e,t),i(e,L,t),i(e,Pe,t),i(e,W,t),i(e,Be,t),i(e,H,t),i(e,qe,t),h(X,e,t),i(e,Ie,t),h(Q,e,t),i(e,Ee,t),i(e,c,t),h(R,c,null),a(c,We),a(c,se),a(c,He),a(c,ae),a(c,Xe),a(c,ie),a(c,Qe),a(c,re),a(c,Re),a(c,le),a(c,Fe),a(c,w),h(F,w,null),a(w,Ye),a(w,ce),a(w,Oe),h(k,w,null),a(c,Ke),a(c,C),h(Y,C,null),a(C,et),a(C,pe),a(c,tt),a(c,j),h(O,j,null),a(j,nt),a(j,de),a(j,ot),a(j,ue),a(c,st),a(c,U),h(K,U,null),a(U,at),a(U,me),a(c,it),a(c,J),h(ee,J,null),a(J,rt),a(J,fe),a(J,lt),a(J,ge),a(c,ct),a(c,$),h(te,$,null),a($,pt),a($,he),i(e,Ge,t),h(ne,e,t),i(e,Ze,t),i(e,Me,t),Ve=!0},p(e,[t]){const p={};t&2&&(p.$$scope={dirty:t,ctx:e}),k.$set(p)},i(e){Ve||(_(b.$$.fragment,e),_(T.$$.fragment,e),_(G.$$.fragment,e),_(D.$$.fragment,e),_(X.$$.fragment,e),_(Q.$$.fragment,e),_(R.$$.fragment,e),_(F.$$.fragment,e),_(k.$$.fragment,e),_(Y.$$.fragment,e),_(O.$$.fragment,e),_(K.$$.fragment,e),_(ee.$$.fragment,e),_(te.$$.fragment,e),_(ne.$$.fragment,e),Ve=!0)},o(e){y(b.$$.fragment,e),y(T.$$.fragment,e),y(G.$$.fragment,e),y(D.$$.fragment,e),y(X.$$.fragment,e),y(Q.$$.fragment,e),y(R.$$.fragment,e),y(F.$$.fragment,e),y(k.$$.fragment,e),y(Y.$$.fragment,e),y(O.$$.fragment,e),y(K.$$.fragment,e),y(ee.$$.fragment,e),y(te.$$.fragment,e),y(ne.$$.fragment,e),Ve=!1},d(e){e&&(n(P),n(x),n(M),n(u),n(Te),n(B),n(xe),n(q),n(we),n(I),n(je),n(E),n(Je),n(Ae),n(Z),n(Se),n(V),n(ke),n(N),n(Ce),n(Ue),n(z),n($e),n(L),n(Pe),n(W),n(Be),n(H),n(qe),n(Ie),n(Ee),n(c),n(Ge),n(Ze),n(Me)),n(m),v(b,e),v(T,e),v(G,e),v(D,e),v(X,e),v(Q,e),v(R),v(F),v(k),v(Y),v(O),v(K),v(ee),v(te),v(ne,e)}}}const Xt='{"title":"ACE-Step 1.5","local":"ace-step-15","sections":[{"title":"Variants","local":"variants","sections":[],"depth":2},{"title":"Tips","local":"tips","sections":[],"depth":2},{"title":"AceStepPipeline","local":"diffusers.AceStepPipeline","sections":[],"depth":2}],"depth":1}';function Qt(be){return Et(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class nn extends Zt{constructor(m){super(),Vt(this,m,Qt,Ht,It,{})}}export{nn as component}; | |
Xet Storage Details
- Size:
- 49.4 kB
- Xet hash:
- 1a63adddc96d34f14564cb01d3d6647cb407f1707470a271a87371dfe0f01032
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.