Buckets:
| import{s as te,n as ne,o as oe}from"../chunks/scheduler.53228c21.js";import{S as re,i as se,e as d,s as r,c as y,h as ae,a as c,d as n,b as s,f as F,g as w,j as G,k as I,l,m as a,n as M,t as D,o as A,p as S}from"../chunks/index.100fac89.js";import{C as ie}from"../chunks/CopyLLMTxtMenu.8a938ed8.js";import{D as Y}from"../chunks/Docstring.d920a7a5.js";import{H as ee,E as de}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.afd26599.js";function ce(W){let i,E,C,L,p,k,u,P,h,Z='A 1D Diffusion Transformer for music generation from <a href="https://github.com/ace-step/ACE-Step-1.5" rel="nofollow">ACE-Step 1.5</a>. The model operates on the 25 Hz stereo latents produced by <a href="/docs/diffusers/pr_13743/en/api/models/autoencoder_oobleck#diffusers.AutoencoderOobleck">AutoencoderOobleck</a> using flow matching, and is trained with a Qwen3-derived backbone (grouped-query attention, rotary position embedding, RMSNorm, AdaLN-Zero timestep conditioning) plus cross-attention to the text / lyric / timbre conditions built by <code>AceStepConditionEncoder</code>.',H,_,O,o,g,U,$,J="Diffusion Transformer for ACE-Step 1.5 music generation.",V,T,K=`Generates audio latents conditioned on text, lyrics, and timbre. Uses 1D patch embedding (<code>Conv1d</code> with stride | |
| <code>patch_size</code>) followed by a stack of <code>AceStepTransformerBlock</code>s with alternating sliding-window / full attention on | |
| the self-attention branch. Cross-attention consumes the packed <code>encoder_hidden_states</code> produced by | |
| <code>AceStepConditionEncoder</code>.`,B,m,v,Q,x,X='The <a href="/docs/diffusers/pr_13743/en/api/models/ace_step_transformer#diffusers.AceStepTransformer1DModel">AceStepTransformer1DModel</a> forward method.',q,b,N,z,R;return p=new ie({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),u=new ee({props:{title:"AceStepTransformer1DModel",local:"acesteptransformer1dmodel",headingTag:"h1"}}),_=new ee({props:{title:"AceStepTransformer1DModel",local:"diffusers.AceStepTransformer1DModel",headingTag:"h2"}}),g=new Y({props:{name:"class diffusers.AceStepTransformer1DModel",anchor:"diffusers.AceStepTransformer1DModel",parameters:[{name:"hidden_size",val:": int = 2048"},{name:"intermediate_size",val:": int = 6144"},{name:"num_hidden_layers",val:": int = 24"},{name:"num_attention_heads",val:": int = 16"},{name:"num_key_value_heads",val:": int = 8"},{name:"head_dim",val:": int = 128"},{name:"in_channels",val:": int = 192"},{name:"audio_acoustic_hidden_dim",val:": int = 64"},{name:"patch_size",val:": int = 2"},{name:"rope_theta",val:": float = 1000000.0"},{name:"attention_bias",val:": bool = False"},{name:"attention_dropout",val:": float = 0.0"},{name:"rms_norm_eps",val:": float = 1e-06"},{name:"sliding_window",val:": int = 128"},{name:"layer_types",val:": typing.Optional[typing.List[str]] = None"},{name:"encoder_hidden_size",val:": typing.Optional[int] = None"},{name:"is_turbo",val:": bool = False"},{name:"model_version",val:": typing.Optional[str] = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13743/src/diffusers/models/transformers/ace_step_transformer.py#L431"}}),v=new Y({props:{name:"forward",anchor:"diffusers.AceStepTransformer1DModel.forward",parameters:[{name:"hidden_states",val:": Tensor"},{name:"timestep",val:": Tensor"},{name:"timestep_r",val:": Tensor"},{name:"encoder_hidden_states",val:": Tensor"},{name:"context_latents",val:": Tensor"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.AceStepTransformer1DModel.forward.hidden_states",description:`<strong>hidden_states</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, seq_len, channels)</code>) — | |
| Noisy latent input for the diffusion process.`,name:"hidden_states"},{anchor:"diffusers.AceStepTransformer1DModel.forward.timestep",description:`<strong>timestep</strong> (<code>torch.Tensor</code> of shape <code>(batch_size,)</code>) — | |
| Current diffusion timestep <code>t</code>.`,name:"timestep"},{anchor:"diffusers.AceStepTransformer1DModel.forward.timestep_r",description:`<strong>timestep_r</strong> (<code>torch.Tensor</code> of shape <code>(batch_size,)</code>) — | |
| Reference timestep <code>r</code> (set equal to <code>t</code> for standard inference).`,name:"timestep_r"},{anchor:"diffusers.AceStepTransformer1DModel.forward.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, encoder_seq_len, hidden_size)</code>) — | |
| Conditioning embeddings from the condition encoder (text + lyrics + timbre).`,name:"encoder_hidden_states"},{anchor:"diffusers.AceStepTransformer1DModel.forward.context_latents",description:`<strong>context_latents</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, seq_len, context_dim)</code>) — | |
| Context latents (source latents concatenated with chunk masks) — fed to the patchify conv alongside | |
| <code>hidden_states</code>.`,name:"context_latents"},{anchor:"diffusers.AceStepTransformer1DModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, defaults to <code>True</code>) — | |
| Whether to return a <code>Transformer2DModelOutput</code> or a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_13743/src/diffusers/models/transformers/ace_step_transformer.py#L531",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The predicted velocity field.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Transformer2DModelOutput</code> or <code>tuple</code></p> | |
| `}}),b=new de({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/models/ace_step_transformer.md"}}),{c(){i=d("meta"),E=r(),C=d("p"),L=r(),y(p.$$.fragment),k=r(),y(u.$$.fragment),P=r(),h=d("p"),h.innerHTML=Z,H=r(),y(_.$$.fragment),O=r(),o=d("div"),y(g.$$.fragment),U=r(),$=d("p"),$.textContent=J,V=r(),T=d("p"),T.innerHTML=K,B=r(),m=d("div"),y(v.$$.fragment),Q=r(),x=d("p"),x.innerHTML=X,q=r(),y(b.$$.fragment),N=r(),z=d("p"),this.h()},l(e){const t=ae("svelte-u9bgzb",document.head);i=c(t,"META",{name:!0,content:!0}),t.forEach(n),E=s(e),C=c(e,"P",{}),F(C).forEach(n),L=s(e),w(p.$$.fragment,e),k=s(e),w(u.$$.fragment,e),P=s(e),h=c(e,"P",{"data-svelte-h":!0}),G(h)!=="svelte-qizihh"&&(h.innerHTML=Z),H=s(e),w(_.$$.fragment,e),O=s(e),o=c(e,"DIV",{class:!0});var f=F(o);w(g.$$.fragment,f),U=s(f),$=c(f,"P",{"data-svelte-h":!0}),G($)!=="svelte-1yjf44w"&&($.textContent=J),V=s(f),T=c(f,"P",{"data-svelte-h":!0}),G(T)!=="svelte-13cg7vr"&&(T.innerHTML=K),B=s(f),m=c(f,"DIV",{class:!0});var j=F(m);w(v.$$.fragment,j),Q=s(j),x=c(j,"P",{"data-svelte-h":!0}),G(x)!=="svelte-1sgrb67"&&(x.innerHTML=X),j.forEach(n),f.forEach(n),q=s(e),w(b.$$.fragment,e),N=s(e),z=c(e,"P",{}),F(z).forEach(n),this.h()},h(){I(i,"name","hf:doc:metadata"),I(i,"content",le),I(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),I(o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){l(document.head,i),a(e,E,t),a(e,C,t),a(e,L,t),M(p,e,t),a(e,k,t),M(u,e,t),a(e,P,t),a(e,h,t),a(e,H,t),M(_,e,t),a(e,O,t),a(e,o,t),M(g,o,null),l(o,U),l(o,$),l(o,V),l(o,T),l(o,B),l(o,m),M(v,m,null),l(m,Q),l(m,x),a(e,q,t),M(b,e,t),a(e,N,t),a(e,z,t),R=!0},p:ne,i(e){R||(D(p.$$.fragment,e),D(u.$$.fragment,e),D(_.$$.fragment,e),D(g.$$.fragment,e),D(v.$$.fragment,e),D(b.$$.fragment,e),R=!0)},o(e){A(p.$$.fragment,e),A(u.$$.fragment,e),A(_.$$.fragment,e),A(g.$$.fragment,e),A(v.$$.fragment,e),A(b.$$.fragment,e),R=!1},d(e){e&&(n(E),n(C),n(L),n(k),n(P),n(h),n(H),n(O),n(o),n(q),n(N),n(z)),n(i),S(p,e),S(u,e),S(_,e),S(g),S(v),S(b,e)}}}const le='{"title":"AceStepTransformer1DModel","local":"acesteptransformer1dmodel","sections":[{"title":"AceStepTransformer1DModel","local":"diffusers.AceStepTransformer1DModel","sections":[],"depth":2}],"depth":1}';function me(W){return oe(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ge extends re{constructor(i){super(),se(this,i,me,ce,te,{})}}export{ge as component}; | |
Xet Storage Details
- Size:
- 8.22 kB
- Xet hash:
- 6fd76d964581b5da92f5eb524373312683ea144f94bf81ffa3e89cabd1cab786
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.