Buckets:
/**
 * Docs page module for `WanTransformer3DModel` (compiled Svelte output, reformatted).
 *
 * Changes relative to the captured text:
 *  - removed the table-cell wrappers (`| ` / ` | |`) that a file viewer injected
 *    into the code and into multi-line string literals;
 *  - re-joined the `class` string that had been split across two lines;
 *  - restored the blank line in the highlighted snippet so it matches the
 *    base64 `code` payload (which decodes to two consecutive newlines);
 *  - aligned three rendered parameter descriptions with the signature listed in
 *    the same props object (`num_attention_heads`, `text_dim`, `qk_norm`).
 *
 * NOTE(review): this file is a build artifact. The description fixes should also
 * be applied to the Python docstring referenced by MODEL_SOURCE_URL, otherwise
 * the next docs build will regenerate the old text.
 *
 * The import aliases below are descriptive names chosen from how each minified
 * export is used in this file; the chunk files themselves were not inspected.
 */
import {
  s as safeNotEqual,
  n as noop,
  o as onMount,
} from "../chunks/scheduler.53228c21.js";
import {
  S as SvelteComponent,
  i as init,
  e as element,
  s as space,
  c as createComponent,
  h as headSelector,
  a as claimElement,
  d as detach,
  b as claimSpace,
  f as children,
  g as claimComponent,
  j as getSvelteDataset,
  k as attr,
  l as appendHydration,
  m as insertHydration,
  n as mountComponent,
  t as transitionIn,
  o as transitionOut,
  p as destroyComponent,
} from "../chunks/index.100fac89.js";
import { C as CopyLLMTxtMenu } from "../chunks/CopyLLMTxtMenu.8bfabc4c.js";
import { D as Docstring } from "../chunks/Docstring.058c954d.js";
import { C as CodeBlock } from "../chunks/CodeBlock.0adb3827.js";
import {
  H as Heading,
  E as EditOnGithub,
} from "../chunks/MermaidChart.svelte_svelte_type_style_lang.6e568675.js";

// Serialized page metadata written to the `hf:doc:metadata` <meta> tag.
const PAGE_METADATA =
  '{"title":"WanTransformer3DModel","local":"wantransformer3dmodel","sections":[{"title":"WanTransformer3DModel","local":"diffusers.WanTransformer3DModel","sections":[],"depth":2},{"title":"Transformer2DModelOutput","local":"diffusers.models.modeling_outputs.Transformer2DModelOutput","sections":[],"depth":2}],"depth":1}';

// Static copy for the four plain paragraphs on the page.
const INTRO_HTML =
  'A Diffusion Transformer model for 3D video-like data was introduced in <a href="https://github.com/Wan-Video/Wan2.1" rel="nofollow">Wan 2.1</a> by the Alibaba Wan Team.';
const USAGE_TEXT = "The model can be loaded with the following code snippet.";
const MODEL_SUMMARY_TEXT =
  "A Transformer model for video-like data used in the Wan model.";
const OUTPUT_SUMMARY_HTML =
  'The output of <a href="/docs/diffusers/pr_13770/en/api/models/transformer2d#diffusers.Transformer2DModel">Transformer2DModel</a>.';

// Shared class list of the two <div> wrappers around the Docstring components.
const DOC_BOX_CLASS =
  "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8";

// Base64 payload handed to CodeBlock as `code`; it must stay byte-identical.
const USAGE_SNIPPET_CODE =
  "ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFdhblRyYW5zZm9ybWVyM0RNb2RlbCUwQSUwQXRyYW5zZm9ybWVyJTIwJTNEJTIwV2FuVHJhbnNmb3JtZXIzRE1vZGVsLmZyb21fcHJldHJhaW5lZCglMjJXYW4tQUklMkZXYW4yLjEtVDJWLTEuM0ItRGlmZnVzZXJzJTIyJTJDJTIwc3ViZm9sZGVyJTNEJTIydHJhbnNmb3JtZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KQ==";

// Pre-highlighted HTML for the same snippet. The empty middle entry restores the
// blank line that USAGE_SNIPPET_CODE encodes as two consecutive newlines.
const USAGE_SNIPPET_HTML = [
  '<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> WanTransformer3DModel',
  "",
  'transformer = WanTransformer3DModel.from_pretrained(<span class="hljs-string">"Wan-AI/Wan2.1-T2V-1.3B-Diffusers"</span>, subfolder=<span class="hljs-string">"transformer"</span>, torch_dtype=torch.bfloat16)',
].join("\n");

const MODEL_ANCHOR = "diffusers.WanTransformer3DModel";
const MODEL_SOURCE_URL =
  "https://github.com/huggingface/diffusers/blob/vr_13770/src/diffusers/models/transformers/transformer_wan.py#L507";

const OUTPUT_ANCHOR = "diffusers.models.modeling_outputs.Transformer2DModelOutput";
const OUTPUT_SOURCE_URL =
  "https://github.com/huggingface/diffusers/blob/vr_13770/src/diffusers/models/modeling_outputs.py#L21";

const EDIT_SOURCE_URL =
  "https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/models/wan_transformer_3d.md";

/**
 * Builds one `parametersDescription` entry in the HTML shape used on this page:
 * `<strong>name</strong> (<code>type</code>[, <em>optional</em>], defaults to <code>default</code>) —\nsummary`.
 *
 * @param {string} anchorPrefix - Anchor of the documented class.
 * @param {{name: string, type: string, defaultValue: string, optional?: boolean, summary: string}} spec
 * @returns {{anchor: string, description: string, name: string}}
 */
function describeParameter(anchorPrefix, { name, type, defaultValue, optional = false, summary }) {
  const optionalMarker = optional ? ", <em>optional</em>" : "";
  return {
    anchor: `${anchorPrefix}.${name}`,
    description: `<strong>${name}</strong> (<code>${type}</code>${optionalMarker}, defaults to <code>${defaultValue}</code>) —\n${summary}`,
    name,
  };
}

/**
 * Props for the `WanTransformer3DModel` Docstring component.
 * Built per call so every fragment receives fresh arrays, as in the original output.
 */
function buildModelDocstringProps() {
  const signature = [
    { name: "patch_size", val: ": tuple = (1, 2, 2)" },
    { name: "num_attention_heads", val: ": int = 40" },
    { name: "attention_head_dim", val: ": int = 128" },
    { name: "in_channels", val: ": int = 16" },
    { name: "out_channels", val: ": int = 16" },
    { name: "text_dim", val: ": int = 4096" },
    { name: "freq_dim", val: ": int = 256" },
    { name: "ffn_dim", val: ": int = 13824" },
    { name: "num_layers", val: ": int = 40" },
    { name: "cross_attn_norm", val: ": bool = True" },
    { name: "qk_norm", val: ": str | None = 'rms_norm_across_heads'" },
    { name: "eps", val: ": float = 1e-06" },
    { name: "image_dim", val: ": int | None = None" },
    { name: "added_kv_proj_dim", val: ": int | None = None" },
    { name: "rope_max_seq_len", val: ": int = 1024" },
    { name: "pos_embed_seq_len", val: ": int | None = None" },
  ];

  // NOTE(review): `window_size` and `add_img_emb` are described here but are not
  // in the signature above, while `image_dim`, `rope_max_seq_len` and
  // `pos_embed_seq_len` are in the signature but undescribed. Left as-is because
  // the correct wording has to come from the upstream docstring.
  const described = [
    {
      name: "patch_size",
      type: "tuple[int]",
      defaultValue: "(1, 2, 2)",
      summary: "3D patch dimensions for video embedding (t_patch, h_patch, w_patch).",
    },
    {
      name: "num_attention_heads",
      type: "int",
      defaultValue: "40",
      // Was "Fixed length for text embeddings.", which describes a different parameter.
      summary: "The number of attention heads.",
    },
    {
      name: "attention_head_dim",
      type: "int",
      defaultValue: "128",
      summary: "The number of channels in each head.",
    },
    {
      name: "in_channels",
      type: "int",
      defaultValue: "16",
      summary: "The number of channels in the input.",
    },
    {
      name: "out_channels",
      type: "int",
      defaultValue: "16",
      summary: "The number of channels in the output.",
    },
    {
      name: "text_dim",
      type: "int",
      // Was 512; the signature above declares 4096.
      defaultValue: "4096",
      summary: "Input dimension for text embeddings.",
    },
    {
      name: "freq_dim",
      type: "int",
      defaultValue: "256",
      summary: "Dimension for sinusoidal time embeddings.",
    },
    {
      name: "ffn_dim",
      type: "int",
      defaultValue: "13824",
      summary: "Intermediate dimension in feed-forward network.",
    },
    {
      name: "num_layers",
      type: "int",
      defaultValue: "40",
      summary: "The number of layers of transformer blocks to use.",
    },
    {
      name: "window_size",
      type: "tuple[int]",
      defaultValue: "(-1, -1)",
      summary: "Window size for local attention (-1 indicates global attention).",
    },
    {
      name: "cross_attn_norm",
      type: "bool",
      defaultValue: "True",
      summary: "Enable cross-attention normalization.",
    },
    {
      name: "qk_norm",
      // Was `bool` / `True`; the signature above declares `str | None = 'rms_norm_across_heads'`.
      type: "str",
      optional: true,
      defaultValue: '"rms_norm_across_heads"',
      summary: "Enable query/key normalization.",
    },
    {
      name: "eps",
      type: "float",
      defaultValue: "1e-6",
      summary: "Epsilon value for normalization layers.",
    },
    {
      name: "add_img_emb",
      type: "bool",
      defaultValue: "False",
      summary: "Whether to use img_emb.",
    },
    {
      name: "added_kv_proj_dim",
      type: "int",
      optional: true,
      defaultValue: "None",
      summary:
        "The number of channels to use for the added key and value projections. If <code>None</code>, no projection is used.",
    },
  ];

  return {
    name: `class ${MODEL_ANCHOR}`,
    anchor: MODEL_ANCHOR,
    parameters: signature,
    parametersDescription: described.map((spec) => describeParameter(MODEL_ANCHOR, spec)),
    source: MODEL_SOURCE_URL,
  };
}

/** Props for the `Transformer2DModelOutput` Docstring component. */
function buildOutputDocstringProps() {
  return {
    name: `class ${OUTPUT_ANCHOR}`,
    anchor: OUTPUT_ANCHOR,
    parameters: [{ name: "sample", val: ": torch.Tensor" }],
    parametersDescription: [
      {
        anchor: `${OUTPUT_ANCHOR}.sample`,
        description:
          '<strong>sample</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, num_channels, height, width)</code> or <code>(batch size, num_vector_embeds - 1, num_latent_pixels)</code> if <a href="/docs/diffusers/pr_13770/en/api/models/transformer2d#diffusers.Transformer2DModel">Transformer2DModel</a> is discrete) —\n' +
          "The hidden states output conditioned on the <code>encoder_hidden_states</code> input. If discrete, returns probability\n" +
          "distributions for the unnoised latent pixels.",
        name: "sample",
      },
    ],
    source: OUTPUT_SOURCE_URL,
  };
}

/**
 * Fragment factory for the page.
 *
 * Returns the object of lifecycle callbacks that `init` receives: `c` (create),
 * `l` (claim existing DOM nodes), `h` (set attributes), `m` (mount), `p` (update,
 * a no-op here), `i` / `o` (transition in / out) and `d` (destroy). The statement
 * order inside each callback mirrors the original output.
 *
 * @param {unknown} ctx - Unused; the page has no reactive state.
 */
function createFragment(ctx) {
  let metadataTag;
  let gapAfterMeta;
  let leadSpacer;
  let gapAfterLeadSpacer;
  let gapAfterCopyMenu;
  let gapAfterPageHeading;
  let introParagraph;
  let gapAfterIntro;
  let usageParagraph;
  let gapAfterUsage;
  let gapAfterSnippet;
  let gapAfterModelHeading;
  let modelDocBox;
  let gapInModelDocBox;
  let modelSummary;
  let gapAfterModelDocBox;
  let gapAfterOutputHeading;
  let outputDocBox;
  let gapInOutputDocBox;
  let outputSummary;
  let gapAfterOutputDocBox;
  let gapAfterEditLink;
  let tailSpacer;
  // True once the child components have been transitioned in.
  let isIntroduced;

  // Child components are constructed in the same order as in the original output.
  const copyMenu = new CopyLLMTxtMenu({
    props: {
      containerStyle:
        "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  const pageHeading = new Heading({
    props: {
      title: "WanTransformer3DModel",
      local: "wantransformer3dmodel",
      headingTag: "h1",
    },
  });
  const usageSnippet = new CodeBlock({
    props: {
      code: USAGE_SNIPPET_CODE,
      highlighted: USAGE_SNIPPET_HTML,
      lang: "python",
      wrap: false,
    },
  });
  const modelHeading = new Heading({
    props: {
      title: "WanTransformer3DModel",
      local: MODEL_ANCHOR,
      headingTag: "h2",
    },
  });
  const modelDocstring = new Docstring({ props: buildModelDocstringProps() });
  const outputHeading = new Heading({
    props: {
      title: "Transformer2DModelOutput",
      local: OUTPUT_ANCHOR,
      headingTag: "h2",
    },
  });
  const outputDocstring = new Docstring({ props: buildOutputDocstringProps() });
  const editLink = new EditOnGithub({ props: { source: EDIT_SOURCE_URL } });

  return {
    // Create every node from scratch.
    c() {
      metadataTag = element("meta");
      gapAfterMeta = space();
      leadSpacer = element("p");
      gapAfterLeadSpacer = space();
      createComponent(copyMenu.$$.fragment);
      gapAfterCopyMenu = space();
      createComponent(pageHeading.$$.fragment);
      gapAfterPageHeading = space();
      introParagraph = element("p");
      introParagraph.innerHTML = INTRO_HTML;
      gapAfterIntro = space();
      usageParagraph = element("p");
      usageParagraph.textContent = USAGE_TEXT;
      gapAfterUsage = space();
      createComponent(usageSnippet.$$.fragment);
      gapAfterSnippet = space();
      createComponent(modelHeading.$$.fragment);
      gapAfterModelHeading = space();
      modelDocBox = element("div");
      createComponent(modelDocstring.$$.fragment);
      gapInModelDocBox = space();
      modelSummary = element("p");
      modelSummary.textContent = MODEL_SUMMARY_TEXT;
      gapAfterModelDocBox = space();
      createComponent(outputHeading.$$.fragment);
      gapAfterOutputHeading = space();
      outputDocBox = element("div");
      createComponent(outputDocstring.$$.fragment);
      gapInOutputDocBox = space();
      outputSummary = element("p");
      outputSummary.innerHTML = OUTPUT_SUMMARY_HTML;
      gapAfterOutputDocBox = space();
      createComponent(editLink.$$.fragment);
      gapAfterEditLink = space();
      tailSpacer = element("p");
      this.h();
    },

    // Claim nodes from an existing node list instead of creating them.
    l(nodes) {
      const headNodes = headSelector("svelte-u9bgzb", document.head);
      metadataTag = claimElement(headNodes, "META", { name: true, content: true });
      headNodes.forEach(detach);
      gapAfterMeta = claimSpace(nodes);
      leadSpacer = claimElement(nodes, "P", {});
      children(leadSpacer).forEach(detach);
      gapAfterLeadSpacer = claimSpace(nodes);
      claimComponent(copyMenu.$$.fragment, nodes);
      gapAfterCopyMenu = claimSpace(nodes);
      claimComponent(pageHeading.$$.fragment, nodes);
      gapAfterPageHeading = claimSpace(nodes);

      introParagraph = claimElement(nodes, "P", { "data-svelte-h": true });
      // Static content is rewritten only when the claimed node's tag differs.
      if (getSvelteDataset(introParagraph) !== "svelte-1h7uf3v") {
        introParagraph.innerHTML = INTRO_HTML;
      }
      gapAfterIntro = claimSpace(nodes);

      usageParagraph = claimElement(nodes, "P", { "data-svelte-h": true });
      if (getSvelteDataset(usageParagraph) !== "svelte-1vuni30") {
        usageParagraph.textContent = USAGE_TEXT;
      }
      gapAfterUsage = claimSpace(nodes);
      claimComponent(usageSnippet.$$.fragment, nodes);
      gapAfterSnippet = claimSpace(nodes);
      claimComponent(modelHeading.$$.fragment, nodes);
      gapAfterModelHeading = claimSpace(nodes);

      modelDocBox = claimElement(nodes, "DIV", { class: true });
      const modelDocBoxNodes = children(modelDocBox);
      claimComponent(modelDocstring.$$.fragment, modelDocBoxNodes);
      gapInModelDocBox = claimSpace(modelDocBoxNodes);
      modelSummary = claimElement(modelDocBoxNodes, "P", { "data-svelte-h": true });
      if (getSvelteDataset(modelSummary) !== "svelte-1idrolf") {
        modelSummary.textContent = MODEL_SUMMARY_TEXT;
      }
      modelDocBoxNodes.forEach(detach);
      gapAfterModelDocBox = claimSpace(nodes);
      claimComponent(outputHeading.$$.fragment, nodes);
      gapAfterOutputHeading = claimSpace(nodes);

      outputDocBox = claimElement(nodes, "DIV", { class: true });
      const outputDocBoxNodes = children(outputDocBox);
      claimComponent(outputDocstring.$$.fragment, outputDocBoxNodes);
      gapInOutputDocBox = claimSpace(outputDocBoxNodes);
      outputSummary = claimElement(outputDocBoxNodes, "P", { "data-svelte-h": true });
      if (getSvelteDataset(outputSummary) !== "svelte-11um37m") {
        outputSummary.innerHTML = OUTPUT_SUMMARY_HTML;
      }
      outputDocBoxNodes.forEach(detach);
      gapAfterOutputDocBox = claimSpace(nodes);
      claimComponent(editLink.$$.fragment, nodes);
      gapAfterEditLink = claimSpace(nodes);
      tailSpacer = claimElement(nodes, "P", {});
      children(tailSpacer).forEach(detach);
      this.h();
    },

    // Set attributes shared by the create and claim paths.
    h() {
      attr(metadataTag, "name", "hf:doc:metadata");
      attr(metadataTag, "content", PAGE_METADATA);
      attr(modelDocBox, "class", DOC_BOX_CLASS);
      attr(outputDocBox, "class", DOC_BOX_CLASS);
    },

    // Attach everything: the <meta> tag to document.head, the rest to `target`.
    m(target, anchor) {
      appendHydration(document.head, metadataTag);
      insertHydration(target, gapAfterMeta, anchor);
      insertHydration(target, leadSpacer, anchor);
      insertHydration(target, gapAfterLeadSpacer, anchor);
      mountComponent(copyMenu, target, anchor);
      insertHydration(target, gapAfterCopyMenu, anchor);
      mountComponent(pageHeading, target, anchor);
      insertHydration(target, gapAfterPageHeading, anchor);
      insertHydration(target, introParagraph, anchor);
      insertHydration(target, gapAfterIntro, anchor);
      insertHydration(target, usageParagraph, anchor);
      insertHydration(target, gapAfterUsage, anchor);
      mountComponent(usageSnippet, target, anchor);
      insertHydration(target, gapAfterSnippet, anchor);
      mountComponent(modelHeading, target, anchor);
      insertHydration(target, gapAfterModelHeading, anchor);
      insertHydration(target, modelDocBox, anchor);
      mountComponent(modelDocstring, modelDocBox, null);
      appendHydration(modelDocBox, gapInModelDocBox);
      appendHydration(modelDocBox, modelSummary);
      insertHydration(target, gapAfterModelDocBox, anchor);
      mountComponent(outputHeading, target, anchor);
      insertHydration(target, gapAfterOutputHeading, anchor);
      insertHydration(target, outputDocBox, anchor);
      mountComponent(outputDocstring, outputDocBox, null);
      appendHydration(outputDocBox, gapInOutputDocBox);
      appendHydration(outputDocBox, outputSummary);
      insertHydration(target, gapAfterOutputDocBox, anchor);
      mountComponent(editLink, target, anchor);
      insertHydration(target, gapAfterEditLink, anchor);
      insertHydration(target, tailSpacer, anchor);
      isIntroduced = true;
    },

    // Nothing on this page depends on changing state.
    p: noop,

    i(local) {
      if (isIntroduced) {
        return;
      }
      transitionIn(copyMenu.$$.fragment, local);
      transitionIn(pageHeading.$$.fragment, local);
      transitionIn(usageSnippet.$$.fragment, local);
      transitionIn(modelHeading.$$.fragment, local);
      transitionIn(modelDocstring.$$.fragment, local);
      transitionIn(outputHeading.$$.fragment, local);
      transitionIn(outputDocstring.$$.fragment, local);
      transitionIn(editLink.$$.fragment, local);
      isIntroduced = true;
    },

    o(local) {
      transitionOut(copyMenu.$$.fragment, local);
      transitionOut(pageHeading.$$.fragment, local);
      transitionOut(usageSnippet.$$.fragment, local);
      transitionOut(modelHeading.$$.fragment, local);
      transitionOut(modelDocstring.$$.fragment, local);
      transitionOut(outputHeading.$$.fragment, local);
      transitionOut(outputDocstring.$$.fragment, local);
      transitionOut(editLink.$$.fragment, local);
      isIntroduced = false;
    },

    d(detaching) {
      if (detaching) {
        detach(gapAfterMeta);
        detach(leadSpacer);
        detach(gapAfterLeadSpacer);
        detach(gapAfterCopyMenu);
        detach(gapAfterPageHeading);
        detach(introParagraph);
        detach(gapAfterIntro);
        detach(usageParagraph);
        detach(gapAfterUsage);
        detach(gapAfterSnippet);
        detach(gapAfterModelHeading);
        detach(modelDocBox);
        detach(gapAfterModelDocBox);
        detach(gapAfterOutputHeading);
        detach(outputDocBox);
        detach(gapAfterOutputDocBox);
        detach(gapAfterEditLink);
        detach(tailSpacer);
      }
      // The <meta> tag is detached regardless of `detaching`.
      detach(metadataTag);
      destroyComponent(copyMenu, detaching);
      destroyComponent(pageHeading, detaching);
      destroyComponent(usageSnippet, detaching);
      destroyComponent(modelHeading, detaching);
      // The two Docstring components are destroyed without the flag, as in the original.
      destroyComponent(modelDocstring);
      destroyComponent(outputHeading, detaching);
      destroyComponent(outputDocstring);
      destroyComponent(editLink, detaching);
    },
  };
}

/**
 * Instance function passed to `init`.
 *
 * Registers a mount callback that reads the `fw` query parameter and discards
 * the value, then returns an empty context array.
 *
 * @param {unknown} ctx - Unused.
 * @returns {Array} Always an empty array.
 */
function instance(ctx) {
  onMount(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; exported under the name `component`. */
class WanTransformer3DModelPage extends SvelteComponent {
  constructor(options) {
    super();
    init(this, options, instance, createFragment, safeNotEqual, {});
  }
}

export { WanTransformer3DModelPage as component };
Xet Storage Details
- Size:
- 10.4 kB
- Xet hash:
- 91776e8cd67c2eb0c1567ac98be2aeeaa1101c2d1536a45c73205bfa624e8fbd
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.