Buckets:

rtrm's picture
download
raw
12.9 kB
// Compiled Svelte page component for the "Transformer Temporal" API documentation page.
// This is build output: local identifiers are minified and the runtime helpers are imported
// under short aliases. The roles attributed to those helpers in the comments below are
// inferred from how they are called in this file; their implementations live in the
// ../chunks/* modules and should be confirmed there.
import { s as ne, n as re, o as se } from "../chunks/scheduler.182ea377.js";
import {
  S as ae,
  i as ie,
  g as i,
  s as r,
  r as y,
  A as de,
  h as d,
  f as o,
  c as s,
  j as D,
  u as M,
  x as R,
  k as P,
  y as f,
  a as n,
  v as O,
  d as L,
  t as C,
  w as N,
} from "../chunks/index.abf12888.js";
import { D as K } from "../chunks/Docstring.93f6f462.js";
import { H as Q } from "../chunks/Heading.16916d63.js";

/**
 * Fragment factory for the page.
 *
 * Instantiates three `Heading` (Q) and three `Docstring` (K) components with static props and
 * returns an object exposing the lifecycle methods `c`, `l`, `h`, `m`, `p`, `i`, `o` and `d`
 * that operate on the nodes held in this closure.
 *
 * @param {*} X - Passed in by the component initialiser; not read by this function.
 * @returns {object} The fragment object described above.
 */
function me(X) {
  let m, A, k, E, p, q, u,
    Y = "A Transformer model for video-like data.",
    H, h, I, a, _, W, v,
    ee = "A Transformer model for video-like data.",
    Z, c, g, G, $,
    oe = "The <code>TransformerTemporal</code> forward method.",
    U, T, F, l, b, J, w,
    te = "The output of <code>TransformerTemporalModel</code>.",
    S, z, B;

  // Page title.
  p = new Q({
    props: {
      title: "Transformer Temporal",
      local: "transformer-temporal",
      headingTag: "h1",
    },
  });

  // Section heading for the model class.
  h = new Q({
    props: {
      title: "TransformerTemporalModel",
      local: "diffusers.models.TransformerTemporalModel",
      headingTag: "h2",
    },
  });

  // Class signature and constructor parameter documentation.
  _ = new K({
    props: {
      name: "class diffusers.models.TransformerTemporalModel",
      anchor: "diffusers.models.TransformerTemporalModel",
      parameters: [
        { name: "num_attention_heads", val: ": int = 16" },
        { name: "attention_head_dim", val: ": int = 88" },
        { name: "in_channels", val: ": typing.Optional[int] = None" },
        { name: "out_channels", val: ": typing.Optional[int] = None" },
        { name: "num_layers", val: ": int = 1" },
        { name: "dropout", val: ": float = 0.0" },
        { name: "norm_num_groups", val: ": int = 32" },
        { name: "cross_attention_dim", val: ": typing.Optional[int] = None" },
        { name: "attention_bias", val: ": bool = False" },
        { name: "sample_size", val: ": typing.Optional[int] = None" },
        { name: "activation_fn", val: ": str = 'geglu'" },
        { name: "norm_elementwise_affine", val: ": bool = True" },
        { name: "double_self_attention", val: ": bool = True" },
        { name: "positional_embeddings", val: ": typing.Optional[str] = None" },
        { name: "num_positional_embeddings", val: ": typing.Optional[int] = None" },
      ],
      // NOTE: continuation lines of the template literals below start at column 0 on purpose;
      // any indentation would become part of the rendered description text.
      parametersDescription: [
        {
          anchor: "diffusers.models.TransformerTemporalModel.num_attention_heads",
          // Kept on one line: a double-quoted string literal may not contain a raw line break.
          description: "<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 16) &#x2014; The number of heads to use for multi-head attention.",
          name: "num_attention_heads",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.attention_head_dim",
          description: "<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 88) &#x2014; The number of channels in each head.",
          name: "attention_head_dim",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.in_channels",
          description: `<strong>in_channels</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The number of channels in the input and output (specify if the input is <strong>continuous</strong>).`,
          name: "in_channels",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.num_layers",
          description: "<strong>num_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014; The number of layers of Transformer blocks to use.",
          name: "num_layers",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.dropout",
          description: "<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) &#x2014; The dropout probability to use.",
          name: "dropout",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.cross_attention_dim",
          description: "<strong>cross_attention_dim</strong> (<code>int</code>, <em>optional</em>) &#x2014; The number of <code>encoder_hidden_states</code> dimensions to use.",
          name: "cross_attention_dim",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.attention_bias",
          description: `<strong>attention_bias</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Configure if the <code>TransformerBlock</code> attention should contain a bias parameter.`,
          name: "attention_bias",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.sample_size",
          description: `<strong>sample_size</strong> (<code>int</code>, <em>optional</em>) &#x2014; The width of the latent images (specify if the input is <strong>discrete</strong>).
This is fixed during training since it is used to learn a number of position embeddings.`,
          name: "sample_size",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.activation_fn",
          description: `<strong>activation_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;geglu&quot;</code>) &#x2014;
Activation function to use in feed-forward. See <code>diffusers.models.activations.get_activation</code> for supported
activation functions.`,
          name: "activation_fn",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.norm_elementwise_affine",
          description: `<strong>norm_elementwise_affine</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Configure if the <code>TransformerBlock</code> should use learnable elementwise affine parameters for normalization.`,
          name: "norm_elementwise_affine",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.double_self_attention",
          description: `<strong>double_self_attention</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Configure if each <code>TransformerBlock</code> should contain two self-attention layers.
positional_embeddings &#x2014; (<code>str</code>, <em>optional</em>):
The type of positional embeddings to apply to the sequence input before passing use.
num_positional_embeddings &#x2014; (<code>int</code>, <em>optional</em>):
The maximum length of the sequence over which to apply positional embeddings.`,
          name: "double_self_attention",
        },
      ],
      source: "https://github.com/huggingface/diffusers/blob/v0.22.3/src/diffusers/models/transformer_temporal.py#L39",
    },
  });

  // `forward` method signature, parameter documentation and return documentation.
  g = new K({
    props: {
      name: "forward",
      anchor: "diffusers.models.TransformerTemporalModel.forward",
      parameters: [
        { name: "hidden_states", val: ": FloatTensor" },
        { name: "encoder_hidden_states", val: ": typing.Optional[torch.LongTensor] = None" },
        { name: "timestep", val: ": typing.Optional[torch.LongTensor] = None" },
        { name: "class_labels", val: ": LongTensor = None" },
        { name: "num_frames", val: ": int = 1" },
        { name: "cross_attention_kwargs", val: ": typing.Union[typing.Dict[str, typing.Any], NoneType] = None" },
        { name: "return_dict", val: ": bool = True" },
      ],
      parametersDescription: [
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.hidden_states",
          description: `<strong>hidden_states</strong> (<code>torch.LongTensor</code> of shape <code>(batch size, num latent pixels)</code> if discrete, <code>torch.FloatTensor</code> of shape <code>(batch size, channel, height, width)</code> if continuous) &#x2014;
Input hidden_states.`,
          name: "hidden_states",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.encoder_hidden_states",
          description: `<strong>encoder_hidden_states</strong> ( <code>torch.LongTensor</code> of shape <code>(batch size, encoder_hidden_states dim)</code>, <em>optional</em>) &#x2014;
Conditional embeddings for cross attention layer. If not given, cross-attention defaults to
self-attention.`,
          name: "encoder_hidden_states",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.timestep",
          description: `<strong>timestep</strong> ( <code>torch.LongTensor</code>, <em>optional</em>) &#x2014;
Used to indicate denoising step. Optional timestep to be applied as an embedding in <code>AdaLayerNorm</code>.`,
          name: "timestep",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.class_labels",
          description: `<strong>class_labels</strong> ( <code>torch.LongTensor</code> of shape <code>(batch size, num classes)</code>, <em>optional</em>) &#x2014;
Used to indicate class labels conditioning. Optional class labels to be applied as an embedding in
<code>AdaLayerZeroNorm</code>.`,
          name: "class_labels",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.num_frames",
          description: `<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The number of frames to be processed per batch. This is used to reshape the hidden states.`,
          name: "num_frames",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.cross_attention_kwargs",
          description: `<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) &#x2014;
A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
<code>self.processor</code> in
<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,
          name: "cross_attention_kwargs",
        },
        {
          anchor: "diffusers.models.TransformerTemporalModel.forward.return_dict",
          description: `<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether or not to return a <a href="/docs/diffusers/v0.22.3/en/api/models/unet2d-cond#diffusers.models.unet_2d_condition.UNet2DConditionOutput">UNet2DConditionOutput</a> instead of a plain
tuple.`,
          name: "return_dict",
        },
      ],
      source: "https://github.com/huggingface/diffusers/blob/v0.22.3/src/diffusers/models/transformer_temporal.py#L119",
      returnDescription: `<script context="module">export const metadata = 'undefined';<\/script>
<p>If <code>return_dict</code> is True, an <a
href="/docs/diffusers/v0.22.3/en/api/models/transformer_temporal#diffusers.models.transformer_temporal.TransformerTemporalModelOutput"
>TransformerTemporalModelOutput</a> is
returned, otherwise a <code>tuple</code> where the first element is the sample tensor.</p>
`,
      returnType: `<script context="module">export const metadata = 'undefined';<\/script>
<p><a
href="/docs/diffusers/v0.22.3/en/api/models/transformer_temporal#diffusers.models.transformer_temporal.TransformerTemporalModelOutput"
>TransformerTemporalModelOutput</a> or <code>tuple</code></p>
`,
    },
  });

  // Section heading for the output class.
  T = new Q({
    props: {
      title: "TransformerTemporalModelOutput",
      local: "diffusers.models.transformer_temporal.TransformerTemporalModelOutput",
      headingTag: "h2",
    },
  });

  // Output class signature and field documentation.
  b = new K({
    props: {
      name: "class diffusers.models.transformer_temporal.TransformerTemporalModelOutput",
      anchor: "diffusers.models.transformer_temporal.TransformerTemporalModelOutput",
      parameters: [{ name: "sample", val: ": FloatTensor" }],
      parametersDescription: [
        {
          anchor: "diffusers.models.transformer_temporal.TransformerTemporalModelOutput.sample",
          description: `<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size x num_frames, num_channels, height, width)</code>) &#x2014;
The hidden states output conditioned on <code>encoder_hidden_states</code> input.`,
          name: "sample",
        },
      ],
      source: "https://github.com/huggingface/diffusers/blob/v0.22.3/src/diffusers/models/transformer_temporal.py#L27",
    },
  });

  return {
    // Create: build every node and child-component fragment from scratch, then apply attributes.
    c() {
      m = i("meta");
      A = r();
      k = i("p");
      E = r();
      y(p.$$.fragment);
      q = r();
      u = i("p");
      u.textContent = Y;
      H = r();
      y(h.$$.fragment);
      I = r();
      a = i("div");
      y(_.$$.fragment);
      W = r();
      v = i("p");
      v.textContent = ee;
      Z = r();
      c = i("div");
      y(g.$$.fragment);
      G = r();
      $ = i("p");
      $.innerHTML = oe;
      U = r();
      y(T.$$.fragment);
      F = r();
      l = i("div");
      y(b.$$.fragment);
      J = r();
      w = i("p");
      w.innerHTML = te;
      S = r();
      z = i("p");
      this.h();
    },

    // Claim: same node sequence as `c`, but obtained from the node list `e` (and from
    // document.head for the meta tag) instead of being created — presumably hydration of
    // server-rendered markup; confirm against the helpers in ../chunks/index.abf12888.js.
    l(e) {
      const t = de("svelte-u9bgzb", document.head);
      m = d(t, "META", { name: true, content: true });
      t.forEach(o);
      A = s(e);
      k = d(e, "P", {});
      D(k).forEach(o);
      E = s(e);
      M(p.$$.fragment, e);
      q = s(e);
      u = d(e, "P", { "data-svelte-h": true });
      // Static text is only rewritten when the value returned by R() differs from the
      // hash baked in at compile time.
      if (R(u) !== "svelte-1ywwpi7") {
        u.textContent = Y;
      }
      H = s(e);
      M(h.$$.fragment, e);
      I = s(e);
      a = d(e, "DIV", { class: true });
      const x = D(a);
      M(_.$$.fragment, x);
      W = s(x);
      v = d(x, "P", { "data-svelte-h": true });
      if (R(v) !== "svelte-1ywwpi7") {
        v.textContent = ee;
      }
      Z = s(x);
      c = d(x, "DIV", { class: true });
      const V = D(c);
      M(g.$$.fragment, V);
      G = s(V);
      $ = d(V, "P", { "data-svelte-h": true });
      if (R($) !== "svelte-14zjqkb") {
        $.innerHTML = oe;
      }
      V.forEach(o);
      x.forEach(o);
      U = s(e);
      M(T.$$.fragment, e);
      F = s(e);
      l = d(e, "DIV", { class: true });
      const j = D(l);
      M(b.$$.fragment, j);
      J = s(j);
      w = d(j, "P", { "data-svelte-h": true });
      if (R(w) !== "svelte-1wdi1wk") {
        w.innerHTML = te;
      }
      j.forEach(o);
      S = s(e);
      z = d(e, "P", {});
      D(z).forEach(o);
      this.h();
    },

    // Attributes: tag the <meta> element with the page metadata JSON (`le`) and give the
    // three docstring wrapper <div>s their shared CSS classes.
    h() {
      P(m, "name", "hf:doc:metadata");
      P(m, "content", le);
      P(c, "class", "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8");
      P(a, "class", "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8");
      // Kept on one line: a double-quoted string literal may not contain a raw line break.
      P(l, "class", "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8");
    },

    // Mount: attach the meta tag to document.head and place everything else under target `e`
    // relative to anchor `t`; nested items go inside their wrapper <div>s (a, c, l).
    m(e, t) {
      f(document.head, m);
      n(e, A, t);
      n(e, k, t);
      n(e, E, t);
      O(p, e, t);
      n(e, q, t);
      n(e, u, t);
      n(e, H, t);
      O(h, e, t);
      n(e, I, t);
      n(e, a, t);
      O(_, a, null);
      f(a, W);
      f(a, v);
      f(a, Z);
      f(a, c);
      O(g, c, null);
      f(c, G);
      f(c, $);
      n(e, U, t);
      O(T, e, t);
      n(e, F, t);
      n(e, l, t);
      O(b, l, null);
      f(l, J);
      f(l, w);
      n(e, S, t);
      n(e, z, t);
      B = true;
    },

    // Update: the shared helper `re` is used as-is; this fragment supplies no update logic.
    p: re,

    // Intro: run once per "in" transition; `B` guards against repeated calls.
    i(e) {
      if (B) {
        return;
      }
      L(p.$$.fragment, e);
      L(h.$$.fragment, e);
      L(_.$$.fragment, e);
      L(g.$$.fragment, e);
      L(T.$$.fragment, e);
      L(b.$$.fragment, e);
      B = true;
    },

    // Outro: forward to every child component and clear the intro guard.
    o(e) {
      C(p.$$.fragment, e);
      C(h.$$.fragment, e);
      C(_.$$.fragment, e);
      C(g.$$.fragment, e);
      C(T.$$.fragment, e);
      C(b.$$.fragment, e);
      B = false;
    },

    // Destroy: when `e` is truthy the top-level nodes are passed to `o`; the meta tag is
    // always passed to `o`, and every child component is passed to `N`.
    d(e) {
      if (e) {
        o(A);
        o(k);
        o(E);
        o(q);
        o(u);
        o(H);
        o(I);
        o(a);
        o(U);
        o(F);
        o(l);
        o(S);
        o(z);
      }
      o(m);
      N(p, e);
      N(h, e);
      N(_);
      N(g);
      N(T, e);
      N(b);
    },
  };
}

// Page metadata (title and section outline) as a JSON string; written to the
// `hf:doc:metadata` <meta> tag in the fragment's `h()` method.
const le = '{"title":"Transformer Temporal","local":"transformer-temporal","sections":[{"title":"TransformerTemporalModel","local":"diffusers.models.TransformerTemporalModel","sections":[],"depth":2},{"title":"TransformerTemporalModelOutput","local":"diffusers.models.transformer_temporal.TransformerTemporalModelOutput","sections":[],"depth":2}],"depth":1}';

/**
 * Instance function for the component.
 *
 * Registers a callback through `se` (presumably Svelte's onMount — confirm in
 * ../chunks/scheduler.182ea377.js) that reads the `fw` query parameter from the current URL.
 * The value read is discarded, so the callback has no observable effect in this build.
 *
 * @param {*} X - Passed in by the component initialiser; not read by this function.
 * @returns {Array} Always an empty array.
 */
function fe(X) {
  se(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/**
 * Page component class, exported as `component`.
 * Wires `fe` (instance function) and `me` (fragment factory) into the base class via `ie`.
 */
class _e extends ae {
  /**
   * @param {*} m - Forwarded unchanged to the initialiser `ie` (presumably component options).
   */
  constructor(m) {
    super();
    ie(this, m, fe, me, ne, {});
  }
}

export { _e as component };

Xet Storage Details

Size:
12.9 kB
·
Xet hash:
dbcaac8c96fd42d73ab1debe3b78b2ae03136788100570525d5eecd4b06b051c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.