Buckets:
| import{S as yx,i as xx,s as wx,e as n,k as i,w as f,t as l,$ as $x,M as Mx,c as s,d as t,m as d,a as r,x as u,h as c,a0 as Dx,b as a,G as e,g as p,y as h,q as _,o as g,B as b,v as Tx,L as sl}from"../../chunks/vendor-hf-doc-builder.js";import{T as bx}from"../../chunks/Tip-hf-doc-builder.js";import{D as y}from"../../chunks/Docstring-hf-doc-builder.js";import{C as rl}from"../../chunks/CodeBlock-hf-doc-builder.js";import{I as k}from"../../chunks/IconCopyLink-hf-doc-builder.js";import{E as nl}from"../../chunks/ExampleCodeBlock-hf-doc-builder.js";function kx(B){let x,T,M,w,D;return w=new rl({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVU5ldDJEQ29uZGl0aW9uTW9kZWwlMEFmcm9tJTIweGZvcm1lcnMub3BzJTIwaW1wb3J0JTIwTWVtb3J5RWZmaWNpZW50QXR0ZW50aW9uRmxhc2hBdHRlbnRpb25PcCUwQSUwQW1vZGVsJTIwJTNEJTIwVU5ldDJEQ29uZGl0aW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJpbGl0eWFpJTJGc3RhYmxlLWRpZmZ1c2lvbi0yLTElMjIlMkMlMjBzdWJmb2xkZXIlM0QlMjJ1bmV0JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTBBKSUwQW1vZGVsJTIwJTNEJTIwbW9kZWwudG8oJTIyY3VkYSUyMiklMEFtb2RlbC5lbmFibGVfeGZvcm1lcnNfbWVtb3J5X2VmZmljaWVudF9hdHRlbnRpb24oYXR0ZW50aW9uX29wJTNETWVtb3J5RWZmaWNpZW50QXR0ZW50aW9uRmxhc2hBdHRlbnRpb25PcCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> UNet2DConditionModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> xformers.ops <span class="hljs-keyword">import</span> MemoryEfficientAttentionFlashAttentionOp | |
| <span class="hljs-meta">>>> </span>model = UNet2DConditionModel.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"stabilityai/stable-diffusion-2-1"</span>, subfolder=<span class="hljs-string">"unet"</span>, torch_dtype=torch.float16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>model = model.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>model.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)`}}),{c(){x=n("p"),T=l("Examples:"),M=i(),f(w.$$.fragment)},l(v){x=s(v,"P",{});var $=r(x);T=c($,"Examples:"),$.forEach(t),M=d(v),u(w.$$.fragment,v)},m(v,$){p(v,x,$),e(x,T),p(v,M,$),h(w,v,$),D=!0},p:sl,i(v){D||(_(w.$$.fragment,v),D=!0)},o(v){g(w.$$.fragment,v),D=!1},d(v){v&&t(x),v&&t(M),b(w,v)}}}function Ax(B){let x,T,M,w,D,v,$,se;return{c(){x=n("p"),T=l("It is required to be logged in ("),M=n("code"),w=l("huggingface-cli login"),D=l(") when you want to use private or "),v=n("a"),$=l(`gated | |
| models`),se=l("."),this.h()},l(Ue){x=s(Ue,"P",{});var Z=r(x);T=c(Z,"It is required to be logged in ("),M=s(Z,"CODE",{});var Y=r(M);w=c(Y,"huggingface-cli login"),Y.forEach(t),D=c(Z,") when you want to use private or "),v=s(Z,"A",{href:!0,rel:!0});var yr=r(v);$=c(yr,`gated | |
| models`),yr.forEach(t),se=c(Z,"."),Z.forEach(t),this.h()},h(){a(v,"href","https://huggingface.co/docs/hub/models-gated#gated-models"),a(v,"rel","nofollow")},m(Ue,Z){p(Ue,x,Z),e(x,T),e(x,M),e(M,w),e(x,D),e(x,v),e(v,$),e(x,se)},d(Ue){Ue&&t(x)}}}function Nx(B){let x,T,M,w,D;return{c(){x=n("p"),T=l("Activate the special "),M=n("a"),w=l("\u201Coffline-mode\u201D"),D=l(` to use | |
| this method in a firewalled environment.`),this.h()},l(v){x=s(v,"P",{});var $=r(x);T=c($,"Activate the special "),M=s($,"A",{href:!0,rel:!0});var se=r(M);w=c(se,"\u201Coffline-mode\u201D"),se.forEach(t),D=c($,` to use | |
| this method in a firewalled environment.`),$.forEach(t),this.h()},h(){a(M,"href","https://huggingface.co/diffusers/installation.html#offline-mode"),a(M,"rel","nofollow")},m(v,$){p(v,x,$),e(x,T),e(x,M),e(M,w),e(x,D)},d(v){v&&t(x)}}}function Ux(B){let x,T,M,w,D;return w=new rl({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsYXhVTmV0MkRDb25kaXRpb25Nb2RlbCUwQSUwQSUyMyUyMERvd25sb2FkJTIwbW9kZWwlMjBhbmQlMjBjb25maWd1cmF0aW9uJTIwZnJvbSUyMGh1Z2dpbmdmYWNlLmNvJTIwYW5kJTIwY2FjaGUuJTBBbW9kZWwlMkMlMjBwYXJhbXMlMjAlM0QlMjBGbGF4VU5ldDJEQ29uZGl0aW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyKSUwQSUyMyUyME1vZGVsJTIwd2FzJTIwc2F2ZWQlMjB1c2luZyUyMCpzYXZlX3ByZXRyYWluZWQoJy4lMkZ0ZXN0JTJGc2F2ZWRfbW9kZWwlMkYnKSolMjAoZm9yJTIwZXhhbXBsZSUyMHB1cnBvc2VzJTJDJTIwbm90JTIwcnVubmFibGUpLiUwQW1vZGVsJTJDJTIwcGFyYW1zJTIwJTNEJTIwRmxheFVOZXQyRENvbmRpdGlvbk1vZGVsLmZyb21fcHJldHJhaW5lZCglMjIuJTJGdGVzdCUyRnNhdmVkX21vZGVsJTJGJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlaxUNet2DConditionModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Download model and configuration from huggingface.co and cache.</span> | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Model was saved using *save_pretrained('./test/saved_model/')* (for example purposes, not runnable).</span> | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"./test/saved_model/"</span>)`}}),{c(){x=n("p"),T=l("Examples:"),M=i(),f(w.$$.fragment)},l(v){x=s(v,"P",{});var $=r(x);T=c($,"Examples:"),$.forEach(t),M=d(v),u(w.$$.fragment,v)},m(v,$){p(v,x,$),e(x,T),p(v,M,$),h(w,v,$),D=!0},p:sl,i(v){D||(_(w.$$.fragment,v),D=!0)},o(v){g(w.$$.fragment,v),D=!1},d(v){v&&t(x),v&&t(M),b(w,v)}}}function Ex(B){let x,T,M,w,D;return w=new rl({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsYXhVTmV0MkRDb25kaXRpb25Nb2RlbCUwQSUwQSUyMyUyMGxvYWQlMjBtb2RlbCUwQW1vZGVsJTJDJTIwcGFyYW1zJTIwJTNEJTIwRmxheFVOZXQyRENvbmRpdGlvbk1vZGVsLmZyb21fcHJldHJhaW5lZCglMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiklMEElMjMlMjBCeSUyMGRlZmF1bHQlMkMlMjB0aGUlMjBtb2RlbCUyMHBhcmFtZXRlcnMlMjB3aWxsJTIwYmUlMjBpbiUyMGZwMzIlMjBwcmVjaXNpb24lMkMlMjB0byUyMGNhc3QlMjB0aGVzZSUyMHRvJTIwYmZsb2F0MTYlMjBwcmVjaXNpb24lMEFwYXJhbXMlMjAlM0QlMjBtb2RlbC50b19iZjE2KHBhcmFtcyklMEElMjMlMjBJZiUyMHlvdSUyMGRvbid0JTIwd2FudCUyMHRvJTIwY2FzdCUyMGNlcnRhaW4lMjBwYXJhbWV0ZXJzJTIwKGZvciUyMGV4YW1wbGUlMjBsYXllciUyMG5vcm0lMjBiaWFzJTIwYW5kJTIwc2NhbGUpJTBBJTIzJTIwdGhlbiUyMHBhc3MlMjB0aGUlMjBtYXNrJTIwYXMlMjBmb2xsb3dzJTBBZnJvbSUyMGZsYXglMjBpbXBvcnQlMjB0cmF2ZXJzZV91dGlsJTBBJTBBbW9kZWwlMkMlMjBwYXJhbXMlMjAlM0QlMjBGbGF4VU5ldDJEQ29uZGl0aW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyKSUwQWZsYXRfcGFyYW1zJTIwJTNEJTIwdHJhdmVyc2VfdXRpbC5mbGF0dGVuX2RpY3QocGFyYW1zKSUwQW1hc2slMjAlM0QlMjAlN0IlMEElMjAlMjAlMjAlMjBwYXRoJTNBJTIwKHBhdGglNUItMiU1RCUyMCElM0QlMjAoJTIyTGF5ZXJOb3JtJTIyJTJDJTIwJTIyYmlhcyUyMiklMjBhbmQlMjBwYXRoJTVCLTIlM0ElNUQlMjAhJTNEJTIwKCUyMkxheWVyTm9ybSUyMiUyQyUyMCUyMnNjYWxlJTIyKSklMEElMjAlMjAlMjAlMjBmb3IlMjBwYXRoJTIwaW4lMjBmbGF0X3BhcmFtcyUwQSU3RCUwQW1hc2slMjAlM0QlMjB0cmF2ZXJzZV91dGlsLnVuZmxhdHRlbl9kaWN0KG1hc2spJTBBcGFyYW1zJTIwJTNEJTIwbW9kZWwudG9fYmYxNihwYXJhbXMlMkMlMjBtYXNrKQ==",highlighted:`<span class="hljs-meta">>>> </span><span 
class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlaxUNet2DConditionModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># load model</span> | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># By default, the model parameters will be in fp32 precision, to cast these to bfloat16 precision</span> | |
| <span class="hljs-meta">>>> </span>params = model.to_bf16(params) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># If you don't want to cast certain parameters (for example layer norm bias and scale)</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># then pass the mask as follows</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> flax <span class="hljs-keyword">import</span> traverse_util | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span>flat_params = traverse_util.flatten_dict(params) | |
| <span class="hljs-meta">>>> </span>mask = { | |
| <span class="hljs-meta">... </span> path: (path[-<span class="hljs-number">2</span>] != (<span class="hljs-string">"LayerNorm"</span>, <span class="hljs-string">"bias"</span>) <span class="hljs-keyword">and</span> path[-<span class="hljs-number">2</span>:] != (<span class="hljs-string">"LayerNorm"</span>, <span class="hljs-string">"scale"</span>)) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> path <span class="hljs-keyword">in</span> flat_params | |
| <span class="hljs-meta">... </span>} | |
| <span class="hljs-meta">>>> </span>mask = traverse_util.unflatten_dict(mask) | |
| <span class="hljs-meta">>>> </span>params = model.to_bf16(params, mask)`}}),{c(){x=n("p"),T=l("Examples:"),M=i(),f(w.$$.fragment)},l(v){x=s(v,"P",{});var $=r(x);T=c($,"Examples:"),$.forEach(t),M=d(v),u(w.$$.fragment,v)},m(v,$){p(v,x,$),e(x,T),p(v,M,$),h(w,v,$),D=!0},p:sl,i(v){D||(_(w.$$.fragment,v),D=!0)},o(v){g(w.$$.fragment,v),D=!1},d(v){v&&t(x),v&&t(M),b(w,v)}}}function Cx(B){let x,T,M,w,D;return w=new rl({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsYXhVTmV0MkRDb25kaXRpb25Nb2RlbCUwQSUwQSUyMyUyMGxvYWQlMjBtb2RlbCUwQW1vZGVsJTJDJTIwcGFyYW1zJTIwJTNEJTIwRmxheFVOZXQyRENvbmRpdGlvbk1vZGVsLmZyb21fcHJldHJhaW5lZCglMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiklMEElMjMlMjBCeSUyMGRlZmF1bHQlMkMlMjB0aGUlMjBtb2RlbCUyMHBhcmFtcyUyMHdpbGwlMjBiZSUyMGluJTIwZnAzMiUyQyUyMHRvJTIwY2FzdCUyMHRoZXNlJTIwdG8lMjBmbG9hdDE2JTBBcGFyYW1zJTIwJTNEJTIwbW9kZWwudG9fZnAxNihwYXJhbXMpJTBBJTIzJTIwSWYlMjB5b3UlMjB3YW50JTIwZG9uJ3QlMjB3YW50JTIwdG8lMjBjYXN0JTIwY2VydGFpbiUyMHBhcmFtZXRlcnMlMjAoZm9yJTIwZXhhbXBsZSUyMGxheWVyJTIwbm9ybSUyMGJpYXMlMjBhbmQlMjBzY2FsZSklMEElMjMlMjB0aGVuJTIwcGFzcyUyMHRoZSUyMG1hc2slMjBhcyUyMGZvbGxvd3MlMEFmcm9tJTIwZmxheCUyMGltcG9ydCUyMHRyYXZlcnNlX3V0aWwlMEElMEFtb2RlbCUyQyUyMHBhcmFtcyUyMCUzRCUyMEZsYXhVTmV0MkRDb25kaXRpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIpJTBBZmxhdF9wYXJhbXMlMjAlM0QlMjB0cmF2ZXJzZV91dGlsLmZsYXR0ZW5fZGljdChwYXJhbXMpJTBBbWFzayUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMHBhdGglM0ElMjAocGF0aCU1Qi0yJTVEJTIwISUzRCUyMCglMjJMYXllck5vcm0lMjIlMkMlMjAlMjJiaWFzJTIyKSUyMGFuZCUyMHBhdGglNUItMiUzQSU1RCUyMCElM0QlMjAoJTIyTGF5ZXJOb3JtJTIyJTJDJTIwJTIyc2NhbGUlMjIpKSUwQSUyMCUyMCUyMCUyMGZvciUyMHBhdGglMjBpbiUyMGZsYXRfcGFyYW1zJTBBJTdEJTBBbWFzayUyMCUzRCUyMHRyYXZlcnNlX3V0aWwudW5mbGF0dGVuX2RpY3QobWFzayklMEFwYXJhbXMlMjAlM0QlMjBtb2RlbC50b19mcDE2KHBhcmFtcyUyQyUyMG1hc2sp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlaxUNet2DConditionModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># load model</span> | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># By default, the model params will be in fp32, to cast these to float16</span> | |
| <span class="hljs-meta">>>> </span>params = model.to_fp16(params) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># If you don't want to cast certain parameters (for example layer norm bias and scale)</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># then pass the mask as follows</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> flax <span class="hljs-keyword">import</span> traverse_util | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span>flat_params = traverse_util.flatten_dict(params) | |
| <span class="hljs-meta">>>> </span>mask = { | |
| <span class="hljs-meta">... </span> path: (path[-<span class="hljs-number">2</span>] != (<span class="hljs-string">"LayerNorm"</span>, <span class="hljs-string">"bias"</span>) <span class="hljs-keyword">and</span> path[-<span class="hljs-number">2</span>:] != (<span class="hljs-string">"LayerNorm"</span>, <span class="hljs-string">"scale"</span>)) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> path <span class="hljs-keyword">in</span> flat_params | |
| <span class="hljs-meta">... </span>} | |
| <span class="hljs-meta">>>> </span>mask = traverse_util.unflatten_dict(mask) | |
| <span class="hljs-meta">>>> </span>params = model.to_fp16(params, mask)`}}),{c(){x=n("p"),T=l("Examples:"),M=i(),f(w.$$.fragment)},l(v){x=s(v,"P",{});var $=r(x);T=c($,"Examples:"),$.forEach(t),M=d(v),u(w.$$.fragment,v)},m(v,$){p(v,x,$),e(x,T),p(v,M,$),h(w,v,$),D=!0},p:sl,i(v){D||(_(w.$$.fragment,v),D=!0)},o(v){g(w.$$.fragment,v),D=!1},d(v){v&&t(x),v&&t(M),b(w,v)}}}function Fx(B){let x,T,M,w,D;return w=new rl({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsYXhVTmV0MkRDb25kaXRpb25Nb2RlbCUwQSUwQSUyMyUyMERvd25sb2FkJTIwbW9kZWwlMjBhbmQlMjBjb25maWd1cmF0aW9uJTIwZnJvbSUyMGh1Z2dpbmdmYWNlLmNvJTBBbW9kZWwlMkMlMjBwYXJhbXMlMjAlM0QlMjBGbGF4VU5ldDJEQ29uZGl0aW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyKSUwQSUyMyUyMEJ5JTIwZGVmYXVsdCUyQyUyMHRoZSUyMG1vZGVsJTIwcGFyYW1zJTIwd2lsbCUyMGJlJTIwaW4lMjBmcDMyJTJDJTIwdG8lMjBpbGx1c3RyYXRlJTIwdGhlJTIwdXNlJTIwb2YlMjB0aGlzJTIwbWV0aG9kJTJDJTBBJTIzJTIwd2UnbGwlMjBmaXJzdCUyMGNhc3QlMjB0byUyMGZwMTYlMjBhbmQlMjBiYWNrJTIwdG8lMjBmcDMyJTBBcGFyYW1zJTIwJTNEJTIwbW9kZWwudG9fZjE2KHBhcmFtcyklMEElMjMlMjBub3clMjBjYXN0JTIwYmFjayUyMHRvJTIwZnAzMiUwQXBhcmFtcyUyMCUzRCUyMG1vZGVsLnRvX2ZwMzIocGFyYW1zKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlaxUNet2DConditionModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Download model and configuration from huggingface.co</span> | |
| <span class="hljs-meta">>>> </span>model, params = FlaxUNet2DConditionModel.from_pretrained(<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># By default, the model params will be in fp32, to illustrate the use of this method,</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># we'll first cast to fp16 and back to fp32</span> | |
| <span class="hljs-meta">>>> </span>params = model.to_f16(params) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># now cast back to fp32</span> | |
| <span class="hljs-meta">>>> </span>params = model.to_fp32(params)`}}),{c(){x=n("p"),T=l("Examples:"),M=i(),f(w.$$.fragment)},l(v){x=s(v,"P",{});var $=r(x);T=c($,"Examples:"),$.forEach(t),M=d(v),u(w.$$.fragment,v)},m(v,$){p(v,x,$),e(x,T),p(v,M,$),h(w,v,$),D=!0},p:sl,i(v){D||(_(w.$$.fragment,v),D=!0)},o(v){g(w.$$.fragment,v),D=!1},d(v){v&&t(x),v&&t(M),b(w,v)}}}function Px(B){let x,T,M,w,D,v,$,se,Ue,Z,Y,yr,al,vx='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>p</mi><mi>\u03B8</mi></msub><mo stretchy="false">(</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>\u2212</mo><mn>1</mn></mrow></msub><mi mathvariant="normal">\u2223</mi><msub><mi>x</mi><mi>t</mi></msub><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">p_{\\theta}(x_{t-1}|x_{t})</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord"><span class="mord mathnormal">p</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight" style="margin-right:0.02778em;">\u03B8</span></span></span></span></span><span class="vlist-s">\u200B</span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3011em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 
mtight"><span class="mord mtight"><span class="mord mathnormal mtight">t</span><span class="mbin mtight">\u2212</span><span class="mord mtight">1</span></span></span></span></span><span class="vlist-s">\u200B</span></span><span class="vlist-r"><span class="vlist" style="height:0.2083em;"><span></span></span></span></span></span></span><span class="mord">\u2223</span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">t</span></span></span></span></span><span class="vlist-s">\u200B</span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">)</span></span></span></span>',il,aa,Om,Lm,dl,Ee,ho,ia,Tt,Vm,da,Bm,ll,N,kt,qm,la,Jm,zm,xr,wr,Wm,Xm,Sm,ca,_e,ma,Gm,Km,pa,Rm,Zm,$r,Ym,Qm,Hm,ge,At,ep,fa,op,tp,ua,np,sp,_o,Nt,rp,ha,ap,ip,be,Ut,dp,_a,lp,cp,ga,mp,pp,W,Et,fp,ba,up,hp,va,_p,gp,ya,bp,vp,go,yp,I,Ct,xp,xa,wp,$p,Ce,Mp,wa,Dp,Tp,$a,kp,Ap,Np,Ft,Up,Ma,Ep,Cp,Fp,Pt,Pp,Da,jp,Ip,Op,bo,Lp,vo,Vp,yo,jt,Bp,Ta,qp,Jp,xo,It,zp,Ot,Wp,ka,Xp,Sp,cl,Fe,wo,Aa,Lt,Gp,Na,Kp,ml,Vt,Bt,pl,Pe,$o,Ua,qt,Rp,Ea,Zp,fl,Q,Jt,Yp,Ca,Qp,Hp,zt,ef,Mr,of,tf,nf,Dr,Wt,ul,je,Mo,Fa,Xt,sf,Pa,rf,hl,St,Gt,_l,Ie,Do,ja,Kt,af,Ia,df,gl,H,Rt,lf,Oa,cf,mf,Zt,pf,Tr,ff,uf,hf,kr,Yt,bl,Oe,To,La,Qt,_f,Va,gf,vl,Ht,en,yl,Le,ko,Ba,on,bf,qa,vf,xl,F,tn,yf,Ja,xf,wf,nn,$f,Ar,Mf,Df,Tf,Nr,sn,kf,ve,rn,Af,za,Nf,Uf,Wa,Ef,Cf,Ur,an,Ff,Ao,dn,Pf,Xa,jf,wl,Ve,No,Sa,ln,If,Ga,Of,$l,cn,mn,Ml,Be,Uo,Ka,pn,Lf,Ra,Vf,Dl,P,fn,Bf,Za,qf,Jf,un,zf,Er,Wf,Xf,Sf,Cr,hn,Gf,ye,_n,Kf,Ya,Rf,Zf,Qa,Yf,Qf,Fr,gn,Hf,Eo,bn,eu,Ha,ou,Tl,qe,Co,ei,vn,tu,oi,nu,kl,Je,yn,su,ti,ru,Al,ze,Fo,ni,xn,au,si,iu,Nl,We,wn,du,ri,lu,Ul,Xe,Po,ai,$n,cu,ii,mu,El,ee,Mn,pu,di,
fu,uu,Dn,hu,Pr,_u,gu,bu,jr,Tn,Cl,Se,jo,li,kn,vu,ci,yu,Fl,Ge,An,xu,mi,wu,Pl,Ke,Io,pi,Nn,$u,fi,Mu,jl,A,Un,Du,ui,Tu,ku,En,Au,Ir,Nu,Uu,Eu,Oo,Cn,Cu,Fn,Fu,hi,Pu,ju,Iu,Lo,Pn,Ou,jn,Lu,_i,Vu,Bu,qu,Vo,In,Ju,gi,zu,Wu,Bo,On,Xu,bi,Su,Gu,Or,Ln,Ku,Lr,Vn,Ru,qo,Bn,Zu,vi,Yu,Qu,Jo,qn,Hu,yi,eh,oh,zo,Jn,th,xi,nh,Il,Re,Wo,wi,zn,sh,$i,rh,Ol,O,Wn,ah,Mi,ih,dh,Di,lh,ch,Xn,mh,Ti,ph,fh,uh,ki,hh,_h,Vr,Sn,Ll,Ze,Xo,Ai,Gn,gh,Ni,bh,Vl,Kn,Rn,Bl,Ye,So,Ui,Zn,vh,Ei,yh,ql,ce,Yn,xh,Ci,wh,$h,Br,Qn,Jl,Qe,Go,Fi,Hn,Mh,Pi,Dh,zl,es,os,Wl,He,Ko,ji,ts,Th,Ii,kh,Xl,j,ns,Ah,Oi,Nh,Uh,ss,Eh,qr,Ch,Fh,Ph,Jr,jh,rs,Ih,Oh,zr,as,Lh,Wr,is,Vh,Ro,ds,Bh,Li,qh,Sl,eo,Zo,Vi,ls,Jh,Bi,zh,Gl,cs,ms,Kl,oo,Yo,qi,ps,Wh,Ji,Xh,Rl,fs,us,Zl,to,Qo,zi,hs,Sh,Wi,Gh,Yl,J,_s,Kh,Ho,gs,Rh,Xi,Zh,Yh,xe,bs,Qh,Si,Hh,e_,Gi,o_,t_,Xr,vs,n_,et,ys,s_,Ki,r_,Ql,no,ot,Ri,xs,a_,Zi,i_,Hl,C,ws,d_,Yi,l_,c_,Sr,Gr,m_,p_,f_,X,$s,u_,Qi,h_,__,Ms,g_,Hi,b_,v_,y_,Ds,x_,ed,w_,$_,M_,tt,D_,nt,Ts,T_,ks,k_,od,A_,N_,U_,re,As,E_,oe,C_,td,F_,P_,nd,j_,I_,sd,O_,L_,rd,V_,B_,q_,ad,J_,z_,st,W_,ae,Ns,X_,te,S_,id,G_,K_,dd,R_,Z_,ld,Y_,Q_,cd,H_,eg,og,md,tg,ng,rt,sg,we,Us,rg,ne,ag,pd,ig,dg,fd,lg,cg,ud,mg,pg,hd,fg,ug,hg,at,ec,so,it,_d,Es,_g,gd,gg,oc,ro,Cs,bg,dt,Fs,vg,bd,yg,tc,ao,lt,vd,Ps,xg,yd,wg,nc,L,js,$g,xd,Mg,Dg,Is,Tg,Kr,kg,Ag,Ng,Os,Ug,Ls,Eg,Cg,Fg,wd,Pg,jg,me,$d,Vs,Ig,Og,Md,Bs,Lg,Vg,Dd,qs,Bg,qg,Td,Js,Jg,sc,io,ct,kd,zs,zg,Ad,Wg,rc,pe,Ws,Xg,Nd,Sg,Gg,mt,Xs,Kg,Ud,Rg,ac,lo,pt,Ed,Ss,Zg,Cd,Yg,ic,fe,Gs,Qg,Fd,Hg,eb,ft,Ks,ob,Pd,tb,dc,co,ut,jd,Rs,nb,Id,sb,lc,z,Zs,rb,Od,ab,ib,Ys,db,Qs,lb,cb,mb,Ld,pb,fb,ue,Vd,Hs,ub,hb,Bd,er,_b,gb,qd,or,bb,vb,Jd,tr,yb,cc,mo,ht,zd,nr,xb,Wd,wb,mc,po,sr,$b,_t,rr,Mb,Xd,Db,pc,fo,gt,Sd,ar,Tb,Gd,kb,fc,V,ir,Ab,dr,Nb,lr,Ub,Eb,Cb,cr,Fb,Rr,Pb,jb,Ib,mr,Ob,pr,Lb,Vb,Bb,Kd,qb,Jb,he,Rd,fr,zb,Wb,Zd,ur,Xb,Sb,Yd,hr,Gb,Kb,Qd,_r,Rb,uc;return v=new k({}),Tt=new k({}),kt=new y({props:{name:"class diffusers.ModelMixin",anchor:"diffusers.ModelMixin",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L153"}}),At=new 
y({props:{name:"disable_gradient_checkpointing",anchor:"diffusers.ModelMixin.disable_gradient_checkpointing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L210"}}),Nt=new y({props:{name:"disable_xformers_memory_efficient_attention",anchor:"diffusers.ModelMixin.disable_xformers_memory_efficient_attention",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L269"}}),Ut=new y({props:{name:"enable_gradient_checkpointing",anchor:"diffusers.ModelMixin.enable_gradient_checkpointing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L199"}}),Et=new y({props:{name:"enable_xformers_memory_efficient_attention",anchor:"diffusers.ModelMixin.enable_xformers_memory_efficient_attention",parameters:[{name:"attention_op",val:": typing.Optional[typing.Callable] = None"}],parametersDescription:[{anchor:"diffusers.ModelMixin.enable_xformers_memory_efficient_attention.attention_op",description:`<strong>attention_op</strong> (<code>Callable</code>, <em>optional</em>) — | |
| Override the default <code>None</code> operator for use as <code>op</code> argument to the | |
| <a href="https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention" rel="nofollow"><code>memory_efficient_attention()</code></a> | |
| function of xFormers.`,name:"attention_op"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L237"}}),go=new nl({props:{anchor:"diffusers.ModelMixin.enable_xformers_memory_efficient_attention.example",$$slots:{default:[kx]},$$scope:{ctx:B}}}),Ct=new y({props:{name:"from_pretrained",anchor:"diffusers.ModelMixin.from_pretrained",parameters:[{name:"pretrained_model_name_or_path",val:": typing.Union[str, os.PathLike, NoneType]"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"diffusers.ModelMixin.from_pretrained.pretrained_model_name_or_path",description:`<strong>pretrained_model_name_or_path</strong> (<code>str</code> or <code>os.PathLike</code>, <em>optional</em>) — | |
| Can be either:</p> | |
| <ul> | |
| <li>A string, the <em>model id</em> of a pretrained model hosted inside a model repo on huggingface.co. | |
| Valid model ids should have an organization name, like <code>google/ddpm-celebahq-256</code>.</li> | |
| <li>A path to a <em>directory</em> containing model weights saved using <code>ModelMixin.save_pretrained()</code>, e.g., | |
| <code>./my_model_directory/</code>.</li> | |
| </ul>`,name:"pretrained_model_name_or_path"},{anchor:"diffusers.ModelMixin.from_pretrained.cache_dir",description:`<strong>cache_dir</strong> (<code>Union[str, os.PathLike]</code>, <em>optional</em>) — | |
| Path to a directory in which a downloaded pretrained model configuration should be cached if the | |
| standard cache should not be used.`,name:"cache_dir"},{anchor:"diffusers.ModelMixin.from_pretrained.torch_dtype",description:`<strong>torch_dtype</strong> (<code>str</code> or <code>torch.dtype</code>, <em>optional</em>) — | |
| Override the default <code>torch.dtype</code> and load the model under this dtype. If <code>"auto"</code> is passed the dtype | |
| will be automatically derived from the model’s weights.`,name:"torch_dtype"},{anchor:"diffusers.ModelMixin.from_pretrained.force_download",description:`<strong>force_download</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to force the (re-)download of the model weights and configuration files, overriding the | |
| cached versions if they exist.`,name:"force_download"},{anchor:"diffusers.ModelMixin.from_pretrained.resume_download",description:`<strong>resume_download</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to delete incompletely received files. Will attempt to resume the download if such a | |
| file exists.`,name:"resume_download"},{anchor:"diffusers.ModelMixin.from_pretrained.proxies",description:`<strong>proxies</strong> (<code>Dict[str, str]</code>, <em>optional</em>) — | |
| A dictionary of proxy servers to use by protocol or endpoint, e.g., <code>{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}</code>. The proxies are used on each request.`,name:"proxies"},{anchor:"diffusers.ModelMixin.from_pretrained.output_loading_info(bool,",description:`<strong>output_loading_info</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.`,name:"output_loading_info(bool,"},{anchor:"diffusers.ModelMixin.from_pretrained.local_files_only(bool,",description:`<strong>local_files_only</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to only look at local files (i.e., do not try to download the model).`,name:"local_files_only(bool,"},{anchor:"diffusers.ModelMixin.from_pretrained.use_auth_token",description:`<strong>use_auth_token</strong> (<code>str</code> or <em>bool</em>, <em>optional</em>) — | |
| The token to use as HTTP bearer authorization for remote files. If <code>True</code>, will use the token generated | |
| when running <code>diffusers-cli login</code> (stored in <code>~/.huggingface</code>).`,name:"use_auth_token"},{anchor:"diffusers.ModelMixin.from_pretrained.revision",description:`<strong>revision</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"main"</code>) — | |
| The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a | |
| git-based system for storing models and other artifacts on huggingface.co, so <code>revision</code> can be any | |
| identifier allowed by git.`,name:"revision"},{anchor:"diffusers.ModelMixin.from_pretrained.from_flax",description:`<strong>from_flax</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Load the model weights from a Flax checkpoint save file.`,name:"from_flax"},{anchor:"diffusers.ModelMixin.from_pretrained.subfolder",description:`<strong>subfolder</strong> (<code>str</code>, <em>optional</em>, defaults to <code>""</code>) — | |
| In case the relevant files are located inside a subfolder of the model repo (either remote in | |
| huggingface.co or downloaded locally), you can specify the folder name here.`,name:"subfolder"},{anchor:"diffusers.ModelMixin.from_pretrained.mirror",description:`<strong>mirror</strong> (<code>str</code>, <em>optional</em>) — | |
| Mirror source to accelerate downloads in China. If you are from China and have an accessibility | |
| problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. | |
| Please refer to the mirror site for more information.`,name:"mirror"},{anchor:"diffusers.ModelMixin.from_pretrained.device_map",description:`<strong>device_map</strong> (<code>str</code> or <code>Dict[str, Union[int, str, torch.device]]</code>, <em>optional</em>) — | |
| A map that specifies where each submodule should go. It doesn’t need to be refined to each | |
| parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the | |
| same device.</p> | |
| <p>To have Accelerate compute the most optimized <code>device_map</code> automatically, set <code>device_map="auto"</code>. For | |
| more information about each option see <a href="https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map" rel="nofollow">designing a device | |
| map</a>.`,name:"device_map"},{anchor:"diffusers.ModelMixin.from_pretrained.max_memory",description:`<strong>max_memory</strong> (<code>Dict</code>, <em>optional</em>) — | |
| A dictionary mapping device identifiers to maximum memory. Will default to the maximum memory available for each | |
| GPU and the available CPU RAM if unset.`,name:"max_memory"},{anchor:"diffusers.ModelMixin.from_pretrained.offload_folder",description:`<strong>offload_folder</strong> (<code>str</code> or <code>os.PathLike</code>, <em>optional</em>) — | |
| If the <code>device_map</code> contains any value <code>"disk"</code>, the folder where we will offload weights.`,name:"offload_folder"},{anchor:"diffusers.ModelMixin.from_pretrained.offload_state_dict",description:`<strong>offload_state_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| If <code>True</code>, will temporarily offload the CPU state dict to the hard drive to avoid getting out of CPU | |
| RAM if the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to | |
| <code>True</code> when there is some disk offload.`,name:"offload_state_dict"},{anchor:"diffusers.ModelMixin.from_pretrained.low_cpu_mem_usage",description:`<strong>low_cpu_mem_usage</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code> if torch version >= 1.9.0 else <code>False</code>) — | |
| Speed up model loading by not initializing the weights and only loading the pre-trained weights. This | |
| also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the | |
| model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, | |
| setting this argument to <code>True</code> will raise an error.`,name:"low_cpu_mem_usage"},{anchor:"diffusers.ModelMixin.from_pretrained.variant",description:`<strong>variant</strong> (<code>str</code>, <em>optional</em>) — | |
| If specified, load weights from <code>variant</code> filename, <em>e.g.</em> pytorch_model.&lt;variant&gt;.bin. <code>variant</code> is | |
| ignored when using <code>from_flax</code>.`,name:"variant"},{anchor:"diffusers.ModelMixin.from_pretrained.use_safetensors",description:`<strong>use_safetensors</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| If set to <code>None</code>, the <code>safetensors</code> weights will be downloaded if they’re available <strong>and</strong> if the | |
| <code>safetensors</code> library is installed. If set to <code>True</code>, the model will be forcibly loaded from | |
| <code>safetensors</code> weights. If set to <code>False</code>, loading will <em>not</em> use <code>safetensors</code>.`,name:"use_safetensors"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L335"}}),bo=new bx({props:{$$slots:{default:[Ax]},$$scope:{ctx:B}}}),vo=new bx({props:{$$slots:{default:[Nx]},$$scope:{ctx:B}}}),jt=new y({props:{name:"num_parameters",anchor:"diffusers.ModelMixin.num_parameters",parameters:[{name:"only_trainable",val:": bool = False"},{name:"exclude_embeddings",val:": bool = False"}],parametersDescription:[{anchor:"diffusers.ModelMixin.num_parameters.only_trainable",description:`<strong>only_trainable</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to return only the number of trainable parameters`,name:"only_trainable"},{anchor:"diffusers.ModelMixin.num_parameters.exclude_embeddings",description:`<strong>exclude_embeddings</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to return only the number of non-embeddings parameters`,name:"exclude_embeddings"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L853",returnDescription:` | |
| <p>The number of parameters.</p> | |
| `,returnType:` | |
| <p><code>int</code></p> | |
| `}}),It=new y({props:{name:"save_pretrained",anchor:"diffusers.ModelMixin.save_pretrained",parameters:[{name:"save_directory",val:": typing.Union[str, os.PathLike]"},{name:"is_main_process",val:": bool = True"},{name:"save_function",val:": typing.Callable = None"},{name:"safe_serialization",val:": bool = False"},{name:"variant",val:": typing.Optional[str] = None"}],parametersDescription:[{anchor:"diffusers.ModelMixin.save_pretrained.save_directory",description:`<strong>save_directory</strong> (<code>str</code> or <code>os.PathLike</code>) — | |
| Directory to which to save. Will be created if it doesn’t exist.`,name:"save_directory"},{anchor:"diffusers.ModelMixin.save_pretrained.is_main_process",description:`<strong>is_main_process</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether the process calling this is the main process or not. Useful when in distributed training like | |
| TPUs and need to call this function on all processes. In this case, set <code>is_main_process=True</code> only on | |
| the main process to avoid race conditions.`,name:"is_main_process"},{anchor:"diffusers.ModelMixin.save_pretrained.save_function",description:`<strong>save_function</strong> (<code>Callable</code>) — | |
| The function to use to save the state dictionary. Useful on distributed training like TPUs when one | |
| need to replace <code>torch.save</code> by another method. Can be configured with the environment variable | |
| <code>DIFFUSERS_SAVE_MODE</code>.`,name:"save_function"},{anchor:"diffusers.ModelMixin.save_pretrained.safe_serialization",description:`<strong>safe_serialization</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to save the model using <code>safetensors</code> or the traditional PyTorch way (that uses <code>pickle</code>).`,name:"safe_serialization"},{anchor:"diffusers.ModelMixin.save_pretrained.variant",description:`<strong>variant</strong> (<code>str</code>, <em>optional</em>) — | |
| If specified, weights are saved in the format pytorch_model.<variant>.bin.</variant>`,name:"variant"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_utils.py#L275"}}),Lt=new k({}),Bt=new y({props:{name:"class diffusers.models.unet_2d.UNet2DOutput",anchor:"diffusers.models.unet_2d.UNet2DOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.unet_2d.UNet2DOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Hidden states output. Output of last layer of model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d.py#L28"}}),qt=new k({}),Jt=new y({props:{name:"class diffusers.UNet2DModel",anchor:"diffusers.UNet2DModel",parameters:[{name:"sample_size",val:": typing.Union[int, typing.Tuple[int, int], NoneType] = None"},{name:"in_channels",val:": int = 3"},{name:"out_channels",val:": int = 3"},{name:"center_input_sample",val:": bool = False"},{name:"time_embedding_type",val:": str = 'positional'"},{name:"freq_shift",val:": int = 0"},{name:"flip_sin_to_cos",val:": bool = True"},{name:"down_block_types",val:": typing.Tuple[str] = ('DownBlock2D', 'AttnDownBlock2D', 'AttnDownBlock2D', 'AttnDownBlock2D')"},{name:"up_block_types",val:": typing.Tuple[str] = ('AttnUpBlock2D', 'AttnUpBlock2D', 'AttnUpBlock2D', 'UpBlock2D')"},{name:"block_out_channels",val:": typing.Tuple[int] = (224, 448, 672, 896)"},{name:"layers_per_block",val:": int = 2"},{name:"mid_block_scale_factor",val:": float = 1"},{name:"downsample_padding",val:": int = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"attention_head_dim",val:": typing.Optional[int] = 8"},{name:"norm_num_groups",val:": int = 32"},{name:"norm_eps",val:": float = 1e-05"},{name:"resnet_time_scale_shift",val:": str = 'default'"},{name:"add_attention",val:": bool = True"},{name:"class_embed_type",val:": typing.Optional[str] = None"},{name:"num_class_embeds",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.UNet2DModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code> or <code>Tuple[int, int]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Height and width of input/output sample. Dimensions must be a multiple of <code>2 ** (len(block_out_channels) - 1)</code>.`,name:"sample_size"},{anchor:"diffusers.UNet2DModel.in_channels",description:"<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 3) — Number of channels in the input image.",name:"in_channels"},{anchor:"diffusers.UNet2DModel.out_channels",description:"<strong>out_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 3) — Number of channels in the output.",name:"out_channels"},{anchor:"diffusers.UNet2DModel.center_input_sample",description:"<strong>center_input_sample</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — Whether to center the input sample.",name:"center_input_sample"},{anchor:"diffusers.UNet2DModel.time_embedding_type",description:"<strong>time_embedding_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"positional"</code>) — Type of time embedding to use.",name:"time_embedding_type"},{anchor:"diffusers.UNet2DModel.freq_shift",description:"<strong>freq_shift</strong> (<code>int</code>, <em>optional</em>, defaults to 0) — Frequency shift for fourier time embedding.",name:"freq_shift"},{anchor:"diffusers.UNet2DModel.flip_sin_to_cos",description:`<strong>flip_sin_to_cos</strong> (<code>bool</code>, <em>optional</em>, defaults to — | |
| obj:<code>True</code>): Whether to flip sin to cos for fourier time embedding.`,name:"flip_sin_to_cos"},{anchor:"diffusers.UNet2DModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D")</code>): Tuple of downsample block | |
| types.`,name:"down_block_types"},{anchor:"diffusers.UNet2DModel.mid_block_type",description:`<strong>mid_block_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"UNetMidBlock2D"</code>) — | |
| The mid block type. Choose from <code>UNetMidBlock2D</code> or <code>UnCLIPUNetMidBlock2D</code>.`,name:"mid_block_type"},{anchor:"diffusers.UNet2DModel.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("AttnUpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D")</code>): Tuple of upsample block types.`,name:"up_block_types"},{anchor:"diffusers.UNet2DModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to — | |
| obj:<code>(224, 448, 672, 896)</code>): Tuple of block output channels.`,name:"block_out_channels"},{anchor:"diffusers.UNet2DModel.layers_per_block",description:"<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to <code>2</code>) — The number of layers per block.",name:"layers_per_block"},{anchor:"diffusers.UNet2DModel.mid_block_scale_factor",description:"<strong>mid_block_scale_factor</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1</code>) — The scale factor for the mid block.",name:"mid_block_scale_factor"},{anchor:"diffusers.UNet2DModel.downsample_padding",description:"<strong>downsample_padding</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) — The padding for the downsample convolution.",name:"downsample_padding"},{anchor:"diffusers.UNet2DModel.act_fn",description:"<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"silu"</code>) — The activation function to use.",name:"act_fn"},{anchor:"diffusers.UNet2DModel.attention_head_dim",description:"<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to <code>8</code>) — The attention head dimension.",name:"attention_head_dim"},{anchor:"diffusers.UNet2DModel.norm_num_groups",description:"<strong>norm_num_groups</strong> (<code>int</code>, <em>optional</em>, defaults to <code>32</code>) — The number of groups for the normalization.",name:"norm_num_groups"},{anchor:"diffusers.UNet2DModel.norm_eps",description:"<strong>norm_eps</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1e-5</code>) — The epsilon for the normalization.",name:"norm_eps"},{anchor:"diffusers.UNet2DModel.resnet_time_scale_shift",description:`<strong>resnet_time_scale_shift</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"default"</code>) — Time scale shift config | |
| for resnet blocks, see <code>ResnetBlock2D</code>. Choose from <code>default</code> or <code>scale_shift</code>.`,name:"resnet_time_scale_shift"},{anchor:"diffusers.UNet2DModel.class_embed_type",description:`<strong>class_embed_type</strong> (<code>str</code>, <em>optional</em>, defaults to None) — | |
| The type of class embedding to use which is ultimately summed with the time embeddings. Choose from <code>None</code>, | |
| <code>"timestep"</code>, or <code>"identity"</code>.`,name:"class_embed_type"},{anchor:"diffusers.UNet2DModel.num_class_embeds",description:`<strong>num_class_embeds</strong> (<code>int</code>, <em>optional</em>, defaults to None) — | |
| Input dimension of the learnable embedding matrix to be projected to <code>time_embed_dim</code>, when performing | |
| class conditioning with <code>class_embed_type</code> equal to <code>None</code>.`,name:"num_class_embeds"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d.py#L38"}}),Wt=new y({props:{name:"forward",anchor:"diffusers.UNet2DModel.forward",parameters:[{name:"sample",val:": FloatTensor"},{name:"timestep",val:": typing.Union[torch.Tensor, float, int]"},{name:"class_labels",val:": typing.Optional[torch.Tensor] = None"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.UNet2DModel.forward.sample",description:"<strong>sample</strong> (<code>torch.FloatTensor</code>) — (batch, channel, height, width) noisy inputs tensor",name:"sample"},{anchor:"diffusers.UNet2DModel.forward.timestep",description:"<strong>timestep</strong> (<code>torch.FloatTensor</code> or <code>float</code> or `int) — (batch) timesteps",name:"timestep"},{anchor:"diffusers.UNet2DModel.forward.class_labels",description:`<strong>class_labels</strong> (<code>torch.FloatTensor</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.`,name:"class_labels"},{anchor:"diffusers.UNet2DModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d.UNet2DOutput">UNet2DOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d.py#L219",returnDescription:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d.UNet2DOutput" | |
| >UNet2DOutput</a> if <code>return_dict</code> is True, | |
| otherwise a <code>tuple</code>. When returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d.UNet2DOutput" | |
| >UNet2DOutput</a> or <code>tuple</code></p> | |
| `}}),Xt=new k({}),Gt=new y({props:{name:"class diffusers.models.unet_1d.UNet1DOutput",anchor:"diffusers.models.unet_1d.UNet1DOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.unet_1d.UNet1DOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, sample_size)</code>) — | |
| Hidden states output. Output of last layer of model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_1d.py#L29"}}),Kt=new k({}),Rt=new y({props:{name:"class diffusers.UNet1DModel",anchor:"diffusers.UNet1DModel",parameters:[{name:"sample_size",val:": int = 65536"},{name:"sample_rate",val:": typing.Optional[int] = None"},{name:"in_channels",val:": int = 2"},{name:"out_channels",val:": int = 2"},{name:"extra_in_channels",val:": int = 0"},{name:"time_embedding_type",val:": str = 'fourier'"},{name:"flip_sin_to_cos",val:": bool = True"},{name:"use_timestep_embedding",val:": bool = False"},{name:"freq_shift",val:": float = 0.0"},{name:"down_block_types",val:": typing.Tuple[str] = ('DownBlock1DNoSkip', 'DownBlock1D', 'AttnDownBlock1D')"},{name:"up_block_types",val:": typing.Tuple[str] = ('AttnUpBlock1D', 'UpBlock1D', 'UpBlock1DNoSkip')"},{name:"mid_block_type",val:": typing.Tuple[str] = 'UNetMidBlock1D'"},{name:"out_block_type",val:": str = None"},{name:"block_out_channels",val:": typing.Tuple[int] = (32, 32, 64)"},{name:"act_fn",val:": str = None"},{name:"norm_num_groups",val:": int = 8"},{name:"layers_per_block",val:": int = 1"},{name:"downsample_each_block",val:": bool = False"}],parametersDescription:[{anchor:"diffusers.UNet1DModel.sample_size",description:"<strong>sample_size</strong> (<code>int</code>, <em>optional</em>) — Default length of sample. 
Should be adaptable at runtime.",name:"sample_size"},{anchor:"diffusers.UNet1DModel.in_channels",description:"<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — Number of channels in the input sample.",name:"in_channels"},{anchor:"diffusers.UNet1DModel.out_channels",description:"<strong>out_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — Number of channels in the output.",name:"out_channels"},{anchor:"diffusers.UNet1DModel.extra_in_channels",description:`<strong>extra_in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 0) — | |
| Number of additional channels to be added to the input of the first down block. Useful for cases where the | |
| input data has more channels than what the model is initially designed for.`,name:"extra_in_channels"},{anchor:"diffusers.UNet1DModel.time_embedding_type",description:"<strong>time_embedding_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"fourier"</code>) — Type of time embedding to use.",name:"time_embedding_type"},{anchor:"diffusers.UNet1DModel.freq_shift",description:"<strong>freq_shift</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — Frequency shift for fourier time embedding.",name:"freq_shift"},{anchor:"diffusers.UNet1DModel.flip_sin_to_cos",description:`<strong>flip_sin_to_cos</strong> (<code>bool</code>, <em>optional</em>, defaults to — | |
| obj:<code>False</code>): Whether to flip sin to cos for fourier time embedding.`,name:"flip_sin_to_cos"},{anchor:"diffusers.UNet1DModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("DownBlock1D", "DownBlock1DNoSkip", "AttnDownBlock1D")</code>): Tuple of downsample block types.`,name:"down_block_types"},{anchor:"diffusers.UNet1DModel.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("UpBlock1D", "UpBlock1DNoSkip", "AttnUpBlock1D")</code>): Tuple of upsample block types.`,name:"up_block_types"},{anchor:"diffusers.UNet1DModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to — | |
| obj:<code>(32, 32, 64)</code>): Tuple of block output channels.`,name:"block_out_channels"},{anchor:"diffusers.UNet1DModel.mid_block_type",description:"<strong>mid_block_type</strong> (<code>str</code>, <em>optional</em>, defaults to “UNetMidBlock1D”) — block type for middle of UNet.",name:"mid_block_type"},{anchor:"diffusers.UNet1DModel.out_block_type",description:"<strong>out_block_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — optional output processing of UNet.",name:"out_block_type"},{anchor:"diffusers.UNet1DModel.act_fn",description:"<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to None) — optional activation function in UNet blocks.",name:"act_fn"},{anchor:"diffusers.UNet1DModel.norm_num_groups",description:"<strong>norm_num_groups</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — group norm member count in UNet blocks.",name:"norm_num_groups"},{anchor:"diffusers.UNet1DModel.layers_per_block",description:"<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — added number of layers in a UNet block.",name:"layers_per_block"},{anchor:"diffusers.UNet1DModel.downsample_each_block",description:`<strong>downsample_each_block</strong> (<code>int</code>, <em>optional</em>, defaults to False — | |
| experimental feature for using a UNet without upsampling.`,name:"downsample_each_block"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_1d.py#L39"}}),Yt=new y({props:{name:"forward",anchor:"diffusers.UNet1DModel.forward",parameters:[{name:"sample",val:": FloatTensor"},{name:"timestep",val:": typing.Union[torch.Tensor, float, int]"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.UNet1DModel.forward.sample",description:"<strong>sample</strong> (<code>torch.FloatTensor</code>) — <code>(batch_size, num_channels, sample_size)</code> noisy inputs tensor",name:"sample"},{anchor:"diffusers.UNet1DModel.forward.timestep",description:"<strong>timestep</strong> (<code>torch.FloatTensor</code> or <code>float</code> or `int) — (batch) timesteps",name:"timestep"},{anchor:"diffusers.UNet1DModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/main/en/api/models#diffusers.models.unet_1d.UNet1DOutput">UNet1DOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_1d.py#L193",returnDescription:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.unet_1d.UNet1DOutput" | |
| >UNet1DOutput</a> if <code>return_dict</code> is True, | |
| otherwise a <code>tuple</code>. When returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.unet_1d.UNet1DOutput" | |
| >UNet1DOutput</a> or <code>tuple</code></p> | |
| `}}),Qt=new k({}),en=new y({props:{name:"class diffusers.models.unet_2d_condition.UNet2DConditionOutput",anchor:"diffusers.models.unet_2d_condition.UNet2DConditionOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.unet_2d_condition.UNet2DConditionOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Hidden states conditioned on <code>encoder_hidden_states</code> input. Output of last layer of model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition.py#L51"}}),on=new k({}),tn=new y({props:{name:"class diffusers.UNet2DConditionModel",anchor:"diffusers.UNet2DConditionModel",parameters:[{name:"sample_size",val:": typing.Optional[int] = None"},{name:"in_channels",val:": int = 4"},{name:"out_channels",val:": int = 4"},{name:"center_input_sample",val:": bool = False"},{name:"flip_sin_to_cos",val:": bool = True"},{name:"freq_shift",val:": int = 0"},{name:"down_block_types",val:": typing.Tuple[str] = ('CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'DownBlock2D')"},{name:"mid_block_type",val:": typing.Optional[str] = 'UNetMidBlock2DCrossAttn'"},{name:"up_block_types",val:": typing.Tuple[str] = ('UpBlock2D', 'CrossAttnUpBlock2D', 'CrossAttnUpBlock2D', 'CrossAttnUpBlock2D')"},{name:"only_cross_attention",val:": typing.Union[bool, typing.Tuple[bool]] = False"},{name:"block_out_channels",val:": typing.Tuple[int] = (320, 640, 1280, 1280)"},{name:"layers_per_block",val:": typing.Union[int, typing.Tuple[int]] = 2"},{name:"downsample_padding",val:": int = 1"},{name:"mid_block_scale_factor",val:": float = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"norm_num_groups",val:": typing.Optional[int] = 32"},{name:"norm_eps",val:": float = 1e-05"},{name:"cross_attention_dim",val:": typing.Union[int, typing.Tuple[int]] = 1280"},{name:"encoder_hid_dim",val:": typing.Optional[int] = None"},{name:"encoder_hid_dim_type",val:": typing.Optional[str] = None"},{name:"attention_head_dim",val:": typing.Union[int, typing.Tuple[int]] = 8"},{name:"num_attention_heads",val:": typing.Union[int, typing.Tuple[int], NoneType] = None"},{name:"dual_cross_attention",val:": bool = False"},{name:"use_linear_projection",val:": bool = False"},{name:"class_embed_type",val:": typing.Optional[str] = 
None"},{name:"addition_embed_type",val:": typing.Optional[str] = None"},{name:"num_class_embeds",val:": typing.Optional[int] = None"},{name:"upcast_attention",val:": bool = False"},{name:"resnet_time_scale_shift",val:": str = 'default'"},{name:"resnet_skip_time_act",val:": bool = False"},{name:"resnet_out_scale_factor",val:": int = 1.0"},{name:"time_embedding_type",val:": str = 'positional'"},{name:"time_embedding_dim",val:": typing.Optional[int] = None"},{name:"time_embedding_act_fn",val:": typing.Optional[str] = None"},{name:"timestep_post_act",val:": typing.Optional[str] = None"},{name:"time_cond_proj_dim",val:": typing.Optional[int] = None"},{name:"conv_in_kernel",val:": int = 3"},{name:"conv_out_kernel",val:": int = 3"},{name:"projection_class_embeddings_input_dim",val:": typing.Optional[int] = None"},{name:"class_embeddings_concat",val:": bool = False"},{name:"mid_block_only_cross_attention",val:": typing.Optional[bool] = None"},{name:"cross_attention_norm",val:": typing.Optional[str] = None"},{name:"addition_embed_type_num_heads",val:" = 64"}],parametersDescription:[{anchor:"diffusers.UNet2DConditionModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code> or <code>Tuple[int, int]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Height and width of input/output sample.`,name:"sample_size"},{anchor:"diffusers.UNet2DConditionModel.in_channels",description:"<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — The number of channels in the input sample.",name:"in_channels"},{anchor:"diffusers.UNet2DConditionModel.out_channels",description:"<strong>out_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — The number of channels in the output.",name:"out_channels"},{anchor:"diffusers.UNet2DConditionModel.center_input_sample",description:"<strong>center_input_sample</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — Whether to center the input sample.",name:"center_input_sample"},{anchor:"diffusers.UNet2DConditionModel.flip_sin_to_cos",description:`<strong>flip_sin_to_cos</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to flip the sin to cos in the time embedding.`,name:"flip_sin_to_cos"},{anchor:"diffusers.UNet2DConditionModel.freq_shift",description:"<strong>freq_shift</strong> (<code>int</code>, <em>optional</em>, defaults to 0) — The frequency shift to apply to the time embedding.",name:"freq_shift"},{anchor:"diffusers.UNet2DConditionModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")</code>) — | |
| The tuple of downsample blocks to use.`,name:"down_block_types"},{anchor:"diffusers.UNet2DConditionModel.mid_block_type",description:`<strong>mid_block_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"UNetMidBlock2DCrossAttn"</code>) — | |
| The mid block type. Choose from <code>UNetMidBlock2DCrossAttn</code> or <code>UNetMidBlock2DSimpleCrossAttn</code>, will skip the | |
| mid block layer if <code>None</code>.`,name:"mid_block_type"},{anchor:"diffusers.UNet2DConditionModel.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D",)</code>) — | |
| The tuple of upsample blocks to use.`,name:"up_block_types"},{anchor:"diffusers.UNet2DConditionModel.only_cross_attention(bool",description:`<strong>only_cross_attention(<code>bool</code></strong> or <code>Tuple[bool]</code>, <em>optional</em>, default to <code>False</code>) — | |
| Whether to include self-attention in the basic transformer blocks, see | |
| <code>BasicTransformerBlock</code>.`,name:"only_cross_attention(bool"},{anchor:"diffusers.UNet2DConditionModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to <code>(320, 640, 1280, 1280)</code>) — | |
| The tuple of output channels for each block.`,name:"block_out_channels"},{anchor:"diffusers.UNet2DConditionModel.layers_per_block",description:"<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — The number of layers per block.",name:"layers_per_block"},{anchor:"diffusers.UNet2DConditionModel.downsample_padding",description:"<strong>downsample_padding</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — The padding to use for the downsampling convolution.",name:"downsample_padding"},{anchor:"diffusers.UNet2DConditionModel.mid_block_scale_factor",description:"<strong>mid_block_scale_factor</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) — The scale factor to use for the mid block.",name:"mid_block_scale_factor"},{anchor:"diffusers.UNet2DConditionModel.act_fn",description:"<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"silu"</code>) — The activation function to use.",name:"act_fn"},{anchor:"diffusers.UNet2DConditionModel.norm_num_groups",description:`<strong>norm_num_groups</strong> (<code>int</code>, <em>optional</em>, defaults to 32) — The number of groups to use for the normalization. | |
| If <code>None</code>, it will skip the normalization and activation layers in post-processing`,name:"norm_num_groups"},{anchor:"diffusers.UNet2DConditionModel.norm_eps",description:"<strong>norm_eps</strong> (<code>float</code>, <em>optional</em>, defaults to 1e-5) — The epsilon to use for the normalization.",name:"norm_eps"},{anchor:"diffusers.UNet2DConditionModel.cross_attention_dim",description:`<strong>cross_attention_dim</strong> (<code>int</code> or <code>Tuple[int]</code>, <em>optional</em>, defaults to 1280) — | |
| The dimension of the cross attention features.`,name:"cross_attention_dim"},{anchor:"diffusers.UNet2DConditionModel.encoder_hid_dim",description:`<strong>encoder_hid_dim</strong> (<code>int</code>, <em>optional</em>, defaults to None) — | |
| If <code>encoder_hid_dim_type</code> is defined, <code>encoder_hidden_states</code> will be projected from <code>encoder_hid_dim</code> | |
| dimension to <code>cross_attention_dim</code>.`,name:"encoder_hid_dim"},{anchor:"diffusers.UNet2DConditionModel.encoder_hid_dim_type",description:`<strong>encoder_hid_dim_type</strong> (<code>str</code>, <em>optional</em>, defaults to None) — | |
| If given, the <code>encoder_hidden_states</code> and potentially other embeddings will be down-projected to text | |
| embeddings of dimension <code>cross_attention</code> according to <code>encoder_hid_dim_type</code>.`,name:"encoder_hid_dim_type"},{anchor:"diffusers.UNet2DConditionModel.attention_head_dim",description:"<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — The dimension of the attention heads.",name:"attention_head_dim"},{anchor:"diffusers.UNet2DConditionModel.num_attention_heads",description:`<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>) — | |
| The number of attention heads. If not defined, defaults to <code>attention_head_dim</code>`,name:"num_attention_heads"},{anchor:"diffusers.UNet2DConditionModel.resnet_time_scale_shift",description:`<strong>resnet_time_scale_shift</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"default"</code>) — Time scale shift config | |
| for resnet blocks, see <code>ResnetBlock2D</code>. Choose from <code>default</code> or <code>scale_shift</code>.`,name:"resnet_time_scale_shift"},{anchor:"diffusers.UNet2DConditionModel.class_embed_type",description:`<strong>class_embed_type</strong> (<code>str</code>, <em>optional</em>, defaults to None) — | |
| The type of class embedding to use which is ultimately summed with the time embeddings. Choose from <code>None</code>, | |
| <code>"timestep"</code>, <code>"identity"</code>, <code>"projection"</code>, or <code>"simple_projection"</code>.`,name:"class_embed_type"},{anchor:"diffusers.UNet2DConditionModel.addition_embed_type",description:`<strong>addition_embed_type</strong> (<code>str</code>, <em>optional</em>, defaults to None) — | |
| Configures an optional embedding which will be summed with the time embeddings. Choose from <code>None</code> or | |
| “text”. “text” will use the <code>TextTimeEmbedding</code> layer.`,name:"addition_embed_type"},{anchor:"diffusers.UNet2DConditionModel.num_class_embeds",description:`<strong>num_class_embeds</strong> (<code>int</code>, <em>optional</em>, defaults to None) — | |
| Input dimension of the learnable embedding matrix to be projected to <code>time_embed_dim</code>, when performing | |
| class conditioning with <code>class_embed_type</code> equal to <code>None</code>.`,name:"num_class_embeds"},{anchor:"diffusers.UNet2DConditionModel.time_embedding_type",description:`<strong>time_embedding_type</strong> (<code>str</code>, <em>optional</em>, default to <code>positional</code>) — | |
| The type of position embedding to use for timesteps. Choose from <code>positional</code> or <code>fourier</code>.`,name:"time_embedding_type"},{anchor:"diffusers.UNet2DConditionModel.time_embedding_dim",description:`<strong>time_embedding_dim</strong> (<code>int</code>, <em>optional</em>, default to <code>None</code>) — | |
| An optional override for the dimension of the projected time embedding.`,name:"time_embedding_dim"},{anchor:"diffusers.UNet2DConditionModel.time_embedding_act_fn",description:`<strong>time_embedding_act_fn</strong> (<code>str</code>, <em>optional</em>, default to <code>None</code>) — | |
| Optional activation function to use on the time embeddings only one time before they as passed to the rest | |
| of the unet. Choose from <code>silu</code>, <code>mish</code>, <code>gelu</code>, and <code>swish</code>.`,name:"time_embedding_act_fn"},{anchor:"diffusers.UNet2DConditionModel.timestep_post_act",description:"<strong>timestep_post_act</strong> (<code>str, *optional*, default to </code>None<code>) -- The second activation function to use in timestep embedding. Choose from </code>silu<code>, </code>mish<code>and</code>gelu`.",name:"timestep_post_act"},{anchor:"diffusers.UNet2DConditionModel.time_cond_proj_dim",description:`<strong>time_cond_proj_dim</strong> (<code>int</code>, <em>optional</em>, default to <code>None</code>) — | |
| The dimension of <code>cond_proj</code> layer in timestep embedding.`,name:"time_cond_proj_dim"},{anchor:"diffusers.UNet2DConditionModel.conv_in_kernel",description:"<strong>conv_in_kernel</strong> (<code>int</code>, <em>optional</em>, default to <code>3</code>) — The kernel size of <code>conv_in</code> layer.",name:"conv_in_kernel"},{anchor:"diffusers.UNet2DConditionModel.conv_out_kernel",description:"<strong>conv_out_kernel</strong> (<code>int</code>, <em>optional</em>, default to <code>3</code>) — The kernel size of <code>conv_out</code> layer.",name:"conv_out_kernel"},{anchor:"diffusers.UNet2DConditionModel.projection_class_embeddings_input_dim",description:`<strong>projection_class_embeddings_input_dim</strong> (<code>int</code>, <em>optional</em>) — The dimension of the <code>class_labels</code> input when | |
| using the “projection” <code>class_embed_type</code>. Required when using the “projection” <code>class_embed_type</code>.`,name:"projection_class_embeddings_input_dim"},{anchor:"diffusers.UNet2DConditionModel.class_embeddings_concat",description:`<strong>class_embeddings_concat</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — Whether to concatenate the time | |
| embeddings with the class embeddings.`,name:"class_embeddings_concat"},{anchor:"diffusers.UNet2DConditionModel.mid_block_only_cross_attention",description:`<strong>mid_block_only_cross_attention</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Whether to use cross attention with the mid block when using the <code>UNetMidBlock2DSimpleCrossAttn</code>. If | |
| <code>only_cross_attention</code> is given as a single boolean and <code>mid_block_only_cross_attention</code> is None, the | |
| <code>only_cross_attention</code> value will be used as the value for <code>mid_block_only_cross_attention</code>. Else, it will | |
| default to <code>False</code>.`,name:"mid_block_only_cross_attention"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition.py#L61"}}),sn=new y({props:{name:"forward",anchor:"diffusers.UNet2DConditionModel.forward",parameters:[{name:"sample",val:": FloatTensor"},{name:"timestep",val:": typing.Union[torch.Tensor, float, int]"},{name:"encoder_hidden_states",val:": Tensor"},{name:"class_labels",val:": typing.Optional[torch.Tensor] = None"},{name:"timestep_cond",val:": typing.Optional[torch.Tensor] = None"},{name:"attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"cross_attention_kwargs",val:": typing.Union[typing.Dict[str, typing.Any], NoneType] = None"},{name:"added_cond_kwargs",val:": typing.Union[typing.Dict[str, torch.Tensor], NoneType] = None"},{name:"down_block_additional_residuals",val:": typing.Optional[typing.Tuple[torch.Tensor]] = None"},{name:"mid_block_additional_residual",val:": typing.Optional[torch.Tensor] = None"},{name:"encoder_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.UNet2DConditionModel.forward.sample",description:"<strong>sample</strong> (<code>torch.FloatTensor</code>) — (batch, channel, height, width) noisy inputs tensor",name:"sample"},{anchor:"diffusers.UNet2DConditionModel.forward.timestep",description:"<strong>timestep</strong> (<code>torch.FloatTensor</code> or <code>float</code> or <code>int</code>) — (batch) timesteps",name:"timestep"},{anchor:"diffusers.UNet2DConditionModel.forward.encoder_hidden_states",description:"<strong>encoder_hidden_states</strong> (<code>torch.FloatTensor</code>) — (batch, sequence_length, feature_dim) encoder hidden states",name:"encoder_hidden_states"},{anchor:"diffusers.UNet2DConditionModel.forward.encoder_attention_mask",description:`<strong>encoder_attention_mask</strong> (<code>torch.Tensor</code>) — | |
| (batch, sequence_length) cross-attention mask, applied to encoder_hidden_states. True = keep, False = | |
| discard. Mask will be converted into a bias, which adds large negative values to attention scores | |
| corresponding to “discard” tokens.`,name:"encoder_attention_mask"},{anchor:"diffusers.UNet2DConditionModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d_condition.UNet2DConditionOutput">models.unet_2d_condition.UNet2DConditionOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.UNet2DConditionModel.forward.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.UNet2DConditionModel.forward.added_cond_kwargs",description:`<strong>added_cond_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified includes additonal conditions that can be used for additonal time | |
| embeddings or encoder hidden states projections. See the configurations <code>encoder_hid_dim_type</code> and | |
| <code>addition_embed_type</code> for more information.`,name:"added_cond_kwargs"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition.py#L657",returnDescription:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d_condition.UNet2DConditionOutput" | |
| >UNet2DConditionOutput</a> if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When | |
| returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d_condition.UNet2DConditionOutput" | |
| >UNet2DConditionOutput</a> or <code>tuple</code></p> | |
| `}}),rn=new y({props:{name:"set_attention_slice",anchor:"diffusers.UNet2DConditionModel.set_attention_slice",parameters:[{name:"slice_size",val:""}],parametersDescription:[{anchor:"diffusers.UNet2DConditionModel.set_attention_slice.slice_size",description:`<strong>slice_size</strong> (<code>str</code> or <code>int</code> or <code>list(int)</code>, <em>optional</em>, defaults to <code>"auto"</code>) — | |
| When <code>"auto"</code>, halves the input to the attention heads, so attention will be computed in two steps. If | |
| <code>"max"</code>, maximum amount of memory will be saved by running only one slice at a time. If a number is | |
| provided, uses as many slices as <code>num_attention_heads // slice_size</code>. In this case, | |
| <code>num_attention_heads</code> must be a multiple of <code>slice_size</code>.`,name:"slice_size"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition.py#L588"}}),an=new y({props:{name:"set_attn_processor",anchor:"diffusers.UNet2DConditionModel.set_attn_processor",parameters:[{name:"processor",val:": typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor, typing.Dict[str, typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, 
diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor]]]"}],parametersDescription:[{anchor:"diffusers.UNet2DConditionModel.set_attn_processor.`processor",description:`<strong>\`processor</strong> (<code>dict</code> of <code>AttentionProcessor</code> or <code>AttentionProcessor</code>) — | |
| The instantiated processor class or a dictionary of processor classes that will be set as the processor | |
| of <strong>all</strong> <code>Attention</code> layers.`,name:"`processor"},{anchor:"diffusers.UNet2DConditionModel.set_attn_processor.In",description:"<strong>In</strong> case <code>processor</code> is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors. —",name:"In"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition.py#L552"}}),dn=new y({props:{name:"set_default_attn_processor",anchor:"diffusers.UNet2DConditionModel.set_default_attn_processor",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition.py#L582"}}),ln=new k({}),mn=new y({props:{name:"class diffusers.models.unet_3d_condition.UNet3DConditionOutput",anchor:"diffusers.models.unet_3d_condition.UNet3DConditionOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.unet_3d_condition.UNet3DConditionOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_frames, num_channels, height, width)</code>) — | |
| Hidden states conditioned on <code>encoder_hidden_states</code> input. Output of last layer of model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_3d_condition.py#L44"}}),pn=new k({}),fn=new y({props:{name:"class diffusers.UNet3DConditionModel",anchor:"diffusers.UNet3DConditionModel",parameters:[{name:"sample_size",val:": typing.Optional[int] = None"},{name:"in_channels",val:": int = 4"},{name:"out_channels",val:": int = 4"},{name:"down_block_types",val:": typing.Tuple[str] = ('CrossAttnDownBlock3D', 'CrossAttnDownBlock3D', 'CrossAttnDownBlock3D', 'DownBlock3D')"},{name:"up_block_types",val:": typing.Tuple[str] = ('UpBlock3D', 'CrossAttnUpBlock3D', 'CrossAttnUpBlock3D', 'CrossAttnUpBlock3D')"},{name:"block_out_channels",val:": typing.Tuple[int] = (320, 640, 1280, 1280)"},{name:"layers_per_block",val:": int = 2"},{name:"downsample_padding",val:": int = 1"},{name:"mid_block_scale_factor",val:": float = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"norm_num_groups",val:": typing.Optional[int] = 32"},{name:"norm_eps",val:": float = 1e-05"},{name:"cross_attention_dim",val:": int = 1024"},{name:"attention_head_dim",val:": typing.Union[int, typing.Tuple[int]] = 64"},{name:"num_attention_heads",val:": typing.Union[int, typing.Tuple[int], NoneType] = None"}],parametersDescription:[{anchor:"diffusers.UNet3DConditionModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code> or <code>Tuple[int, int]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Height and width of input/output sample.`,name:"sample_size"},{anchor:"diffusers.UNet3DConditionModel.in_channels",description:"<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — The number of channels in the input sample.",name:"in_channels"},{anchor:"diffusers.UNet3DConditionModel.out_channels",description:"<strong>out_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — The number of channels in the output.",name:"out_channels"},{anchor:"diffusers.UNet3DConditionModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")</code>) — | |
| The tuple of downsample blocks to use.`,name:"down_block_types"},{anchor:"diffusers.UNet3DConditionModel.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D",)</code>) — | |
| The tuple of upsample blocks to use.`,name:"up_block_types"},{anchor:"diffusers.UNet3DConditionModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to <code>(320, 640, 1280, 1280)</code>) — | |
| The tuple of output channels for each block.`,name:"block_out_channels"},{anchor:"diffusers.UNet3DConditionModel.layers_per_block",description:"<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — The number of layers per block.",name:"layers_per_block"},{anchor:"diffusers.UNet3DConditionModel.downsample_padding",description:"<strong>downsample_padding</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — The padding to use for the downsampling convolution.",name:"downsample_padding"},{anchor:"diffusers.UNet3DConditionModel.mid_block_scale_factor",description:"<strong>mid_block_scale_factor</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) — The scale factor to use for the mid block.",name:"mid_block_scale_factor"},{anchor:"diffusers.UNet3DConditionModel.act_fn",description:"<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"silu"</code>) — The activation function to use.",name:"act_fn"},{anchor:"diffusers.UNet3DConditionModel.norm_num_groups",description:`<strong>norm_num_groups</strong> (<code>int</code>, <em>optional</em>, defaults to 32) — The number of groups to use for the normalization. | |
| If <code>None</code>, it will skip the normalization and activation layers in post-processing`,name:"norm_num_groups"},{anchor:"diffusers.UNet3DConditionModel.norm_eps",description:"<strong>norm_eps</strong> (<code>float</code>, <em>optional</em>, defaults to 1e-5) — The epsilon to use for the normalization.",name:"norm_eps"},{anchor:"diffusers.UNet3DConditionModel.cross_attention_dim",description:"<strong>cross_attention_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 1280) — The dimension of the cross attention features.",name:"cross_attention_dim"},{anchor:"diffusers.UNet3DConditionModel.attention_head_dim",description:"<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — The dimension of the attention heads.",name:"attention_head_dim"},{anchor:"diffusers.UNet3DConditionModel.num_attention_heads",description:"<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>) — The number of attention heads.",name:"num_attention_heads"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_3d_condition.py#L54"}}),hn=new y({props:{name:"forward",anchor:"diffusers.UNet3DConditionModel.forward",parameters:[{name:"sample",val:": FloatTensor"},{name:"timestep",val:": typing.Union[torch.Tensor, float, int]"},{name:"encoder_hidden_states",val:": Tensor"},{name:"class_labels",val:": typing.Optional[torch.Tensor] = None"},{name:"timestep_cond",val:": typing.Optional[torch.Tensor] = None"},{name:"attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"cross_attention_kwargs",val:": typing.Union[typing.Dict[str, typing.Any], NoneType] = None"},{name:"down_block_additional_residuals",val:": typing.Optional[typing.Tuple[torch.Tensor]] = None"},{name:"mid_block_additional_residual",val:": typing.Optional[torch.Tensor] = None"},{name:"return_dict",val:": bool = 
True"}],parametersDescription:[{anchor:"diffusers.UNet3DConditionModel.forward.sample",description:"<strong>sample</strong> (<code>torch.FloatTensor</code>) — (batch, num_frames, channel, height, width) noisy inputs tensor",name:"sample"},{anchor:"diffusers.UNet3DConditionModel.forward.timestep",description:"<strong>timestep</strong> (<code>torch.FloatTensor</code> or <code>float</code> or <code>int</code>) — (batch) timesteps",name:"timestep"},{anchor:"diffusers.UNet3DConditionModel.forward.encoder_hidden_states",description:"<strong>encoder_hidden_states</strong> (<code>torch.FloatTensor</code>) — (batch, sequence_length, feature_dim) encoder hidden states",name:"encoder_hidden_states"},{anchor:"diffusers.UNet3DConditionModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>models.unet_2d_condition.UNet3DConditionOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.UNet3DConditionModel.forward.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_3d_condition.py#L397",returnDescription:` | |
| <p><code>~models.unet_2d_condition.UNet3DConditionOutput</code> if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When | |
| returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><code>~models.unet_2d_condition.UNet3DConditionOutput</code> or <code>tuple</code></p> | |
| `}}),_n=new y({props:{name:"set_attention_slice",anchor:"diffusers.UNet3DConditionModel.set_attention_slice",parameters:[{name:"slice_size",val:""}],parametersDescription:[{anchor:"diffusers.UNet3DConditionModel.set_attention_slice.slice_size",description:`<strong>slice_size</strong> (<code>str</code> or <code>int</code> or <code>list(int)</code>, <em>optional</em>, defaults to <code>"auto"</code>) — | |
| When <code>"auto"</code>, halves the input to the attention heads, so attention will be computed in two steps. If | |
| <code>"max"</code>, maximum amount of memory will be saved by running only one slice at a time. If a number is | |
| provided, uses as many slices as <code>num_attention_heads // slice_size</code>. In this case, | |
| <code>num_attention_heads</code> must be a multiple of <code>slice_size</code>.`,name:"slice_size"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_3d_condition.py#L290"}}),gn=new y({props:{name:"set_attn_processor",anchor:"diffusers.UNet3DConditionModel.set_attn_processor",parameters:[{name:"processor",val:": typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor, typing.Dict[str, typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, 
diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor]]]"}],parametersDescription:[{anchor:"diffusers.UNet3DConditionModel.set_attn_processor.`processor",description:`<strong>\`processor</strong> (<code>dict</code> of <code>AttentionProcessor</code> or <code>AttentionProcessor</code>) — | |
| The instantiated processor class or a dictionary of processor classes that will be set as the processor | |
| of <strong>all</strong> <code>Attention</code> layers.`,name:"`processor"},{anchor:"diffusers.UNet3DConditionModel.set_attn_processor.In",description:"<strong>In</strong> case <code>processor</code> is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors. —",name:"In"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_3d_condition.py#L356"}}),bn=new y({props:{name:"set_default_attn_processor",anchor:"diffusers.UNet3DConditionModel.set_default_attn_processor",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_3d_condition.py#L387"}}),vn=new k({}),yn=new y({props:{name:"class diffusers.models.vae.DecoderOutput",anchor:"diffusers.models.vae.DecoderOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.vae.DecoderOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Decoded output sample of the model. Output of the last layer of the model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vae.py#L27"}}),xn=new k({}),wn=new y({props:{name:"class diffusers.models.vq_model.VQEncoderOutput",anchor:"diffusers.models.vq_model.VQEncoderOutput",parameters:[{name:"latents",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.vq_model.VQEncoderOutput.latents",description:`<strong>latents</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Encoded output sample of the model. Output of the last layer of the model.`,name:"latents"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vq_model.py#L27"}}),$n=new k({}),Mn=new y({props:{name:"class diffusers.VQModel",anchor:"diffusers.VQModel",parameters:[{name:"in_channels",val:": int = 3"},{name:"out_channels",val:": int = 3"},{name:"down_block_types",val:": typing.Tuple[str] = ('DownEncoderBlock2D',)"},{name:"up_block_types",val:": typing.Tuple[str] = ('UpDecoderBlock2D',)"},{name:"block_out_channels",val:": typing.Tuple[int] = (64,)"},{name:"layers_per_block",val:": int = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"latent_channels",val:": int = 3"},{name:"sample_size",val:": int = 32"},{name:"num_vq_embeddings",val:": int = 256"},{name:"norm_num_groups",val:": int = 32"},{name:"vq_embed_dim",val:": typing.Optional[int] = None"},{name:"scaling_factor",val:": float = 0.18215"},{name:"norm_type",val:": str = 'group'"}],parametersDescription:[{anchor:"diffusers.VQModel.in_channels",description:"<strong>in_channels</strong> (int, <em>optional</em>, defaults to 3) — Number of channels in the input image.",name:"in_channels"},{anchor:"diffusers.VQModel.out_channels",description:"<strong>out_channels</strong> (int, <em>optional</em>, defaults to 3) — Number of channels in the output.",name:"out_channels"},{anchor:"diffusers.VQModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("DownEncoderBlock2D",)</code>): Tuple of downsample block types.`,name:"down_block_types"},{anchor:"diffusers.VQModel.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("UpDecoderBlock2D",)</code>): Tuple of upsample block types.`,name:"up_block_types"},{anchor:"diffusers.VQModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to — | |
| obj:<code>(64,)</code>): Tuple of block output channels.`,name:"block_out_channels"},{anchor:"diffusers.VQModel.act_fn",description:"<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"silu"</code>) — The activation function to use.",name:"act_fn"},{anchor:"diffusers.VQModel.latent_channels",description:"<strong>latent_channels</strong> (<code>int</code>, <em>optional</em>, defaults to <code>3</code>) — Number of channels in the latent space.",name:"latent_channels"},{anchor:"diffusers.VQModel.sample_size",description:"<strong>sample_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>32</code>) — TODO",name:"sample_size"},{anchor:"diffusers.VQModel.num_vq_embeddings",description:"<strong>num_vq_embeddings</strong> (<code>int</code>, <em>optional</em>, defaults to <code>256</code>) — Number of codebook vectors in the VQ-VAE.",name:"num_vq_embeddings"},{anchor:"diffusers.VQModel.vq_embed_dim",description:"<strong>vq_embed_dim</strong> (<code>int</code>, <em>optional</em>) — Hidden dim of codebook vectors in the VQ-VAE.",name:"vq_embed_dim"},{anchor:"diffusers.VQModel.scaling_factor",description:`<strong>scaling_factor</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.18215</code>) — | |
| The component-wise standard deviation of the trained latent space computed using the first batch of the | |
| training set. This is used to scale the latent space to have unit variance when training the diffusion | |
| model. The latents are scaled with the formula <code>z = z * scaling_factor</code> before being passed to the | |
| diffusion model. When decoding, the latents are scaled back to the original scale with the formula: <code>z = 1 / scaling_factor * z</code>. For more details, refer to sections 4.3.2 and D.1 of the <a href="https://arxiv.org/abs/2112.10752" rel="nofollow">High-Resolution Image | |
| Synthesis with Latent Diffusion Models</a> paper.`,name:"scaling_factor"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vq_model.py#L39"}}),Tn=new y({props:{name:"forward",anchor:"diffusers.VQModel.forward",parameters:[{name:"sample",val:": FloatTensor"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.VQModel.forward.sample",description:"<strong>sample</strong> (<code>torch.FloatTensor</code>) — Input sample.",name:"sample"},{anchor:"diffusers.VQModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>DecoderOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vq_model.py#L144"}}),kn=new k({}),An=new y({props:{name:"class diffusers.models.autoencoder_kl.AutoencoderKLOutput",anchor:"diffusers.models.autoencoder_kl.AutoencoderKLOutput",parameters:[{name:"latent_dist",val:": DiagonalGaussianDistribution"}],parametersDescription:[{anchor:"diffusers.models.autoencoder_kl.AutoencoderKLOutput.latent_dist",description:`<strong>latent_dist</strong> (<code>DiagonalGaussianDistribution</code>) — | |
| Encoded outputs of <code>Encoder</code> represented as the mean and logvar of <code>DiagonalGaussianDistribution</code>. | |
| <code>DiagonalGaussianDistribution</code> allows for sampling latents from the distribution.`,name:"latent_dist"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L28"}}),Nn=new k({}),Un=new y({props:{name:"class diffusers.AutoencoderKL",anchor:"diffusers.AutoencoderKL",parameters:[{name:"in_channels",val:": int = 3"},{name:"out_channels",val:": int = 3"},{name:"down_block_types",val:": typing.Tuple[str] = ('DownEncoderBlock2D',)"},{name:"up_block_types",val:": typing.Tuple[str] = ('UpDecoderBlock2D',)"},{name:"block_out_channels",val:": typing.Tuple[int] = (64,)"},{name:"layers_per_block",val:": int = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"latent_channels",val:": int = 4"},{name:"norm_num_groups",val:": int = 32"},{name:"sample_size",val:": int = 32"},{name:"scaling_factor",val:": float = 0.18215"}],parametersDescription:[{anchor:"diffusers.AutoencoderKL.in_channels",description:"<strong>in_channels</strong> (int, <em>optional</em>, defaults to 3) — Number of channels in the input image.",name:"in_channels"},{anchor:"diffusers.AutoencoderKL.out_channels",description:"<strong>out_channels</strong> (int, <em>optional</em>, defaults to 3) — Number of channels in the output.",name:"out_channels"},{anchor:"diffusers.AutoencoderKL.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("DownEncoderBlock2D",)</code>): Tuple of downsample block types.`,name:"down_block_types"},{anchor:"diffusers.AutoencoderKL.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to — | |
| obj:<code>("UpDecoderBlock2D",)</code>): Tuple of upsample block types.`,name:"up_block_types"},{anchor:"diffusers.AutoencoderKL.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to — | |
| obj:<code>(64,)</code>): Tuple of block output channels.`,name:"block_out_channels"},{anchor:"diffusers.AutoencoderKL.act_fn",description:"<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"silu"</code>) — The activation function to use.",name:"act_fn"},{anchor:"diffusers.AutoencoderKL.latent_channels",description:"<strong>latent_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — Number of channels in the latent space.",name:"latent_channels"},{anchor:"diffusers.AutoencoderKL.sample_size",description:"<strong>sample_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>32</code>) — TODO",name:"sample_size"},{anchor:"diffusers.AutoencoderKL.scaling_factor",description:`<strong>scaling_factor</strong> (<code>float</code>, <em>optional</em>, defaults to 0.18215) — | |
| The component-wise standard deviation of the trained latent space computed using the first batch of the | |
| training set. This is used to scale the latent space to have unit variance when training the diffusion | |
| model. The latents are scaled with the formula <code>z = z * scaling_factor</code> before being passed to the | |
| diffusion model. When decoding, the latents are scaled back to the original scale with the formula: <code>z = 1 / scaling_factor * z</code>. For more details, refer to sections 4.3.2 and D.1 of the <a href="https://arxiv.org/abs/2112.10752" rel="nofollow">High-Resolution Image | |
| Synthesis with Latent Diffusion Models</a> paper.`,name:"scaling_factor"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L41"}}),Cn=new y({props:{name:"disable_slicing",anchor:"diffusers.AutoencoderKL.disable_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L153"}}),Pn=new y({props:{name:"disable_tiling",anchor:"diffusers.AutoencoderKL.disable_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L139"}}),In=new y({props:{name:"enable_slicing",anchor:"diffusers.AutoencoderKL.enable_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L146"}}),On=new y({props:{name:"enable_tiling",anchor:"diffusers.AutoencoderKL.enable_tiling",parameters:[{name:"use_tiling",val:": bool = True"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L131"}}),Ln=new y({props:{name:"forward",anchor:"diffusers.AutoencoderKL.forward",parameters:[{name:"sample",val:": FloatTensor"},{name:"sample_posterior",val:": bool = False"},{name:"return_dict",val:": bool = True"},{name:"generator",val:": typing.Optional[torch._C.Generator] = None"}],parametersDescription:[{anchor:"diffusers.AutoencoderKL.forward.sample",description:"<strong>sample</strong> (<code>torch.FloatTensor</code>) — Input sample.",name:"sample"},{anchor:"diffusers.AutoencoderKL.forward.sample_posterior",description:`<strong>sample_posterior</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to sample from the posterior.`,name:"sample_posterior"},{anchor:"diffusers.AutoencoderKL.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>DecoderOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L368"}}),Vn=new y({props:{name:"set_attn_processor",anchor:"diffusers.AutoencoderKL.set_attn_processor",parameters:[{name:"processor",val:": typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor, typing.Dict[str, typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, 
diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor]]]"}],parametersDescription:[{anchor:"diffusers.AutoencoderKL.set_attn_processor.`processor",description:`<strong>\`processor</strong> (<code>dict</code> of <code>AttentionProcessor</code> or <code>AttentionProcessor</code>) — | |
| The instantiated processor class or a dictionary of processor classes that will be set as the processor | |
| of <strong>all</strong> <code>Attention</code> layers.`,name:"`processor"},{anchor:"diffusers.AutoencoderKL.set_attn_processor.In",description:"<strong>In</strong> case <code>processor</code> is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors. —",name:"In"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L186"}}),Bn=new y({props:{name:"set_default_attn_processor",anchor:"diffusers.AutoencoderKL.set_default_attn_processor",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L217"}}),qn=new y({props:{name:"tiled_decode",anchor:"diffusers.AutoencoderKL.tiled_decode",parameters:[{name:"z",val:": FloatTensor"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.AutoencoderKL.tiled_decode.When",description:"<strong>When</strong> this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several —",name:"When"},{anchor:"diffusers.AutoencoderKL.tiled_decode.steps.",description:"<strong>steps.</strong> This is useful to keep memory use constant regardless of image size. The end result of tiled decoding is —",name:"steps."},{anchor:"diffusers.AutoencoderKL.tiled_decode.different",description:"<strong>different</strong> from non-tiled decoding due to each tile using a different decoder. To avoid tiling artifacts, the —",name:"different"},{anchor:"diffusers.AutoencoderKL.tiled_decode.tiles",description:"<strong>tiles</strong> overlap and are blended together to form a smooth output. You may still see tile-sized changes in the —",name:"tiles"},{anchor:"diffusers.AutoencoderKL.tiled_decode.look",description:`<strong>look</strong> of the output, but they should be much less noticeable. — | |
| z (<code>torch.FloatTensor</code>): Input batch of latent vectors. return_dict (<code>bool</code>, <em>optional</em>, defaults to | |
| <code>True</code>): | |
| Whether or not to return a <code>DecoderOutput</code> instead of a plain tuple.`,name:"look"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L321"}}),Jn=new y({props:{name:"tiled_encode",anchor:"diffusers.AutoencoderKL.tiled_encode",parameters:[{name:"x",val:": FloatTensor"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.AutoencoderKL.tiled_encode.When",description:"<strong>When</strong> this option is enabled, the VAE will split the input tensor into tiles to compute encoding in several —",name:"When"},{anchor:"diffusers.AutoencoderKL.tiled_encode.steps.",description:"<strong>steps.</strong> This is useful to keep memory use constant regardless of image size. The end result of tiled encoding is —",name:"steps."},{anchor:"diffusers.AutoencoderKL.tiled_encode.different",description:"<strong>different</strong> from non-tiled encoding due to each tile using a different encoder. To avoid tiling artifacts, the —",name:"different"},{anchor:"diffusers.AutoencoderKL.tiled_encode.tiles",description:"<strong>tiles</strong> overlap and are blended together to form a smooth output. You may still see tile-sized changes in the —",name:"tiles"},{anchor:"diffusers.AutoencoderKL.tiled_encode.look",description:`<strong>look</strong> of the output, but they should be much less noticeable. — | |
| x (<code>torch.FloatTensor</code>): Input batch of images. return_dict (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>): | |
| Whether or not to return a <code>AutoencoderKLOutput</code> instead of a plain tuple.`,name:"look"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoder_kl.py#L274"}}),zn=new k({}),Wn=new y({props:{name:"class diffusers.Transformer2DModel",anchor:"diffusers.Transformer2DModel",parameters:[{name:"num_attention_heads",val:": int = 16"},{name:"attention_head_dim",val:": int = 88"},{name:"in_channels",val:": typing.Optional[int] = None"},{name:"out_channels",val:": typing.Optional[int] = None"},{name:"num_layers",val:": int = 1"},{name:"dropout",val:": float = 0.0"},{name:"norm_num_groups",val:": int = 32"},{name:"cross_attention_dim",val:": typing.Optional[int] = None"},{name:"attention_bias",val:": bool = False"},{name:"sample_size",val:": typing.Optional[int] = None"},{name:"num_vector_embeds",val:": typing.Optional[int] = None"},{name:"patch_size",val:": typing.Optional[int] = None"},{name:"activation_fn",val:": str = 'geglu'"},{name:"num_embeds_ada_norm",val:": typing.Optional[int] = None"},{name:"use_linear_projection",val:": bool = False"},{name:"only_cross_attention",val:": bool = False"},{name:"upcast_attention",val:": bool = False"},{name:"norm_type",val:": str = 'layer_norm'"},{name:"norm_elementwise_affine",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.Transformer2DModel.num_attention_heads",description:"<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 16) — The number of heads to use for multi-head attention.",name:"num_attention_heads"},{anchor:"diffusers.Transformer2DModel.attention_head_dim",description:"<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 88) — The number of channels in each head.",name:"attention_head_dim"},{anchor:"diffusers.Transformer2DModel.in_channels",description:`<strong>in_channels</strong> (<code>int</code>, <em>optional</em>) — | |
| Pass if the input is continuous. The number of channels in the input and output.`,name:"in_channels"},{anchor:"diffusers.Transformer2DModel.num_layers",description:"<strong>num_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — The number of layers of Transformer blocks to use.",name:"num_layers"},{anchor:"diffusers.Transformer2DModel.dropout",description:"<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — The dropout probability to use.",name:"dropout"},{anchor:"diffusers.Transformer2DModel.cross_attention_dim",description:"<strong>cross_attention_dim</strong> (<code>int</code>, <em>optional</em>) — The number of encoder_hidden_states dimensions to use.",name:"cross_attention_dim"},{anchor:"diffusers.Transformer2DModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code>, <em>optional</em>) — Pass if the input is discrete. The width of the latent images. | |
| Note that this is fixed at training time as it is used for learning a number of position embeddings. See | |
| <code>ImagePositionalEmbeddings</code>.`,name:"sample_size"},{anchor:"diffusers.Transformer2DModel.num_vector_embeds",description:`<strong>num_vector_embeds</strong> (<code>int</code>, <em>optional</em>) — | |
| Pass if the input is discrete. The number of classes of the vector embeddings of the latent pixels. | |
| Includes the class for the masked latent pixel.`,name:"num_vector_embeds"},{anchor:"diffusers.Transformer2DModel.activation_fn",description:"<strong>activation_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"geglu"</code>) — Activation function to be used in feed-forward.",name:"activation_fn"},{anchor:"diffusers.Transformer2DModel.num_embeds_ada_norm",description:`<strong>num_embeds_ada_norm</strong> ( <code>int</code>, <em>optional</em>) — Pass if at least one of the norm_layers is <code>AdaLayerNorm</code>. | |
| The number of diffusion steps used during training. Note that this is fixed at training time as it is used | |
| to learn a number of embeddings that are added to the hidden states. During inference, you can denoise for | |
| up to but not more than steps than <code>num_embeds_ada_norm</code>.`,name:"num_embeds_ada_norm"},{anchor:"diffusers.Transformer2DModel.attention_bias",description:`<strong>attention_bias</strong> (<code>bool</code>, <em>optional</em>) — | |
| Configure if the TransformerBlocks’ attention should contain a bias parameter.`,name:"attention_bias"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformer_2d.py#L41"}}),Sn=new y({props:{name:"forward",anchor:"diffusers.Transformer2DModel.forward",parameters:[{name:"hidden_states",val:": Tensor"},{name:"encoder_hidden_states",val:": typing.Optional[torch.Tensor] = None"},{name:"timestep",val:": typing.Optional[torch.LongTensor] = None"},{name:"class_labels",val:": typing.Optional[torch.LongTensor] = None"},{name:"cross_attention_kwargs",val:": typing.Dict[str, typing.Any] = None"},{name:"attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"encoder_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.Transformer2DModel.forward.hidden_states",description:`<strong>hidden_states</strong> ( When discrete, <code>torch.LongTensor</code> of shape <code>(batch size, num latent pixels)</code>. — | |
| When continuous, <code>torch.FloatTensor</code> of shape <code>(batch size, channel, height, width)</code>): Input | |
| hidden_states`,name:"hidden_states"},{anchor:"diffusers.Transformer2DModel.forward.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> ( <code>torch.FloatTensor</code> of shape <code>(batch size, sequence len, embed dims)</code>, <em>optional</em>) — | |
| Conditional embeddings for cross attention layer. If not given, cross-attention defaults to | |
| self-attention.`,name:"encoder_hidden_states"},{anchor:"diffusers.Transformer2DModel.forward.timestep",description:`<strong>timestep</strong> ( <code>torch.LongTensor</code>, <em>optional</em>) — | |
| Optional timestep to be applied as an embedding in AdaLayerNorm’s. Used to indicate denoising step.`,name:"timestep"},{anchor:"diffusers.Transformer2DModel.forward.class_labels",description:`<strong>class_labels</strong> ( <code>torch.LongTensor</code> of shape <code>(batch size, num classes)</code>, <em>optional</em>) — | |
| Optional class labels to be applied as an embedding in AdaLayerZeroNorm. Used to indicate class labels | |
| conditioning.`,name:"class_labels"},{anchor:"diffusers.Transformer2DModel.forward.encoder_attention_mask",description:`<strong>encoder_attention_mask</strong> ( <code>torch.Tensor</code>, <em>optional</em> ). — | |
| Cross-attention mask, applied to encoder_hidden_states. Two formats supported: | |
| Mask <code>(batch, sequence_length)</code> True = keep, False = discard. Bias <code>(batch, 1, sequence_length)</code> 0 | |
| = keep, -10000 = discard. | |
| If ndim == 2: will be interpreted as a mask, then converted into a bias consistent with the format | |
| above. This bias will be added to the cross-attention scores.`,name:"encoder_attention_mask"},{anchor:"diffusers.Transformer2DModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d_condition.UNet2DConditionOutput">models.unet_2d_condition.UNet2DConditionOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformer_2d.py#L214",returnDescription:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.transformer_2d.Transformer2DModelOutput" | |
| >Transformer2DModelOutput</a> if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When | |
| returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.transformer_2d.Transformer2DModelOutput" | |
| >Transformer2DModelOutput</a> or <code>tuple</code></p> | |
| `}}),Gn=new k({}),Rn=new y({props:{name:"class diffusers.models.transformer_2d.Transformer2DModelOutput",anchor:"diffusers.models.transformer_2d.Transformer2DModelOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.transformer_2d.Transformer2DModelOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code> or <code>(batch size, num_vector_embeds - 1, num_latent_pixels)</code> if <a href="/docs/diffusers/main/en/api/models#diffusers.Transformer2DModel">Transformer2DModel</a> is discrete) — | |
| Hidden states conditioned on <code>encoder_hidden_states</code> input. If discrete, returns probability distributions | |
| for the unnoised latent pixels.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformer_2d.py#L30"}}),Zn=new k({}),Yn=new y({props:{name:"class diffusers.models.transformer_temporal.TransformerTemporalModel",anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel",parameters:[{name:"num_attention_heads",val:": int = 16"},{name:"attention_head_dim",val:": int = 88"},{name:"in_channels",val:": typing.Optional[int] = None"},{name:"out_channels",val:": typing.Optional[int] = None"},{name:"num_layers",val:": int = 1"},{name:"dropout",val:": float = 0.0"},{name:"norm_num_groups",val:": int = 32"},{name:"cross_attention_dim",val:": typing.Optional[int] = None"},{name:"attention_bias",val:": bool = False"},{name:"sample_size",val:": typing.Optional[int] = None"},{name:"activation_fn",val:": str = 'geglu'"},{name:"norm_elementwise_affine",val:": bool = True"},{name:"double_self_attention",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.num_attention_heads",description:"<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 16) — The number of heads to use for multi-head attention.",name:"num_attention_heads"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.attention_head_dim",description:"<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 88) — The number of channels in each head.",name:"attention_head_dim"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.in_channels",description:`<strong>in_channels</strong> (<code>int</code>, <em>optional</em>) — | |
| Pass if the input is continuous. The number of channels in the input and output.`,name:"in_channels"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.num_layers",description:"<strong>num_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — The number of layers of Transformer blocks to use.",name:"num_layers"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.dropout",description:"<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — The dropout probability to use.",name:"dropout"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.cross_attention_dim",description:"<strong>cross_attention_dim</strong> (<code>int</code>, <em>optional</em>) — The number of encoder_hidden_states dimensions to use.",name:"cross_attention_dim"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code>, <em>optional</em>) — Pass if the input is discrete. The width of the latent images. | |
| Note that this is fixed at training time as it is used for learning a number of position embeddings. See | |
| <code>ImagePositionalEmbeddings</code>.`,name:"sample_size"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.activation_fn",description:"<strong>activation_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"geglu"</code>) — Activation function to be used in feed-forward.",name:"activation_fn"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.attention_bias",description:`<strong>attention_bias</strong> (<code>bool</code>, <em>optional</em>) — | |
| Configure if the TransformerBlocks’ attention should contain a bias parameter.`,name:"attention_bias"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.double_self_attention",description:`<strong>double_self_attention</strong> (<code>bool</code>, <em>optional</em>) — | |
| Configure if each TransformerBlock should contain two self-attention layers`,name:"double_self_attention"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformer_temporal.py#L37"}}),Qn=new y({props:{name:"forward",anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.forward",parameters:[{name:"hidden_states",val:""},{name:"encoder_hidden_states",val:" = None"},{name:"timestep",val:" = None"},{name:"class_labels",val:" = None"},{name:"num_frames",val:" = 1"},{name:"cross_attention_kwargs",val:" = None"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.forward.hidden_states",description:`<strong>hidden_states</strong> ( When discrete, <code>torch.LongTensor</code> of shape <code>(batch size, num latent pixels)</code>. — | |
| When continous, <code>torch.FloatTensor</code> of shape <code>(batch size, channel, height, width)</code>): Input | |
| hidden_states`,name:"hidden_states"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.forward.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> ( <code>torch.LongTensor</code> of shape <code>(batch size, encoder_hidden_states dim)</code>, <em>optional</em>) — | |
| Conditional embeddings for cross attention layer. If not given, cross-attention defaults to | |
| self-attention.`,name:"encoder_hidden_states"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.forward.timestep",description:`<strong>timestep</strong> ( <code>torch.long</code>, <em>optional</em>) — | |
| Optional timestep to be applied as an embedding in AdaLayerNorm’s. Used to indicate denoising step.`,name:"timestep"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.forward.class_labels",description:`<strong>class_labels</strong> ( <code>torch.LongTensor</code> of shape <code>(batch size, num classes)</code>, <em>optional</em>) — | |
| Optional class labels to be applied as an embedding in AdaLayerZeroNorm. Used to indicate class labels | |
| conditioning.`,name:"class_labels"},{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/main/en/api/models#diffusers.models.unet_2d_condition.UNet2DConditionOutput">models.unet_2d_condition.UNet2DConditionOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformer_temporal.py#L106",returnDescription:` | |
| <p><code>~models.transformer_2d.TransformerTemporalModelOutput</code> if <code>return_dict</code> is True, otherwise a <code>tuple</code>. | |
| When returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><code>~models.transformer_2d.TransformerTemporalModelOutput</code> or <code>tuple</code></p> | |
| `}}),Hn=new k({}),os=new y({props:{name:"class diffusers.models.transformer_temporal.TransformerTemporalModelOutput",anchor:"diffusers.models.transformer_temporal.TransformerTemporalModelOutput",parameters:[{name:"sample",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.transformer_temporal.TransformerTemporalModelOutput.sample",description:`<strong>sample</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size x num_frames, num_channels, height, width)</code>) — | |
| Hidden states conditioned on <code>encoder_hidden_states</code> input.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformer_temporal.py#L27"}}),ts=new k({}),ns=new y({props:{name:"class diffusers.PriorTransformer",anchor:"diffusers.PriorTransformer",parameters:[{name:"num_attention_heads",val:": int = 32"},{name:"attention_head_dim",val:": int = 64"},{name:"num_layers",val:": int = 20"},{name:"embedding_dim",val:": int = 768"},{name:"num_embeddings",val:" = 77"},{name:"additional_embeddings",val:" = 4"},{name:"dropout",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.PriorTransformer.num_attention_heads",description:"<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 32) — The number of heads to use for multi-head attention.",name:"num_attention_heads"},{anchor:"diffusers.PriorTransformer.attention_head_dim",description:"<strong>attention_head_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 64) — The number of channels in each head.",name:"attention_head_dim"},{anchor:"diffusers.PriorTransformer.num_layers",description:"<strong>num_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 20) — The number of layers of Transformer blocks to use.",name:"num_layers"},{anchor:"diffusers.PriorTransformer.embedding_dim",description:`<strong>embedding_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 768) — The dimension of the CLIP embeddings. Note that CLIP | |
| image embeddings and text embeddings are both the same dimension.`,name:"embedding_dim"},{anchor:"diffusers.PriorTransformer.num_embeddings",description:`<strong>num_embeddings</strong> (<code>int</code>, <em>optional</em>, defaults to 77) — The max number of clip embeddings allowed. I.e. the | |
| length of the prompt after it has been tokenized.`,name:"num_embeddings"},{anchor:"diffusers.PriorTransformer.additional_embeddings",description:`<strong>additional_embeddings</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — The number of additional tokens appended to the | |
| projected hidden_states. The actual length of the used hidden_states is <code>num_embeddings + additional_embeddings</code>.`,name:"additional_embeddings"},{anchor:"diffusers.PriorTransformer.dropout",description:"<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — The dropout probability to use.",name:"dropout"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/prior_transformer.py#L27"}}),as=new y({props:{name:"forward",anchor:"diffusers.PriorTransformer.forward",parameters:[{name:"hidden_states",val:""},{name:"timestep",val:": typing.Union[torch.Tensor, float, int]"},{name:"proj_embedding",val:": FloatTensor"},{name:"encoder_hidden_states",val:": FloatTensor"},{name:"attention_mask",val:": typing.Optional[torch.BoolTensor] = None"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.PriorTransformer.forward.hidden_states",description:`<strong>hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, embedding_dim)</code>) — | |
| x_t, the currently predicted image embeddings.`,name:"hidden_states"},{anchor:"diffusers.PriorTransformer.forward.timestep",description:`<strong>timestep</strong> (<code>torch.long</code>) — | |
| Current denoising step.`,name:"timestep"},{anchor:"diffusers.PriorTransformer.forward.proj_embedding",description:`<strong>proj_embedding</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, embedding_dim)</code>) — | |
| Projected embedding vector the denoising process is conditioned on.`,name:"proj_embedding"},{anchor:"diffusers.PriorTransformer.forward.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_embeddings, embedding_dim)</code>) — | |
| Hidden states of the text embeddings the denoising process is conditioned on.`,name:"encoder_hidden_states"},{anchor:"diffusers.PriorTransformer.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.BoolTensor</code> of shape <code>(batch_size, num_embeddings)</code>) — | |
| Text mask for the text embeddings.`,name:"attention_mask"},{anchor:"diffusers.PriorTransformer.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/main/en/api/models#diffusers.models.prior_transformer.PriorTransformerOutput">models.prior_transformer.PriorTransformerOutput</a> instead of a plain | |
| tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/prior_transformer.py#L171",returnDescription:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.prior_transformer.PriorTransformerOutput" | |
| >PriorTransformerOutput</a> if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When | |
| returning a tuple, the first element is the sample tensor.</p> | |
| `,returnType:` | |
| <p><a | |
| href="/docs/diffusers/main/en/api/models#diffusers.models.prior_transformer.PriorTransformerOutput" | |
| >PriorTransformerOutput</a> or <code>tuple</code></p> | |
| `}}),is=new y({props:{name:"set_attn_processor",anchor:"diffusers.PriorTransformer.set_attn_processor",parameters:[{name:"processor",val:": typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor, typing.Dict[str, typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, diffusers.models.attention_processor.CustomDiffusionAttnProcessor, 
diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor]]]"}],parametersDescription:[{anchor:"diffusers.PriorTransformer.set_attn_processor.`processor",description:`<strong>\`processor</strong> (<code>dict</code> of <code>AttentionProcessor</code> or <code>AttentionProcessor</code>) — | |
| The instantiated processor class or a dictionary of processor classes that will be set as the processor | |
| of <strong>all</strong> <code>Attention</code> layers.`,name:"`processor"},{anchor:"diffusers.PriorTransformer.set_attn_processor.In",description:"<strong>In</strong> case <code>processor</code> is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors. —",name:"In"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/prior_transformer.py#L134"}}),ds=new y({props:{name:"set_default_attn_processor",anchor:"diffusers.PriorTransformer.set_default_attn_processor",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/prior_transformer.py#L165"}}),ls=new k({}),ms=new y({props:{name:"class diffusers.models.prior_transformer.PriorTransformerOutput",anchor:"diffusers.models.prior_transformer.PriorTransformerOutput",parameters:[{name:"predicted_image_embedding",val:": FloatTensor"}],parametersDescription:[{anchor:"diffusers.models.prior_transformer.PriorTransformerOutput.predicted_image_embedding",description:`<strong>predicted_image_embedding</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, embedding_dim)</code>) — | |
| The predicted CLIP image embedding conditioned on the CLIP text embedding input.`,name:"predicted_image_embedding"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/prior_transformer.py#L17"}}),ps=new k({}),us=new y({props:{name:"class diffusers.models.controlnet.ControlNetOutput",anchor:"diffusers.models.controlnet.ControlNetOutput",parameters:[{name:"down_block_res_samples",val:": typing.Tuple[torch.Tensor]"},{name:"mid_block_res_sample",val:": Tensor"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet.py#L39"}}),hs=new k({}),_s=new y({props:{name:"class diffusers.ControlNetModel",anchor:"diffusers.ControlNetModel",parameters:[{name:"in_channels",val:": int = 4"},{name:"conditioning_channels",val:": int = 3"},{name:"flip_sin_to_cos",val:": bool = True"},{name:"freq_shift",val:": int = 0"},{name:"down_block_types",val:": typing.Tuple[str] = ('CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'DownBlock2D')"},{name:"only_cross_attention",val:": typing.Union[bool, typing.Tuple[bool]] = False"},{name:"block_out_channels",val:": typing.Tuple[int] = (320, 640, 1280, 1280)"},{name:"layers_per_block",val:": int = 2"},{name:"downsample_padding",val:": int = 1"},{name:"mid_block_scale_factor",val:": float = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"norm_num_groups",val:": typing.Optional[int] = 32"},{name:"norm_eps",val:": float = 1e-05"},{name:"cross_attention_dim",val:": int = 1280"},{name:"attention_head_dim",val:": typing.Union[int, typing.Tuple[int]] = 8"},{name:"num_attention_heads",val:": typing.Union[int, typing.Tuple[int], NoneType] = None"},{name:"use_linear_projection",val:": bool = False"},{name:"class_embed_type",val:": typing.Optional[str] = None"},{name:"num_class_embeds",val:": typing.Optional[int] = None"},{name:"upcast_attention",val:": bool = False"},{name:"resnet_time_scale_shift",val:": str = 
'default'"},{name:"projection_class_embeddings_input_dim",val:": typing.Optional[int] = None"},{name:"controlnet_conditioning_channel_order",val:": str = 'rgb'"},{name:"conditioning_embedding_out_channels",val:": typing.Optional[typing.Tuple[int]] = (16, 32, 96, 256)"},{name:"global_pool_conditions",val:": bool = False"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet.py#L89"}}),gs=new y({props:{name:"from_unet",anchor:"diffusers.ControlNetModel.from_unet",parameters:[{name:"unet",val:": UNet2DConditionModel"},{name:"controlnet_conditioning_channel_order",val:": str = 'rgb'"},{name:"conditioning_embedding_out_channels",val:": typing.Optional[typing.Tuple[int]] = (16, 32, 96, 256)"},{name:"load_weights_from_unet",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.ControlNetModel.from_unet.unet",description:`<strong>unet</strong> (<code>UNet2DConditionModel</code>) — | |
| UNet model which weights are copied to the ControlNet. Note that all configuration options are also | |
| copied where applicable.`,name:"unet"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet.py#L277"}}),bs=new y({props:{name:"set_attention_slice",anchor:"diffusers.ControlNetModel.set_attention_slice",parameters:[{name:"slice_size",val:""}],parametersDescription:[{anchor:"diffusers.ControlNetModel.set_attention_slice.slice_size",description:`<strong>slice_size</strong> (<code>str</code> or <code>int</code> or <code>list(int)</code>, <em>optional</em>, defaults to <code>"auto"</code>) — | |
| When <code>"auto"</code>, halves the input to the attention heads, so attention will be computed in two steps. If | |
| <code>"max"</code>, maximum amount of memory will be saved by running only one slice at a time. If a number is | |
| provided, uses as many slices as <code>num_attention_heads // slice_size</code>. In this case, | |
| <code>num_attention_heads</code> must be a multiple of <code>slice_size</code>.`,name:"slice_size"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet.py#L396"}}),vs=new y({props:{name:"set_attn_processor",anchor:"diffusers.ControlNetModel.set_attn_processor",parameters:[{name:"processor",val:": typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor, typing.Dict[str, typing.Union[diffusers.models.attention_processor.AttnProcessor, diffusers.models.attention_processor.AttnProcessor2_0, diffusers.models.attention_processor.XFormersAttnProcessor, diffusers.models.attention_processor.SlicedAttnProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor, diffusers.models.attention_processor.SlicedAttnAddedKVProcessor, diffusers.models.attention_processor.AttnAddedKVProcessor2_0, diffusers.models.attention_processor.XFormersAttnAddedKVProcessor, diffusers.models.attention_processor.LoRAAttnProcessor, diffusers.models.attention_processor.LoRAXFormersAttnProcessor, diffusers.models.attention_processor.LoRAAttnProcessor2_0, diffusers.models.attention_processor.LoRAAttnAddedKVProcessor, 
diffusers.models.attention_processor.CustomDiffusionAttnProcessor, diffusers.models.attention_processor.CustomDiffusionXFormersAttnProcessor]]]"}],parametersDescription:[{anchor:"diffusers.ControlNetModel.set_attn_processor.`processor",description:`<strong>\`processor</strong> (<code>dict</code> of <code>AttentionProcessor</code> or <code>AttentionProcessor</code>) — | |
| The instantiated processor class or a dictionary of processor classes that will be set as the processor | |
| of <strong>all</strong> <code>Attention</code> layers.`,name:"`processor"},{anchor:"diffusers.ControlNetModel.set_attn_processor.In",description:"<strong>In</strong> case <code>processor</code> is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors. —",name:"In"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet.py#L358"}}),ys=new y({props:{name:"set_default_attn_processor",anchor:"diffusers.ControlNetModel.set_default_attn_processor",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet.py#L389"}}),xs=new k({}),ws=new y({props:{name:"class diffusers.FlaxModelMixin",anchor:"diffusers.FlaxModelMixin",parameters:[],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_flax_utils.py#L45"}}),$s=new y({props:{name:"from_pretrained",anchor:"diffusers.FlaxModelMixin.from_pretrained",parameters:[{name:"pretrained_model_name_or_path",val:": typing.Union[str, os.PathLike]"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"*model_args",val:""},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"diffusers.FlaxModelMixin.from_pretrained.pretrained_model_name_or_path",description:`<strong>pretrained_model_name_or_path</strong> (<code>str</code> or <code>os.PathLike</code>) — | |
| Can be either:</p> | |
| <ul> | |
| <li>A string, the <em>model id</em> of a pretrained model hosted inside a model repo on huggingface.co. | |
| Valid model ids are namespaced under a user or organization name, like | |
| <code>runwayml/stable-diffusion-v1-5</code>.</li> | |
| <li>A path to a <em>directory</em> containing model weights saved using <a href="/docs/diffusers/main/en/api/models#diffusers.ModelMixin.save_pretrained">save_pretrained()</a>, | |
| e.g., <code>./my_model_directory/</code>.</li> | |
| </ul>`,name:"pretrained_model_name_or_path"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.dtype",description:`<strong>dtype</strong> (<code>jax.numpy.dtype</code>, <em>optional</em>, defaults to <code>jax.numpy.float32</code>) — | |
| The data type of the computation. Can be one of <code>jax.numpy.float32</code>, <code>jax.numpy.float16</code> (on GPUs) and | |
| <code>jax.numpy.bfloat16</code> (on TPUs).</p> | |
| <p>This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If | |
| specified all the computation will be performed with the given <code>dtype</code>.</p> | |
| <p><strong>Note that this only specifies the dtype of the computation and does not influence the dtype of model | |
| parameters.</strong></p> | |
| <p>If you wish to change the dtype of the model parameters, see <code>~ModelMixin.to_fp16</code> and | |
| <code>~ModelMixin.to_bf16</code>.`,name:"dtype"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.model_args",description:`<strong>model_args</strong> (sequence of positional arguments, <em>optional</em>) — | |
| All remaining positional arguments will be passed to the underlying model’s <code>__init__</code> method.`,name:"model_args"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.cache_dir",description:`<strong>cache_dir</strong> (<code>Union[str, os.PathLike]</code>, <em>optional</em>) — | |
| Path to a directory in which a downloaded pretrained model configuration should be cached if the | |
| standard cache should not be used.`,name:"cache_dir"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.force_download",description:`<strong>force_download</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to force the (re-)download of the model weights and configuration files, overriding the | |
| cached versions if they exist.`,name:"force_download"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.resume_download",description:`<strong>resume_download</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to delete incompletely received files. Will attempt to resume the download if such a | |
| file exists.`,name:"resume_download"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.proxies",description:`<strong>proxies</strong> (<code>Dict[str, str]</code>, <em>optional</em>) — | |
| A dictionary of proxy servers to use by protocol or endpoint, e.g., <code>{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}</code>. The proxies are used on each request.`,name:"proxies"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.local_files_only(bool,",description:`<strong>local_files_only(<code>bool</code>,</strong> <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to only look at local files (i.e., do not try to download the model).`,name:"local_files_only(bool,"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.revision",description:`<strong>revision</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"main"</code>) — | |
| The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a | |
| git-based system for storing models and other artifacts on huggingface.co, so <code>revision</code> can be any | |
| identifier allowed by git.`,name:"revision"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.from_pt",description:`<strong>from_pt</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Load the model weights from a PyTorch checkpoint save file.`,name:"from_pt"},{anchor:"diffusers.FlaxModelMixin.from_pretrained.kwargs",description:`<strong>kwargs</strong> (remaining dictionary of keyword arguments, <em>optional</em>) — | |
| Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., | |
| <code>output_attentions=True</code>). Behaves differently depending on whether a <code>config</code> is provided or | |
| automatically loaded:</p> | |
| <ul> | |
| <li>If a configuration is provided with <code>config</code>, <code>**kwargs</code> will be directly passed to the | |
| underlying model’s <code>__init__</code> method (we assume all relevant updates to the configuration have | |
| already been done)</li> | |
| <li>If a configuration is not provided, <code>kwargs</code> will be first passed to the configuration class | |
| initialization function (<a href="/docs/diffusers/main/en/api/configuration#diffusers.ConfigMixin.from_config">from_config()</a>). Each key of <code>kwargs</code> that corresponds to | |
| a configuration attribute will be used to override said attribute with the supplied <code>kwargs</code> | |
| value. Remaining keys that do not correspond to any configuration attribute will be passed to the | |
| underlying model’s <code>__init__</code> function.</li> | |
| </ul>`,name:"kwargs"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_flax_utils.py#L195"}}),tt=new nl({props:{anchor:"diffusers.FlaxModelMixin.from_pretrained.example",$$slots:{default:[Ux]},$$scope:{ctx:B}}}),Ts=new y({props:{name:"save_pretrained",anchor:"diffusers.FlaxModelMixin.save_pretrained",parameters:[{name:"save_directory",val:": typing.Union[str, os.PathLike]"},{name:"params",val:": typing.Union[typing.Dict, flax.core.frozen_dict.FrozenDict]"},{name:"is_main_process",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.FlaxModelMixin.save_pretrained.save_directory",description:`<strong>save_directory</strong> (<code>str</code> or <code>os.PathLike</code>) — | |
| Directory to which to save. Will be created if it doesn’t exist.`,name:"save_directory"},{anchor:"diffusers.FlaxModelMixin.save_pretrained.params",description:`<strong>params</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> of model parameters.`,name:"params"},{anchor:"diffusers.FlaxModelMixin.save_pretrained.is_main_process",description:`<strong>is_main_process</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether the process calling this is the main process or not. Useful when in distributed training like | |
| TPUs and need to call this function on all processes. In this case, set <code>is_main_process=True</code> only on | |
| the main process to avoid race conditions.`,name:"is_main_process"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_flax_utils.py#L487"}}),As=new y({props:{name:"to_bf16",anchor:"diffusers.FlaxModelMixin.to_bf16",parameters:[{name:"params",val:": typing.Union[typing.Dict, flax.core.frozen_dict.FrozenDict]"},{name:"mask",val:": typing.Any = None"}],parametersDescription:[{anchor:"diffusers.FlaxModelMixin.to_bf16.params",description:`<strong>params</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> of model parameters.`,name:"params"},{anchor:"diffusers.FlaxModelMixin.to_bf16.mask",description:`<strong>mask</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> with same structure as the <code>params</code> tree. The leaves should be booleans, <code>True</code> for params | |
| you want to cast, and should be <code>False</code> for those you want to skip.`,name:"mask"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_flax_utils.py#L87"}}),st=new nl({props:{anchor:"diffusers.FlaxModelMixin.to_bf16.example",$$slots:{default:[Ex]},$$scope:{ctx:B}}}),Ns=new y({props:{name:"to_fp16",anchor:"diffusers.FlaxModelMixin.to_fp16",parameters:[{name:"params",val:": typing.Union[typing.Dict, flax.core.frozen_dict.FrozenDict]"},{name:"mask",val:": typing.Any = None"}],parametersDescription:[{anchor:"diffusers.FlaxModelMixin.to_fp16.params",description:`<strong>params</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> of model parameters.`,name:"params"},{anchor:"diffusers.FlaxModelMixin.to_fp16.mask",description:`<strong>mask</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> with same structure as the <code>params</code> tree. The leaves should be booleans, <code>True</code> for params | |
| you want to cast, and should be <code>False</code> for those you want to skip`,name:"mask"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_flax_utils.py#L153"}}),rt=new nl({props:{anchor:"diffusers.FlaxModelMixin.to_fp16.example",$$slots:{default:[Cx]},$$scope:{ctx:B}}}),Us=new y({props:{name:"to_fp32",anchor:"diffusers.FlaxModelMixin.to_fp32",parameters:[{name:"params",val:": typing.Union[typing.Dict, flax.core.frozen_dict.FrozenDict]"},{name:"mask",val:": typing.Any = None"}],parametersDescription:[{anchor:"diffusers.FlaxModelMixin.to_fp32.params",description:`<strong>params</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> of model parameters.`,name:"params"},{anchor:"diffusers.FlaxModelMixin.to_fp32.mask",description:`<strong>mask</strong> (<code>Union[Dict, FrozenDict]</code>) — | |
| A <code>PyTree</code> with same structure as the <code>params</code> tree. The leaves should be booleans, <code>True</code> for params | |
| you want to cast, and should be <code>False</code> for those you want to skip`,name:"mask"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/modeling_flax_utils.py#L126"}}),at=new nl({props:{anchor:"diffusers.FlaxModelMixin.to_fp32.example",$$slots:{default:[Fx]},$$scope:{ctx:B}}}),Es=new k({}),Cs=new y({props:{name:"class diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput",anchor:"diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput",parameters:[{name:"sample",val:": ndarray"}],parametersDescription:[{anchor:"diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput.sample",description:`<strong>sample</strong> (<code>jnp.ndarray</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Hidden states conditioned on <code>encoder_hidden_states</code> input. Output of last layer of model.`,name:"sample"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition_flax.py#L36"}}),Fs=new y({props:{name:"replace",anchor:"diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput.replace",parameters:[{name:"**updates",val:""}],source:"https://github.com/huggingface/diffusers/blob/main/src/flax/struct.py#L108"}}),Ps=new k({}),js=new y({props:{name:"class diffusers.FlaxUNet2DConditionModel",anchor:"diffusers.FlaxUNet2DConditionModel",parameters:[{name:"sample_size",val:": int = 32"},{name:"in_channels",val:": int = 4"},{name:"out_channels",val:": int = 4"},{name:"down_block_types",val:": typing.Tuple[str] = ('CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'DownBlock2D')"},{name:"up_block_types",val:": typing.Tuple[str] = ('UpBlock2D', 'CrossAttnUpBlock2D', 'CrossAttnUpBlock2D', 'CrossAttnUpBlock2D')"},{name:"only_cross_attention",val:": typing.Union[bool, typing.Tuple[bool]] = False"},{name:"block_out_channels",val:": typing.Tuple[int] = (320, 640, 1280, 1280)"},{name:"layers_per_block",val:": int = 2"},{name:"attention_head_dim",val:": typing.Union[int, typing.Tuple[int]] = 8"},{name:"num_attention_heads",val:": typing.Union[int, typing.Tuple[int], NoneType] = None"},{name:"cross_attention_dim",val:": int = 1280"},{name:"dropout",val:": float = 0.0"},{name:"use_linear_projection",val:": bool = False"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"flip_sin_to_cos",val:": bool = True"},{name:"freq_shift",val:": int = 0"},{name:"use_memory_efficient_attention",val:": bool = False"},{name:"parent",val:": typing.Union[typing.Type[flax.linen.module.Module], typing.Type[flax.core.scope.Scope], typing.Type[flax.linen.module._Sentinel], NoneType] = <flax.linen.module._Sentinel object at 0x7fa57076a310>"},{name:"name",val:": str = 
None"}],parametersDescription:[{anchor:"diffusers.FlaxUNet2DConditionModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code>, <em>optional</em>) — | |
| The size of the input sample.`,name:"sample_size"},{anchor:"diffusers.FlaxUNet2DConditionModel.in_channels",description:`<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| The number of channels in the input sample.`,name:"in_channels"},{anchor:"diffusers.FlaxUNet2DConditionModel.out_channels",description:`<strong>out_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| The number of channels in the output.`,name:"out_channels"},{anchor:"diffusers.FlaxUNet2DConditionModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")</code>) — | |
| The tuple of downsample blocks to use. The corresponding class names will be: “FlaxCrossAttnDownBlock2D”, | |
| “FlaxCrossAttnDownBlock2D”, “FlaxCrossAttnDownBlock2D”, “FlaxDownBlock2D”`,name:"down_block_types"},{anchor:"diffusers.FlaxUNet2DConditionModel.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D",)</code>) — | |
| The tuple of upsample blocks to use. The corresponding class names will be: “FlaxUpBlock2D”, | |
| “FlaxCrossAttnUpBlock2D”, “FlaxCrossAttnUpBlock2D”, “FlaxCrossAttnUpBlock2D”`,name:"up_block_types"},{anchor:"diffusers.FlaxUNet2DConditionModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to <code>(320, 640, 1280, 1280)</code>) — | |
| The tuple of output channels for each block.`,name:"block_out_channels"},{anchor:"diffusers.FlaxUNet2DConditionModel.layers_per_block",description:`<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — | |
| The number of layers per block.`,name:"layers_per_block"},{anchor:"diffusers.FlaxUNet2DConditionModel.attention_head_dim",description:`<strong>attention_head_dim</strong> (<code>int</code> or <code>Tuple[int]</code>, <em>optional</em>, defaults to 8) — | |
| The dimension of the attention heads.`,name:"attention_head_dim"},{anchor:"diffusers.FlaxUNet2DConditionModel.num_attention_heads",description:`<strong>num_attention_heads</strong> (<code>int</code> or <code>Tuple[int]</code>, <em>optional</em>) — | |
| The number of attention heads.`,name:"num_attention_heads"},{anchor:"diffusers.FlaxUNet2DConditionModel.cross_attention_dim",description:`<strong>cross_attention_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 768) — | |
| The dimension of the cross attention features.`,name:"cross_attention_dim"},{anchor:"diffusers.FlaxUNet2DConditionModel.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0) — | |
| Dropout probability for down, up and bottleneck blocks.`,name:"dropout"},{anchor:"diffusers.FlaxUNet2DConditionModel.flip_sin_to_cos",description:`<strong>flip_sin_to_cos</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to flip the sin to cos in the time embedding.`,name:"flip_sin_to_cos"},{anchor:"diffusers.FlaxUNet2DConditionModel.freq_shift",description:"<strong>freq_shift</strong> (<code>int</code>, <em>optional</em>, defaults to 0) — The frequency shift to apply to the time embedding.",name:"freq_shift"},{anchor:"diffusers.FlaxUNet2DConditionModel.use_memory_efficient_attention",description:`<strong>use_memory_efficient_attention</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| enable memory efficient attention <a href="https://arxiv.org/abs/2112.05682" rel="nofollow">https://arxiv.org/abs/2112.05682</a>`,name:"use_memory_efficient_attention"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_condition_flax.py#L47"}}),zs=new k({}),Ws=new y({props:{name:"class diffusers.models.vae_flax.FlaxDecoderOutput",anchor:"diffusers.models.vae_flax.FlaxDecoderOutput",parameters:[{name:"sample",val:": ndarray"}],parametersDescription:[{anchor:"diffusers.models.vae_flax.FlaxDecoderOutput.sample",description:`<strong>sample</strong> (<em>jnp.ndarray</em> of shape <em>(batch_size, num_channels, height, width)</em>) — | |
| Decoded output sample of the model. Output of the last layer of the model.`,name:"sample"},{anchor:"diffusers.models.vae_flax.FlaxDecoderOutput.dtype",description:`<strong>dtype</strong> (<code>jnp.dtype</code>, <em>optional</em>, defaults to jnp.float32) — | |
| Parameters <em>dtype</em>`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vae_flax.py#L33"}}),Xs=new y({props:{name:"replace",anchor:"diffusers.models.vae_flax.FlaxDecoderOutput.replace",parameters:[{name:"**updates",val:""}],source:"https://github.com/huggingface/diffusers/blob/main/src/flax/struct.py#L108"}}),Ss=new k({}),Gs=new y({props:{name:"class diffusers.models.vae_flax.FlaxAutoencoderKLOutput",anchor:"diffusers.models.vae_flax.FlaxAutoencoderKLOutput",parameters:[{name:"latent_dist",val:": FlaxDiagonalGaussianDistribution"}],parametersDescription:[{anchor:"diffusers.models.vae_flax.FlaxAutoencoderKLOutput.latent_dist",description:`<strong>latent_dist</strong> (<code>FlaxDiagonalGaussianDistribution</code>) — | |
| Encoded outputs of <code>Encoder</code> represented as the mean and logvar of <code>FlaxDiagonalGaussianDistribution</code>. | |
| <code>FlaxDiagonalGaussianDistribution</code> allows for sampling latents from the distribution.`,name:"latent_dist"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vae_flax.py#L48"}}),Ks=new y({props:{name:"replace",anchor:"diffusers.models.vae_flax.FlaxAutoencoderKLOutput.replace",parameters:[{name:"**updates",val:""}],source:"https://github.com/huggingface/diffusers/blob/main/src/flax/struct.py#L108"}}),Rs=new k({}),Zs=new y({props:{name:"class diffusers.FlaxAutoencoderKL",anchor:"diffusers.FlaxAutoencoderKL",parameters:[{name:"in_channels",val:": int = 3"},{name:"out_channels",val:": int = 3"},{name:"down_block_types",val:": typing.Tuple[str] = ('DownEncoderBlock2D',)"},{name:"up_block_types",val:": typing.Tuple[str] = ('UpDecoderBlock2D',)"},{name:"block_out_channels",val:": typing.Tuple[int] = (64,)"},{name:"layers_per_block",val:": int = 1"},{name:"act_fn",val:": str = 'silu'"},{name:"latent_channels",val:": int = 4"},{name:"norm_num_groups",val:": int = 32"},{name:"sample_size",val:": int = 32"},{name:"scaling_factor",val:": float = 0.18215"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"parent",val:": typing.Union[typing.Type[flax.linen.module.Module], typing.Type[flax.core.scope.Scope], typing.Type[flax.linen.module._Sentinel], NoneType] = <flax.linen.module._Sentinel object at 0x7fa57076a310>"},{name:"name",val:": str = None"}],parametersDescription:[{anchor:"diffusers.FlaxAutoencoderKL.in_channels",description:`<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 3) — | |
| Input channels`,name:"in_channels"},{anchor:"diffusers.FlaxAutoencoderKL.out_channels",description:`<strong>out_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 3) — | |
| Output channels`,name:"out_channels"},{anchor:"diffusers.FlaxAutoencoderKL.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <em>(DownEncoderBlock2D)</em>) — | |
| DownEncoder block type`,name:"down_block_types"},{anchor:"diffusers.FlaxAutoencoderKL.up_block_types",description:`<strong>up_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <em>(UpDecoderBlock2D)</em>) — | |
| UpDecoder block type`,name:"up_block_types"},{anchor:"diffusers.FlaxAutoencoderKL.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <em>(64,)</em>) — | |
| Tuple containing the number of output channels for each block`,name:"block_out_channels"},{anchor:"diffusers.FlaxAutoencoderKL.layers_per_block",description:`<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to <em>2</em>) — | |
| Number of Resnet layer for each block`,name:"layers_per_block"},{anchor:"diffusers.FlaxAutoencoderKL.act_fn",description:`<strong>act_fn</strong> (<code>str</code>, <em>optional</em>, defaults to <em>silu</em>) — | |
| Activation function`,name:"act_fn"},{anchor:"diffusers.FlaxAutoencoderKL.latent_channels",description:`<strong>latent_channels</strong> (<code>int</code>, <em>optional</em>, defaults to <em>4</em>) — | |
| Latent space channels`,name:"latent_channels"},{anchor:"diffusers.FlaxAutoencoderKL.norm_num_groups",description:`<strong>norm_num_groups</strong> (<code>int</code>, <em>optional</em>, defaults to <em>32</em>) — | |
| Norm num group`,name:"norm_num_groups"},{anchor:"diffusers.FlaxAutoencoderKL.sample_size",description:`<strong>sample_size</strong> (<code>int</code>, <em>optional</em>, defaults to 32) — | |
| Sample input size`,name:"sample_size"},{anchor:"diffusers.FlaxAutoencoderKL.scaling_factor",description:`<strong>scaling_factor</strong> (<em>float</em>, <em>optional</em>, defaults to 0.18215) — | |
| The component-wise standard deviation of the trained latent space computed using the first batch of the | |
| training set. This is used to scale the latent space to have unit variance when training the diffusion | |
| model. The latents are scaled with the formula <em>z = z </em> scaling_factor<em> before being passed to the | |
| diffusion model. When decoding, the latents are scaled back to the original scale with the formula: </em>z = 1 | |
| / scaling_factor <em> z</em>. For more details, refer to sections 4.3.2 and D.1 of the <a href="https://arxiv.org/abs/2112.10752" rel="nofollow">High-Resolution Image | |
| Synthesis with Latent Diffusion Models</a> paper.`,name:"scaling_factor"},{anchor:"diffusers.FlaxAutoencoderKL.dtype",description:`<strong>dtype</strong> (<code>jnp.dtype</code>, <em>optional</em>, defaults to jnp.float32) — | |
| parameters <em>dtype</em>`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vae_flax.py#L721"}}),nr=new k({}),sr=new y({props:{name:"class diffusers.models.controlnet_flax.FlaxControlNetOutput",anchor:"diffusers.models.controlnet_flax.FlaxControlNetOutput",parameters:[{name:"down_block_res_samples",val:": ndarray"},{name:"mid_block_res_sample",val:": ndarray"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet_flax.py#L34"}}),rr=new y({props:{name:"replace",anchor:"diffusers.models.controlnet_flax.FlaxControlNetOutput.replace",parameters:[{name:"**updates",val:""}],source:"https://github.com/huggingface/diffusers/blob/main/src/flax/struct.py#L108"}}),ar=new k({}),ir=new y({props:{name:"class diffusers.FlaxControlNetModel",anchor:"diffusers.FlaxControlNetModel",parameters:[{name:"sample_size",val:": int = 32"},{name:"in_channels",val:": int = 4"},{name:"down_block_types",val:": typing.Tuple[str] = ('CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'DownBlock2D')"},{name:"only_cross_attention",val:": typing.Union[bool, typing.Tuple[bool]] = False"},{name:"block_out_channels",val:": typing.Tuple[int] = (320, 640, 1280, 1280)"},{name:"layers_per_block",val:": int = 2"},{name:"attention_head_dim",val:": typing.Union[int, typing.Tuple[int]] = 8"},{name:"num_attention_heads",val:": typing.Union[int, typing.Tuple[int], NoneType] = None"},{name:"cross_attention_dim",val:": int = 1280"},{name:"dropout",val:": float = 0.0"},{name:"use_linear_projection",val:": bool = False"},{name:"dtype",val:": dtype = <class 'jax.numpy.float32'>"},{name:"flip_sin_to_cos",val:": bool = True"},{name:"freq_shift",val:": int = 0"},{name:"controlnet_conditioning_channel_order",val:": str = 'rgb'"},{name:"conditioning_embedding_out_channels",val:": typing.Tuple[int] = (16, 32, 96, 256)"},{name:"parent",val:": typing.Union[typing.Type[flax.linen.module.Module], 
typing.Type[flax.core.scope.Scope], typing.Type[flax.linen.module._Sentinel], NoneType] = <flax.linen.module._Sentinel object at 0x7fa57076a310>"},{name:"name",val:": str = None"}],parametersDescription:[{anchor:"diffusers.FlaxControlNetModel.sample_size",description:`<strong>sample_size</strong> (<code>int</code>, <em>optional</em>) — | |
| The size of the input sample.`,name:"sample_size"},{anchor:"diffusers.FlaxControlNetModel.in_channels",description:`<strong>in_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| The number of channels in the input sample.`,name:"in_channels"},{anchor:"diffusers.FlaxControlNetModel.down_block_types",description:`<strong>down_block_types</strong> (<code>Tuple[str]</code>, <em>optional</em>, defaults to <code>("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")</code>) — | |
| The tuple of downsample blocks to use. The corresponding class names will be: “FlaxCrossAttnDownBlock2D”, | |
| “FlaxCrossAttnDownBlock2D”, “FlaxCrossAttnDownBlock2D”, “FlaxDownBlock2D”`,name:"down_block_types"},{anchor:"diffusers.FlaxControlNetModel.block_out_channels",description:`<strong>block_out_channels</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to <code>(320, 640, 1280, 1280)</code>) — | |
| The tuple of output channels for each block.`,name:"block_out_channels"},{anchor:"diffusers.FlaxControlNetModel.layers_per_block",description:`<strong>layers_per_block</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — | |
| The number of layers per block.`,name:"layers_per_block"},{anchor:"diffusers.FlaxControlNetModel.attention_head_dim",description:`<strong>attention_head_dim</strong> (<code>int</code> or <code>Tuple[int]</code>, <em>optional</em>, defaults to 8) — | |
| The dimension of the attention heads.`,name:"attention_head_dim"},{anchor:"diffusers.FlaxControlNetModel.num_attention_heads",description:`<strong>num_attention_heads</strong> (<code>int</code> or <code>Tuple[int]</code>, <em>optional</em>) — | |
| The number of attention heads.`,name:"num_attention_heads"},{anchor:"diffusers.FlaxControlNetModel.cross_attention_dim",description:`<strong>cross_attention_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 768) — | |
| The dimension of the cross attention features.`,name:"cross_attention_dim"},{anchor:"diffusers.FlaxControlNetModel.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0) — | |
| Dropout probability for down, up and bottleneck blocks.`,name:"dropout"},{anchor:"diffusers.FlaxControlNetModel.flip_sin_to_cos",description:`<strong>flip_sin_to_cos</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to flip the sin to cos in the time embedding.`,name:"flip_sin_to_cos"},{anchor:"diffusers.FlaxControlNetModel.freq_shift",description:"<strong>freq_shift</strong> (<code>int</code>, <em>optional</em>, defaults to 0) — The frequency shift to apply to the time embedding.",name:"freq_shift"},{anchor:"diffusers.FlaxControlNetModel.controlnet_conditioning_channel_order",description:`<strong>controlnet_conditioning_channel_order</strong> (<code>str</code>, <em>optional</em>, defaults to <code>rgb</code>) — | |
| The channel order of conditional image. Will convert it to <code>rgb</code> if it’s <code>bgr</code>`,name:"controlnet_conditioning_channel_order"},{anchor:"diffusers.FlaxControlNetModel.conditioning_embedding_out_channels",description:`<strong>conditioning_embedding_out_channels</strong> (<code>tuple</code>, <em>optional</em>, defaults to <code>(16, 32, 96, 256)</code>) — | |
| The tuple of output channel for each block in conditioning_embedding layer`,name:"conditioning_embedding_out_channels"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/controlnet_flax.py#L96"}}),{c(){x=n("meta"),T=i(),M=n("h1"),w=n("a"),D=n("span"),f(v.$$.fragment),$=i(),se=n("span"),Ue=l("Models"),Z=i(),Y=n("p"),yr=l(`Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models. | |
| The primary function of these models is to denoise an input sample, by modeling the distribution `),al=new $x,il=l(`. | |
| The models are built on the base class [\u2018ModelMixin\u2019] that is a `),aa=n("code"),Om=l("torch.nn.module"),Lm=l(" with basic functionality for saving and loading models both locally and from the HuggingFace hub."),dl=i(),Ee=n("h2"),ho=n("a"),ia=n("span"),f(Tt.$$.fragment),Vm=i(),da=n("span"),Bm=l("ModelMixin"),ll=i(),N=n("div"),f(kt.$$.fragment),qm=i(),la=n("p"),Jm=l("Base class for all models."),zm=i(),xr=n("p"),wr=n("a"),Wm=l("ModelMixin"),Xm=l(` takes care of storing the configuration of the models and handles methods for loading, downloading | |
| and saving models.`),Sm=i(),ca=n("ul"),_e=n("li"),ma=n("strong"),Gm=l("config_name"),Km=l(" ("),pa=n("code"),Rm=l("str"),Zm=l(`) \u2014 A filename under which the model should be stored when calling | |
| `),$r=n("a"),Ym=l("save_pretrained()"),Qm=l("."),Hm=i(),ge=n("div"),f(At.$$.fragment),ep=i(),fa=n("p"),op=l("Deactivates gradient checkpointing for the current model."),tp=i(),ua=n("p"),np=l(`Note that in other frameworks this feature can be referred to as \u201Cactivation checkpointing\u201D or \u201Ccheckpoint | |
| activations\u201D.`),sp=i(),_o=n("div"),f(Nt.$$.fragment),rp=i(),ha=n("p"),ap=l("Disable memory efficient attention as implemented in xformers."),ip=i(),be=n("div"),f(Ut.$$.fragment),dp=i(),_a=n("p"),lp=l("Activates gradient checkpointing for the current model."),cp=i(),ga=n("p"),mp=l(`Note that in other frameworks this feature can be referred to as \u201Cactivation checkpointing\u201D or \u201Ccheckpoint | |
| activations\u201D.`),pp=i(),W=n("div"),f(Et.$$.fragment),fp=i(),ba=n("p"),up=l("Enable memory efficient attention as implemented in xformers."),hp=i(),va=n("p"),_p=l(`When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference | |
| time. Speed up at training time is not guaranteed.`),gp=i(),ya=n("p"),bp=l(`Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention | |
| is used.`),vp=i(),f(go.$$.fragment),yp=i(),I=n("div"),f(Ct.$$.fragment),xp=i(),xa=n("p"),wp=l("Instantiate a pretrained pytorch model from a pre-trained model configuration."),$p=i(),Ce=n("p"),Mp=l("The model is set in evaluation mode by default using "),wa=n("code"),Dp=l("model.eval()"),Tp=l(` (Dropout modules are deactivated). To train | |
| the model, you should first set it back in training mode with `),$a=n("code"),kp=l("model.train()"),Ap=l("."),Np=i(),Ft=n("p"),Up=l("The warning "),Ma=n("em"),Ep=l("Weights from XXX not initialized from pretrained model"),Cp=l(` means that the weights of XXX do not come | |
| pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning | |
| task.`),Fp=i(),Pt=n("p"),Pp=l("The warning "),Da=n("em"),jp=l("Weights from XXX not used in YYY"),Ip=l(` means that the layer XXX is not used by YYY, therefore those | |
| weights are discarded.`),Op=i(),f(bo.$$.fragment),Lp=i(),f(vo.$$.fragment),Vp=i(),yo=n("div"),f(jt.$$.fragment),Bp=i(),Ta=n("p"),qp=l("Get number of (optionally, trainable or non-embeddings) parameters in the module."),Jp=i(),xo=n("div"),f(It.$$.fragment),zp=i(),Ot=n("p"),Wp=l(`Save a model and its configuration file to a directory, so that it can be re-loaded using the | |
| `),ka=n("code"),Xp=l("[from_pretrained()](/docs/diffusers/main/en/api/models#diffusers.ModelMixin.from_pretrained)"),Sp=l(" class method."),cl=i(),Fe=n("h2"),wo=n("a"),Aa=n("span"),f(Lt.$$.fragment),Gp=i(),Na=n("span"),Kp=l("UNet2DOutput"),ml=i(),Vt=n("div"),f(Bt.$$.fragment),pl=i(),Pe=n("h2"),$o=n("a"),Ua=n("span"),f(qt.$$.fragment),Rp=i(),Ea=n("span"),Zp=l("UNet2DModel"),fl=i(),Q=n("div"),f(Jt.$$.fragment),Yp=i(),Ca=n("p"),Qp=l("UNet2DModel is a 2D UNet model that takes in a noisy sample and a timestep and returns sample shaped output."),Hp=i(),zt=n("p"),ef=l("This model inherits from "),Mr=n("a"),of=l("ModelMixin"),tf=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),nf=i(),Dr=n("div"),f(Wt.$$.fragment),ul=i(),je=n("h2"),Mo=n("a"),Fa=n("span"),f(Xt.$$.fragment),sf=i(),Pa=n("span"),rf=l("UNet1DOutput"),hl=i(),St=n("div"),f(Gt.$$.fragment),_l=i(),Ie=n("h2"),Do=n("a"),ja=n("span"),f(Kt.$$.fragment),af=i(),Ia=n("span"),df=l("UNet1DModel"),gl=i(),H=n("div"),f(Rt.$$.fragment),lf=i(),Oa=n("p"),cf=l("UNet1DModel is a 1D UNet model that takes in a noisy sample and a timestep and returns sample shaped output."),mf=i(),Zt=n("p"),pf=l("This model inherits from "),Tr=n("a"),ff=l("ModelMixin"),uf=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),hf=i(),kr=n("div"),f(Yt.$$.fragment),bl=i(),Oe=n("h2"),To=n("a"),La=n("span"),f(Qt.$$.fragment),_f=i(),Va=n("span"),gf=l("UNet2DConditionOutput"),vl=i(),Ht=n("div"),f(en.$$.fragment),yl=i(),Le=n("h2"),ko=n("a"),Ba=n("span"),f(on.$$.fragment),bf=i(),qa=n("span"),vf=l("UNet2DConditionModel"),xl=i(),F=n("div"),f(tn.$$.fragment),yf=i(),Ja=n("p"),xf=l(`UNet2DConditionModel is a conditional 2D UNet model that takes in a noisy sample, conditional state, and a timestep | |
| and returns sample shaped output.`),wf=i(),nn=n("p"),$f=l("This model inherits from "),Ar=n("a"),Mf=l("ModelMixin"),Df=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Tf=i(),Nr=n("div"),f(sn.$$.fragment),kf=i(),ve=n("div"),f(rn.$$.fragment),Af=i(),za=n("p"),Nf=l("Enable sliced attention computation."),Uf=i(),Wa=n("p"),Ef=l(`When this option is enabled, the attention module will split the input tensor in slices, to compute attention | |
| in several steps. This is useful to save some memory in exchange for a small speed decrease.`),Cf=i(),Ur=n("div"),f(an.$$.fragment),Ff=i(),Ao=n("div"),f(dn.$$.fragment),Pf=i(),Xa=n("p"),jf=l("Disables custom attention processors and sets the default attention implementation."),wl=i(),Ve=n("h2"),No=n("a"),Sa=n("span"),f(ln.$$.fragment),If=i(),Ga=n("span"),Of=l("UNet3DConditionOutput"),$l=i(),cn=n("div"),f(mn.$$.fragment),Ml=i(),Be=n("h2"),Uo=n("a"),Ka=n("span"),f(pn.$$.fragment),Lf=i(),Ra=n("span"),Vf=l("UNet3DConditionModel"),Dl=i(),P=n("div"),f(fn.$$.fragment),Bf=i(),Za=n("p"),qf=l(`UNet3DConditionModel is a conditional 2D UNet model that takes in a noisy sample, conditional state, and a timestep | |
| and returns sample shaped output.`),Jf=i(),un=n("p"),zf=l("This model inherits from "),Er=n("a"),Wf=l("ModelMixin"),Xf=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Sf=i(),Cr=n("div"),f(hn.$$.fragment),Gf=i(),ye=n("div"),f(_n.$$.fragment),Kf=i(),Ya=n("p"),Rf=l("Enable sliced attention computation."),Zf=i(),Qa=n("p"),Yf=l(`When this option is enabled, the attention module will split the input tensor in slices, to compute attention | |
| in several steps. This is useful to save some memory in exchange for a small speed decrease.`),Qf=i(),Fr=n("div"),f(gn.$$.fragment),Hf=i(),Eo=n("div"),f(bn.$$.fragment),eu=i(),Ha=n("p"),ou=l("Disables custom attention processors and sets the default attention implementation."),Tl=i(),qe=n("h2"),Co=n("a"),ei=n("span"),f(vn.$$.fragment),tu=i(),oi=n("span"),nu=l("DecoderOutput"),kl=i(),Je=n("div"),f(yn.$$.fragment),su=i(),ti=n("p"),ru=l("Output of decoding method."),Al=i(),ze=n("h2"),Fo=n("a"),ni=n("span"),f(xn.$$.fragment),au=i(),si=n("span"),iu=l("VQEncoderOutput"),Nl=i(),We=n("div"),f(wn.$$.fragment),du=i(),ri=n("p"),lu=l("Output of VQModel encoding method."),Ul=i(),Xe=n("h2"),Po=n("a"),ai=n("span"),f($n.$$.fragment),cu=i(),ii=n("span"),mu=l("VQModel"),El=i(),ee=n("div"),f(Mn.$$.fragment),pu=i(),di=n("p"),fu=l(`VQ-VAE model from the paper Neural Discrete Representation Learning by Aaron van den Oord, Oriol Vinyals and Koray | |
| Kavukcuoglu.`),uu=i(),Dn=n("p"),hu=l("This model inherits from "),Pr=n("a"),_u=l("ModelMixin"),gu=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),bu=i(),jr=n("div"),f(Tn.$$.fragment),Cl=i(),Se=n("h2"),jo=n("a"),li=n("span"),f(kn.$$.fragment),vu=i(),ci=n("span"),yu=l("AutoencoderKLOutput"),Fl=i(),Ge=n("div"),f(An.$$.fragment),xu=i(),mi=n("p"),wu=l("Output of AutoencoderKL encoding method."),Pl=i(),Ke=n("h2"),Io=n("a"),pi=n("span"),f(Nn.$$.fragment),$u=i(),fi=n("span"),Mu=l("AutoencoderKL"),jl=i(),A=n("div"),f(Un.$$.fragment),Du=i(),ui=n("p"),Tu=l(`Variational Autoencoder (VAE) model with KL loss from the paper Auto-Encoding Variational Bayes by Diederik P. Kingma | |
| and Max Welling.`),ku=i(),En=n("p"),Au=l("This model inherits from "),Ir=n("a"),Nu=l("ModelMixin"),Uu=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),Eu=i(),Oo=n("div"),f(Cn.$$.fragment),Cu=i(),Fn=n("p"),Fu=l("Disable sliced VAE decoding. If "),hi=n("code"),Pu=l("enable_slicing"),ju=l(` was previously invoked, this method will go back to computing | |
| decoding in one step.`),Iu=i(),Lo=n("div"),f(Pn.$$.fragment),Ou=i(),jn=n("p"),Lu=l("Disable tiled VAE decoding. If "),_i=n("code"),Vu=l("enable_vae_tiling"),Bu=l(` was previously invoked, this method will go back to | |
| computing decoding in one step.`),qu=i(),Vo=n("div"),f(In.$$.fragment),Ju=i(),gi=n("p"),zu=l(`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to | |
| compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`),Wu=i(),Bo=n("div"),f(On.$$.fragment),Xu=i(),bi=n("p"),Su=l(`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to | |
| compute decoding and encoding in several steps. This is useful to save a large amount of memory and to allow | |
| the processing of larger images.`),Gu=i(),Or=n("div"),f(Ln.$$.fragment),Ku=i(),Lr=n("div"),f(Vn.$$.fragment),Ru=i(),qo=n("div"),f(Bn.$$.fragment),Zu=i(),vi=n("p"),Yu=l("Disables custom attention processors and sets the default attention implementation."),Qu=i(),Jo=n("div"),f(qn.$$.fragment),Hu=i(),yi=n("p"),eh=l("Decode a batch of images using a tiled decoder."),oh=i(),zo=n("div"),f(Jn.$$.fragment),th=i(),xi=n("p"),nh=l("Encode a batch of images using a tiled encoder."),Il=i(),Re=n("h2"),Wo=n("a"),wi=n("span"),f(zn.$$.fragment),sh=i(),$i=n("span"),rh=l("Transformer2DModel"),Ol=i(),O=n("div"),f(Wn.$$.fragment),ah=i(),Mi=n("p"),ih=l(`Transformer model for image-like data. Takes either discrete (classes of vector embeddings) or continuous (actual | |
| embeddings) inputs.`),dh=i(),Di=n("p"),lh=l(`When input is continuous: First, project the input (aka embedding) and reshape to b, t, d. Then apply standard | |
| transformer action. Finally, reshape to image.`),ch=i(),Xn=n("p"),mh=l(`When input is discrete: First, input (classes of latent pixels) is converted to embeddings and has positional | |
| embeddings applied, see `),Ti=n("code"),ph=l("ImagePositionalEmbeddings"),fh=l(`. Then apply standard transformer action. Finally, predict | |
| classes of unnoised image.`),uh=i(),ki=n("p"),hh=l(`Note that it is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised | |
| image do not contain a prediction for the masked pixel as the unnoised image cannot be masked.`),_h=i(),Vr=n("div"),f(Sn.$$.fragment),Ll=i(),Ze=n("h2"),Xo=n("a"),Ai=n("span"),f(Gn.$$.fragment),gh=i(),Ni=n("span"),bh=l("Transformer2DModelOutput"),Vl=i(),Kn=n("div"),f(Rn.$$.fragment),Bl=i(),Ye=n("h2"),So=n("a"),Ui=n("span"),f(Zn.$$.fragment),vh=i(),Ei=n("span"),yh=l("TransformerTemporalModel"),ql=i(),ce=n("div"),f(Yn.$$.fragment),xh=i(),Ci=n("p"),wh=l("Transformer model for video-like data."),$h=i(),Br=n("div"),f(Qn.$$.fragment),Jl=i(),Qe=n("h2"),Go=n("a"),Fi=n("span"),f(Hn.$$.fragment),Mh=i(),Pi=n("span"),Dh=l("Transformer2DModelOutput"),zl=i(),es=n("div"),f(os.$$.fragment),Wl=i(),He=n("h2"),Ko=n("a"),ji=n("span"),f(ts.$$.fragment),Th=i(),Ii=n("span"),kh=l("PriorTransformer"),Xl=i(),j=n("div"),f(ns.$$.fragment),Ah=i(),Oi=n("p"),Nh=l(`The prior transformer from unCLIP is used to predict CLIP image embeddings from CLIP text embeddings. Note that the | |
| transformer predicts the image embeddings through a denoising diffusion process.`),Uh=i(),ss=n("p"),Eh=l("This model inherits from "),qr=n("a"),Ch=l("ModelMixin"),Fh=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Ph=i(),Jr=n("p"),jh=l("For more details, see the original paper: "),rs=n("a"),Ih=l("https://arxiv.org/abs/2204.06125"),Oh=i(),zr=n("div"),f(as.$$.fragment),Lh=i(),Wr=n("div"),f(is.$$.fragment),Vh=i(),Ro=n("div"),f(ds.$$.fragment),Bh=i(),Li=n("p"),qh=l("Disables custom attention processors and sets the default attention implementation."),Sl=i(),eo=n("h2"),Zo=n("a"),Vi=n("span"),f(ls.$$.fragment),Jh=i(),Bi=n("span"),zh=l("PriorTransformerOutput"),Gl=i(),cs=n("div"),f(ms.$$.fragment),Kl=i(),oo=n("h2"),Yo=n("a"),qi=n("span"),f(ps.$$.fragment),Wh=i(),Ji=n("span"),Xh=l("ControlNetOutput"),Rl=i(),fs=n("div"),f(us.$$.fragment),Zl=i(),to=n("h2"),Qo=n("a"),zi=n("span"),f(hs.$$.fragment),Sh=i(),Wi=n("span"),Gh=l("ControlNetModel"),Yl=i(),J=n("div"),f(_s.$$.fragment),Kh=i(),Ho=n("div"),f(gs.$$.fragment),Rh=i(),Xi=n("p"),Zh=l("Instantiate Controlnet class from UNet2DConditionModel."),Yh=i(),xe=n("div"),f(bs.$$.fragment),Qh=i(),Si=n("p"),Hh=l("Enable sliced attention computation."),e_=i(),Gi=n("p"),o_=l(`When this option is enabled, the attention module will split the input tensor in slices, to compute attention | |
| in several steps. This is useful to save some memory in exchange for a small speed decrease.`),t_=i(),Xr=n("div"),f(vs.$$.fragment),n_=i(),et=n("div"),f(ys.$$.fragment),s_=i(),Ki=n("p"),r_=l("Disables custom attention processors and sets the default attention implementation."),Ql=i(),no=n("h2"),ot=n("a"),Ri=n("span"),f(xs.$$.fragment),a_=i(),Zi=n("span"),i_=l("FlaxModelMixin"),Hl=i(),C=n("div"),f(ws.$$.fragment),d_=i(),Yi=n("p"),l_=l("Base class for all flax models."),c_=i(),Sr=n("p"),Gr=n("a"),m_=l("FlaxModelMixin"),p_=l(` takes care of storing the configuration of the models and handles methods for loading, | |
| downloading and saving models.`),f_=i(),X=n("div"),f($s.$$.fragment),u_=i(),Qi=n("p"),h_=l("Instantiate a pretrained flax model from a pre-trained model configuration."),__=i(),Ms=n("p"),g_=l("The warning "),Hi=n("em"),b_=l("Weights from XXX not initialized from pretrained model"),v_=l(` means that the weights of XXX do not come | |
| pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning | |
| task.`),y_=i(),Ds=n("p"),x_=l("The warning "),ed=n("em"),w_=l("Weights from XXX not used in YYY"),$_=l(` means that the layer XXX is not used by YYY, therefore those | |
| weights are discarded.`),M_=i(),f(tt.$$.fragment),D_=i(),nt=n("div"),f(Ts.$$.fragment),T_=i(),ks=n("p"),k_=l(`Save a model and its configuration file to a directory, so that it can be re-loaded using the | |
| `),od=n("code"),A_=l("[from_pretrained()](/docs/diffusers/main/en/api/models#diffusers.FlaxModelMixin.from_pretrained)"),N_=l(" class method"),U_=i(),re=n("div"),f(As.$$.fragment),E_=i(),oe=n("p"),C_=l("Cast the floating-point "),td=n("code"),F_=l("params"),P_=l(" to "),nd=n("code"),j_=l("jax.numpy.bfloat16"),I_=l(". This returns a new "),sd=n("code"),O_=l("params"),L_=l(` tree and does not cast | |
| the `),rd=n("code"),V_=l("params"),B_=l(" in place."),q_=i(),ad=n("p"),J_=l(`This method can be used on TPU to explicitly convert the model parameters to bfloat16 precision to do full | |
| half-precision training or to save weights in bfloat16 for inference in order to save memory and improve speed.`),z_=i(),f(st.$$.fragment),W_=i(),ae=n("div"),f(Ns.$$.fragment),X_=i(),te=n("p"),S_=l("Cast the floating-point "),id=n("code"),G_=l("params"),K_=l(" to "),dd=n("code"),R_=l("jax.numpy.float16"),Z_=l(". This returns a new "),ld=n("code"),Y_=l("params"),Q_=l(` tree and does not cast the | |
| `),cd=n("code"),H_=l("params"),eg=l(" in place."),og=i(),md=n("p"),tg=l(`This method can be used on GPU to explicitly convert the model parameters to float16 precision to do full | |
| half-precision training or to save weights in float16 for inference in order to save memory and improve speed.`),ng=i(),f(rt.$$.fragment),sg=i(),we=n("div"),f(Us.$$.fragment),rg=i(),ne=n("p"),ag=l("Cast the floating-point "),pd=n("code"),ig=l("params"),dg=l(" to "),fd=n("code"),lg=l("jax.numpy.float32"),cg=l(`. This method can be used to explicitly convert the | |
| model parameters to fp32 precision. This returns a new `),ud=n("code"),mg=l("params"),pg=l(" tree and does not cast the "),hd=n("code"),fg=l("params"),ug=l(" in place."),hg=i(),f(at.$$.fragment),ec=i(),so=n("h2"),it=n("a"),_d=n("span"),f(Es.$$.fragment),_g=i(),gd=n("span"),gg=l("FlaxUNet2DConditionOutput"),oc=i(),ro=n("div"),f(Cs.$$.fragment),bg=i(),dt=n("div"),f(Fs.$$.fragment),vg=i(),bd=n("p"),yg=l("\u201CReturns a new object replacing the specified fields with new values."),tc=i(),ao=n("h2"),lt=n("a"),vd=n("span"),f(Ps.$$.fragment),xg=i(),yd=n("span"),wg=l("FlaxUNet2DConditionModel"),nc=i(),L=n("div"),f(js.$$.fragment),$g=i(),xd=n("p"),Mg=l(`FlaxUNet2DConditionModel is a conditional 2D UNet model that takes in a noisy sample, conditional state, and a | |
| timestep and returns sample shaped output.`),Dg=i(),Is=n("p"),Tg=l("This model inherits from "),Kr=n("a"),kg=l("FlaxModelMixin"),Ag=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Ng=i(),Os=n("p"),Ug=l("Also, this model is a Flax Linen "),Ls=n("a"),Eg=l("flax.linen.Module"),Cg=l(` | |
| subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to | |
| general usage and behavior.`),Fg=i(),wd=n("p"),Pg=l("Finally, this model supports inherent JAX features such as:"),jg=i(),me=n("ul"),$d=n("li"),Vs=n("a"),Ig=l("Just-In-Time (JIT) compilation"),Og=i(),Md=n("li"),Bs=n("a"),Lg=l("Automatic Differentiation"),Vg=i(),Dd=n("li"),qs=n("a"),Bg=l("Vectorization"),qg=i(),Td=n("li"),Js=n("a"),Jg=l("Parallelization"),sc=i(),io=n("h2"),ct=n("a"),kd=n("span"),f(zs.$$.fragment),zg=i(),Ad=n("span"),Wg=l("FlaxDecoderOutput"),rc=i(),pe=n("div"),f(Ws.$$.fragment),Xg=i(),Nd=n("p"),Sg=l("Output of decoding method."),Gg=i(),mt=n("div"),f(Xs.$$.fragment),Kg=i(),Ud=n("p"),Rg=l("\u201CReturns a new object replacing the specified fields with new values."),ac=i(),lo=n("h2"),pt=n("a"),Ed=n("span"),f(Ss.$$.fragment),Zg=i(),Cd=n("span"),Yg=l("FlaxAutoencoderKLOutput"),ic=i(),fe=n("div"),f(Gs.$$.fragment),Qg=i(),Fd=n("p"),Hg=l("Output of AutoencoderKL encoding method."),eb=i(),ft=n("div"),f(Ks.$$.fragment),ob=i(),Pd=n("p"),tb=l("\u201CReturns a new object replacing the specified fields with new values."),dc=i(),co=n("h2"),ut=n("a"),jd=n("span"),f(Rs.$$.fragment),nb=i(),Id=n("span"),sb=l("FlaxAutoencoderKL"),lc=i(),z=n("div"),f(Zs.$$.fragment),rb=i(),Od=n("p"),ab=l(`Flax Implementation of Variational Autoencoder (VAE) model with KL loss from the paper Auto-Encoding Variational | |
| Bayes by Diederik P. Kingma and Max Welling.`),ib=i(),Ys=n("p"),db=l("This model is a Flax Linen "),Qs=n("a"),lb=l("flax.linen.Module"),cb=l(` | |
| subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to | |
| general usage and behavior.`),mb=i(),Ld=n("p"),pb=l("Finally, this model supports inherent JAX features such as:"),fb=i(),ue=n("ul"),Vd=n("li"),Hs=n("a"),ub=l("Just-In-Time (JIT) compilation"),hb=i(),Bd=n("li"),er=n("a"),_b=l("Automatic Differentiation"),gb=i(),qd=n("li"),or=n("a"),bb=l("Vectorization"),vb=i(),Jd=n("li"),tr=n("a"),yb=l("Parallelization"),cc=i(),mo=n("h2"),ht=n("a"),zd=n("span"),f(nr.$$.fragment),xb=i(),Wd=n("span"),wb=l("FlaxControlNetOutput"),mc=i(),po=n("div"),f(sr.$$.fragment),$b=i(),_t=n("div"),f(rr.$$.fragment),Mb=i(),Xd=n("p"),Db=l("\u201CReturns a new object replacing the specified fields with new values."),pc=i(),fo=n("h2"),gt=n("a"),Sd=n("span"),f(ar.$$.fragment),Tb=i(),Gd=n("span"),kb=l("FlaxControlNetModel"),fc=i(),V=n("div"),f(ir.$$.fragment),Ab=i(),dr=n("p"),Nb=l("Quoting from "),lr=n("a"),Ub=l("https://arxiv.org/abs/2302.05543"),Eb=l(`: \u201CStable Diffusion uses a pre-processing method similar to VQ-GAN | |
| [11] to convert the entire dataset of 512 \xD7 512 images into smaller 64 \xD7 64 \u201Clatent images\u201D for stabilized | |
| training. This requires ControlNets to convert image-based conditions to 64 \xD7 64 feature space to match the | |
| convolution size. We use a tiny network E(\xB7) of four convolution layers with 4 \xD7 4 kernels and 2 \xD7 2 strides | |
| (activated by ReLU, channels are 16, 32, 64, 128, initialized with Gaussian weights, trained jointly with the full | |
| model) to encode image-space conditions \u2026 into feature maps \u2026\u201D`),Cb=i(),cr=n("p"),Fb=l("This model inherits from "),Rr=n("a"),Pb=l("FlaxModelMixin"),jb=l(`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Ib=i(),mr=n("p"),Ob=l("Also, this model is a Flax Linen "),pr=n("a"),Lb=l("flax.linen.Module"),Vb=l(` | |
| subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to | |
| general usage and behavior.`),Bb=i(),Kd=n("p"),qb=l("Finally, this model supports inherent JAX features such as:"),Jb=i(),he=n("ul"),Rd=n("li"),fr=n("a"),zb=l("Just-In-Time (JIT) compilation"),Wb=i(),Zd=n("li"),ur=n("a"),Xb=l("Automatic Differentiation"),Sb=i(),Yd=n("li"),hr=n("a"),Gb=l("Vectorization"),Kb=i(),Qd=n("li"),_r=n("a"),Rb=l("Parallelization"),this.h()},l(o){const m=Mx('[data-svelte="svelte-1phssyn"]',document.head);x=s(m,"META",{name:!0,content:!0}),m.forEach(t),T=d(o),M=s(o,"H1",{class:!0});var gr=r(M);w=s(gr,"A",{id:!0,class:!0,href:!0});var Hd=r(w);D=s(Hd,"SPAN",{});var el=r(D);u(v.$$.fragment,el),el.forEach(t),Hd.forEach(t),$=d(gr),se=s(gr,"SPAN",{});var ol=r(se);Ue=c(ol,"Models"),ol.forEach(t),gr.forEach(t),Z=d(o),Y=s(o,"P",{});var uo=r(Y);yr=c(uo,`Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models. | |
| The primary function of these models is to denoise an input sample, by modeling the distribution `),al=Dx(uo),il=c(uo,`. | |
| The models are built on the base class [\u2018ModelMixin\u2019] that is a `),aa=s(uo,"CODE",{});var tl=r(aa);Om=c(tl,"torch.nn.module"),tl.forEach(t),Lm=c(uo," with basic functionality for saving and loading models both locally and from the HuggingFace hub."),uo.forEach(t),dl=d(o),Ee=s(o,"H2",{class:!0});var br=r(Ee);ho=s(br,"A",{id:!0,class:!0,href:!0});var Hb=r(ho);ia=s(Hb,"SPAN",{});var e2=r(ia);u(Tt.$$.fragment,e2),e2.forEach(t),Hb.forEach(t),Vm=d(br),da=s(br,"SPAN",{});var o2=r(da);Bm=c(o2,"ModelMixin"),o2.forEach(t),br.forEach(t),ll=d(o),N=s(o,"DIV",{class:!0});var E=r(N);u(kt.$$.fragment,E),qm=d(E),la=s(E,"P",{});var t2=r(la);Jm=c(t2,"Base class for all models."),t2.forEach(t),zm=d(E),xr=s(E,"P",{});var Zb=r(xr);wr=s(Zb,"A",{href:!0});var n2=r(wr);Wm=c(n2,"ModelMixin"),n2.forEach(t),Xm=c(Zb,` takes care of storing the configuration of the models and handles methods for loading, downloading | |
| and saving models.`),Zb.forEach(t),Sm=d(E),ca=s(E,"UL",{});var s2=r(ca);_e=s(s2,"LI",{});var vr=r(_e);ma=s(vr,"STRONG",{});var r2=r(ma);Gm=c(r2,"config_name"),r2.forEach(t),Km=c(vr," ("),pa=s(vr,"CODE",{});var a2=r(pa);Rm=c(a2,"str"),a2.forEach(t),Zm=c(vr,`) \u2014 A filename under which the model should be stored when calling | |
| `),$r=s(vr,"A",{href:!0});var i2=r($r);Ym=c(i2,"save_pretrained()"),i2.forEach(t),Qm=c(vr,"."),vr.forEach(t),s2.forEach(t),Hm=d(E),ge=s(E,"DIV",{class:!0});var Zr=r(ge);u(At.$$.fragment,Zr),ep=d(Zr),fa=s(Zr,"P",{});var d2=r(fa);op=c(d2,"Deactivates gradient checkpointing for the current model."),d2.forEach(t),tp=d(Zr),ua=s(Zr,"P",{});var l2=r(ua);np=c(l2,`Note that in other frameworks this feature can be referred to as \u201Cactivation checkpointing\u201D or \u201Ccheckpoint | |
| activations\u201D.`),l2.forEach(t),Zr.forEach(t),sp=d(E),_o=s(E,"DIV",{class:!0});var hc=r(_o);u(Nt.$$.fragment,hc),rp=d(hc),ha=s(hc,"P",{});var c2=r(ha);ap=c(c2,"Disable memory efficient attention as implemented in xformers."),c2.forEach(t),hc.forEach(t),ip=d(E),be=s(E,"DIV",{class:!0});var Yr=r(be);u(Ut.$$.fragment,Yr),dp=d(Yr),_a=s(Yr,"P",{});var m2=r(_a);lp=c(m2,"Activates gradient checkpointing for the current model."),m2.forEach(t),cp=d(Yr),ga=s(Yr,"P",{});var p2=r(ga);mp=c(p2,`Note that in other frameworks this feature can be referred to as \u201Cactivation checkpointing\u201D or \u201Ccheckpoint | |
| activations\u201D.`),p2.forEach(t),Yr.forEach(t),pp=d(E),W=s(E,"DIV",{class:!0});var $e=r(W);u(Et.$$.fragment,$e),fp=d($e),ba=s($e,"P",{});var f2=r(ba);up=c(f2,"Enable memory efficient attention as implemented in xformers."),f2.forEach(t),hp=d($e),va=s($e,"P",{});var u2=r(va);_p=c(u2,`When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference | |
| time. Speed up at training time is not guaranteed.`),u2.forEach(t),gp=d($e),ya=s($e,"P",{});var h2=r(ya);bp=c(h2,`Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention | |
| is used.`),h2.forEach(t),vp=d($e),u(go.$$.fragment,$e),$e.forEach(t),yp=d(E),I=s(E,"DIV",{class:!0});var S=r(I);u(Ct.$$.fragment,S),xp=d(S),xa=s(S,"P",{});var _2=r(xa);wp=c(_2,"Instantiate a pretrained pytorch model from a pre-trained model configuration."),_2.forEach(t),$p=d(S),Ce=s(S,"P",{});var Qr=r(Ce);Mp=c(Qr,"The model is set in evaluation mode by default using "),wa=s(Qr,"CODE",{});var g2=r(wa);Dp=c(g2,"model.eval()"),g2.forEach(t),Tp=c(Qr,` (Dropout modules are deactivated). To train | |
| the model, you should first set it back in training mode with `),$a=s(Qr,"CODE",{});var b2=r($a);kp=c(b2,"model.train()"),b2.forEach(t),Ap=c(Qr,"."),Qr.forEach(t),Np=d(S),Ft=s(S,"P",{});var _c=r(Ft);Up=c(_c,"The warning "),Ma=s(_c,"EM",{});var v2=r(Ma);Ep=c(v2,"Weights from XXX not initialized from pretrained model"),v2.forEach(t),Cp=c(_c,` means that the weights of XXX do not come | |
| pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning | |
| task.`),_c.forEach(t),Fp=d(S),Pt=s(S,"P",{});var gc=r(Pt);Pp=c(gc,"The warning "),Da=s(gc,"EM",{});var y2=r(Da);jp=c(y2,"Weights from XXX not used in YYY"),y2.forEach(t),Ip=c(gc,` means that the layer XXX is not used by YYY, therefore those | |
| weights are discarded.`),gc.forEach(t),Op=d(S),u(bo.$$.fragment,S),Lp=d(S),u(vo.$$.fragment,S),S.forEach(t),Vp=d(E),yo=s(E,"DIV",{class:!0});var bc=r(yo);u(jt.$$.fragment,bc),Bp=d(bc),Ta=s(bc,"P",{});var x2=r(Ta);qp=c(x2,"Get number of (optionally, trainable or non-embeddings) parameters in the module."),x2.forEach(t),bc.forEach(t),Jp=d(E),xo=s(E,"DIV",{class:!0});var vc=r(xo);u(It.$$.fragment,vc),zp=d(vc),Ot=s(vc,"P",{});var yc=r(Ot);Wp=c(yc,`Save a model and its configuration file to a directory, so that it can be re-loaded using the | |
| `),ka=s(yc,"CODE",{});var w2=r(ka);Xp=c(w2,"[from_pretrained()](/docs/diffusers/main/en/api/models#diffusers.ModelMixin.from_pretrained)"),w2.forEach(t),Sp=c(yc," class method."),yc.forEach(t),vc.forEach(t),E.forEach(t),cl=d(o),Fe=s(o,"H2",{class:!0});var xc=r(Fe);wo=s(xc,"A",{id:!0,class:!0,href:!0});var $2=r(wo);Aa=s($2,"SPAN",{});var M2=r(Aa);u(Lt.$$.fragment,M2),M2.forEach(t),$2.forEach(t),Gp=d(xc),Na=s(xc,"SPAN",{});var D2=r(Na);Kp=c(D2,"UNet2DOutput"),D2.forEach(t),xc.forEach(t),ml=d(o),Vt=s(o,"DIV",{class:!0});var T2=r(Vt);u(Bt.$$.fragment,T2),T2.forEach(t),pl=d(o),Pe=s(o,"H2",{class:!0});var wc=r(Pe);$o=s(wc,"A",{id:!0,class:!0,href:!0});var k2=r($o);Ua=s(k2,"SPAN",{});var A2=r(Ua);u(qt.$$.fragment,A2),A2.forEach(t),k2.forEach(t),Rp=d(wc),Ea=s(wc,"SPAN",{});var N2=r(Ea);Zp=c(N2,"UNet2DModel"),N2.forEach(t),wc.forEach(t),fl=d(o),Q=s(o,"DIV",{class:!0});var bt=r(Q);u(Jt.$$.fragment,bt),Yp=d(bt),Ca=s(bt,"P",{});var U2=r(Ca);Qp=c(U2,"UNet2DModel is a 2D UNet model that takes in a noisy sample and a timestep and returns sample shaped output."),U2.forEach(t),Hp=d(bt),zt=s(bt,"P",{});var $c=r(zt);ef=c($c,"This model inherits from "),Mr=s($c,"A",{href:!0});var E2=r(Mr);of=c(E2,"ModelMixin"),E2.forEach(t),tf=c($c,`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),$c.forEach(t),nf=d(bt),Dr=s(bt,"DIV",{class:!0});var C2=r(Dr);u(Wt.$$.fragment,C2),C2.forEach(t),bt.forEach(t),ul=d(o),je=s(o,"H2",{class:!0});var Mc=r(je);Mo=s(Mc,"A",{id:!0,class:!0,href:!0});var F2=r(Mo);Fa=s(F2,"SPAN",{});var P2=r(Fa);u(Xt.$$.fragment,P2),P2.forEach(t),F2.forEach(t),sf=d(Mc),Pa=s(Mc,"SPAN",{});var j2=r(Pa);rf=c(j2,"UNet1DOutput"),j2.forEach(t),Mc.forEach(t),hl=d(o),St=s(o,"DIV",{class:!0});var I2=r(St);u(Gt.$$.fragment,I2),I2.forEach(t),_l=d(o),Ie=s(o,"H2",{class:!0});var Dc=r(Ie);Do=s(Dc,"A",{id:!0,class:!0,href:!0});var O2=r(Do);ja=s(O2,"SPAN",{});var L2=r(ja);u(Kt.$$.fragment,L2),L2.forEach(t),O2.forEach(t),af=d(Dc),Ia=s(Dc,"SPAN",{});var V2=r(Ia);df=c(V2,"UNet1DModel"),V2.forEach(t),Dc.forEach(t),gl=d(o),H=s(o,"DIV",{class:!0});var vt=r(H);u(Rt.$$.fragment,vt),lf=d(vt),Oa=s(vt,"P",{});var B2=r(Oa);cf=c(B2,"UNet1DModel is a 1D UNet model that takes in a noisy sample and a timestep and returns sample shaped output."),B2.forEach(t),mf=d(vt),Zt=s(vt,"P",{});var Tc=r(Zt);pf=c(Tc,"This model inherits from "),Tr=s(Tc,"A",{href:!0});var q2=r(Tr);ff=c(q2,"ModelMixin"),q2.forEach(t),uf=c(Tc,`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),Tc.forEach(t),hf=d(vt),kr=s(vt,"DIV",{class:!0});var J2=r(kr);u(Yt.$$.fragment,J2),J2.forEach(t),vt.forEach(t),bl=d(o),Oe=s(o,"H2",{class:!0});var kc=r(Oe);To=s(kc,"A",{id:!0,class:!0,href:!0});var z2=r(To);La=s(z2,"SPAN",{});var W2=r(La);u(Qt.$$.fragment,W2),W2.forEach(t),z2.forEach(t),_f=d(kc),Va=s(kc,"SPAN",{});var X2=r(Va);gf=c(X2,"UNet2DConditionOutput"),X2.forEach(t),kc.forEach(t),vl=d(o),Ht=s(o,"DIV",{class:!0});var S2=r(Ht);u(en.$$.fragment,S2),S2.forEach(t),yl=d(o),Le=s(o,"H2",{class:!0});var Ac=r(Le);ko=s(Ac,"A",{id:!0,class:!0,href:!0});var G2=r(ko);Ba=s(G2,"SPAN",{});var K2=r(Ba);u(on.$$.fragment,K2),K2.forEach(t),G2.forEach(t),bf=d(Ac),qa=s(Ac,"SPAN",{});var R2=r(qa);vf=c(R2,"UNet2DConditionModel"),R2.forEach(t),Ac.forEach(t),xl=d(o),F=s(o,"DIV",{class:!0});var G=r(F);u(tn.$$.fragment,G),yf=d(G),Ja=s(G,"P",{});var Z2=r(Ja);xf=c(Z2,`UNet2DConditionModel is a conditional 2D UNet model that takes in a noisy sample, conditional state, and a timestep | |
| and returns sample shaped output.`),Z2.forEach(t),wf=d(G),nn=s(G,"P",{});var Nc=r(nn);$f=c(Nc,"This model inherits from "),Ar=s(Nc,"A",{href:!0});var Y2=r(Ar);Mf=c(Y2,"ModelMixin"),Y2.forEach(t),Df=c(Nc,`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Nc.forEach(t),Tf=d(G),Nr=s(G,"DIV",{class:!0});var Q2=r(Nr);u(sn.$$.fragment,Q2),Q2.forEach(t),kf=d(G),ve=s(G,"DIV",{class:!0});var Hr=r(ve);u(rn.$$.fragment,Hr),Af=d(Hr),za=s(Hr,"P",{});var H2=r(za);Nf=c(H2,"Enable sliced attention computation."),H2.forEach(t),Uf=d(Hr),Wa=s(Hr,"P",{});var ev=r(Wa);Ef=c(ev,`When this option is enabled, the attention module will split the input tensor in slices, to compute attention | |
| in several steps. This is useful to save some memory in exchange for a small speed decrease.`),ev.forEach(t),Hr.forEach(t),Cf=d(G),Ur=s(G,"DIV",{class:!0});var ov=r(Ur);u(an.$$.fragment,ov),ov.forEach(t),Ff=d(G),Ao=s(G,"DIV",{class:!0});var Uc=r(Ao);u(dn.$$.fragment,Uc),Pf=d(Uc),Xa=s(Uc,"P",{});var tv=r(Xa);jf=c(tv,"Disables custom attention processors and sets the default attention implementation."),tv.forEach(t),Uc.forEach(t),G.forEach(t),wl=d(o),Ve=s(o,"H2",{class:!0});var Ec=r(Ve);No=s(Ec,"A",{id:!0,class:!0,href:!0});var nv=r(No);Sa=s(nv,"SPAN",{});var sv=r(Sa);u(ln.$$.fragment,sv),sv.forEach(t),nv.forEach(t),If=d(Ec),Ga=s(Ec,"SPAN",{});var rv=r(Ga);Of=c(rv,"UNet3DConditionOutput"),rv.forEach(t),Ec.forEach(t),$l=d(o),cn=s(o,"DIV",{class:!0});var av=r(cn);u(mn.$$.fragment,av),av.forEach(t),Ml=d(o),Be=s(o,"H2",{class:!0});var Cc=r(Be);Uo=s(Cc,"A",{id:!0,class:!0,href:!0});var iv=r(Uo);Ka=s(iv,"SPAN",{});var dv=r(Ka);u(pn.$$.fragment,dv),dv.forEach(t),iv.forEach(t),Lf=d(Cc),Ra=s(Cc,"SPAN",{});var lv=r(Ra);Vf=c(lv,"UNet3DConditionModel"),lv.forEach(t),Cc.forEach(t),Dl=d(o),P=s(o,"DIV",{class:!0});var K=r(P);u(fn.$$.fragment,K),Bf=d(K),Za=s(K,"P",{});var cv=r(Za);qf=c(cv,`UNet3DConditionModel is a conditional 2D UNet model that takes in a noisy sample, conditional state, and a timestep | |
| and returns sample shaped output.`),cv.forEach(t),Jf=d(K),un=s(K,"P",{});var Fc=r(un);zf=c(Fc,"This model inherits from "),Er=s(Fc,"A",{href:!0});var mv=r(Er);Wf=c(mv,"ModelMixin"),mv.forEach(t),Xf=c(Fc,`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),Fc.forEach(t),Sf=d(K),Cr=s(K,"DIV",{class:!0});var pv=r(Cr);u(hn.$$.fragment,pv),pv.forEach(t),Gf=d(K),ye=s(K,"DIV",{class:!0});var ea=r(ye);u(_n.$$.fragment,ea),Kf=d(ea),Ya=s(ea,"P",{});var fv=r(Ya);Rf=c(fv,"Enable sliced attention computation."),fv.forEach(t),Zf=d(ea),Qa=s(ea,"P",{});var uv=r(Qa);Yf=c(uv,`When this option is enabled, the attention module will split the input tensor in slices, to compute attention | |
| in several steps. This is useful to save some memory in exchange for a small speed decrease.`),uv.forEach(t),ea.forEach(t),Qf=d(K),Fr=s(K,"DIV",{class:!0});var hv=r(Fr);u(gn.$$.fragment,hv),hv.forEach(t),Hf=d(K),Eo=s(K,"DIV",{class:!0});var Pc=r(Eo);u(bn.$$.fragment,Pc),eu=d(Pc),Ha=s(Pc,"P",{});var _v=r(Ha);ou=c(_v,"Disables custom attention processors and sets the default attention implementation."),_v.forEach(t),Pc.forEach(t),K.forEach(t),Tl=d(o),qe=s(o,"H2",{class:!0});var jc=r(qe);Co=s(jc,"A",{id:!0,class:!0,href:!0});var gv=r(Co);ei=s(gv,"SPAN",{});var bv=r(ei);u(vn.$$.fragment,bv),bv.forEach(t),gv.forEach(t),tu=d(jc),oi=s(jc,"SPAN",{});var vv=r(oi);nu=c(vv,"DecoderOutput"),vv.forEach(t),jc.forEach(t),kl=d(o),Je=s(o,"DIV",{class:!0});var Ic=r(Je);u(yn.$$.fragment,Ic),su=d(Ic),ti=s(Ic,"P",{});var yv=r(ti);ru=c(yv,"Output of decoding method."),yv.forEach(t),Ic.forEach(t),Al=d(o),ze=s(o,"H2",{class:!0});var Oc=r(ze);Fo=s(Oc,"A",{id:!0,class:!0,href:!0});var xv=r(Fo);ni=s(xv,"SPAN",{});var wv=r(ni);u(xn.$$.fragment,wv),wv.forEach(t),xv.forEach(t),au=d(Oc),si=s(Oc,"SPAN",{});var $v=r(si);iu=c($v,"VQEncoderOutput"),$v.forEach(t),Oc.forEach(t),Nl=d(o),We=s(o,"DIV",{class:!0});var Lc=r(We);u(wn.$$.fragment,Lc),du=d(Lc),ri=s(Lc,"P",{});var Mv=r(ri);lu=c(Mv,"Output of VQModel encoding method."),Mv.forEach(t),Lc.forEach(t),Ul=d(o),Xe=s(o,"H2",{class:!0});var Vc=r(Xe);Po=s(Vc,"A",{id:!0,class:!0,href:!0});var Dv=r(Po);ai=s(Dv,"SPAN",{});var Tv=r(ai);u($n.$$.fragment,Tv),Tv.forEach(t),Dv.forEach(t),cu=d(Vc),ii=s(Vc,"SPAN",{});var kv=r(ii);mu=c(kv,"VQModel"),kv.forEach(t),Vc.forEach(t),El=d(o),ee=s(o,"DIV",{class:!0});var yt=r(ee);u(Mn.$$.fragment,yt),pu=d(yt),di=s(yt,"P",{});var Av=r(di);fu=c(Av,`VQ-VAE model from the paper Neural Discrete Representation Learning by Aaron van den Oord, Oriol Vinyals and Koray | |
| Kavukcuoglu.`),Av.forEach(t),uu=d(yt),Dn=s(yt,"P",{});var Bc=r(Dn);hu=c(Bc,"This model inherits from "),Pr=s(Bc,"A",{href:!0});var Nv=r(Pr);_u=c(Nv,"ModelMixin"),Nv.forEach(t),gu=c(Bc,`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),Bc.forEach(t),bu=d(yt),jr=s(yt,"DIV",{class:!0});var Uv=r(jr);u(Tn.$$.fragment,Uv),Uv.forEach(t),yt.forEach(t),Cl=d(o),Se=s(o,"H2",{class:!0});var qc=r(Se);jo=s(qc,"A",{id:!0,class:!0,href:!0});var Ev=r(jo);li=s(Ev,"SPAN",{});var Cv=r(li);u(kn.$$.fragment,Cv),Cv.forEach(t),Ev.forEach(t),vu=d(qc),ci=s(qc,"SPAN",{});var Fv=r(ci);yu=c(Fv,"AutoencoderKLOutput"),Fv.forEach(t),qc.forEach(t),Fl=d(o),Ge=s(o,"DIV",{class:!0});var Jc=r(Ge);u(An.$$.fragment,Jc),xu=d(Jc),mi=s(Jc,"P",{});var Pv=r(mi);wu=c(Pv,"Output of AutoencoderKL encoding method."),Pv.forEach(t),Jc.forEach(t),Pl=d(o),Ke=s(o,"H2",{class:!0});var zc=r(Ke);Io=s(zc,"A",{id:!0,class:!0,href:!0});var jv=r(Io);pi=s(jv,"SPAN",{});var Iv=r(pi);u(Nn.$$.fragment,Iv),Iv.forEach(t),jv.forEach(t),$u=d(zc),fi=s(zc,"SPAN",{});var Ov=r(fi);Mu=c(Ov,"AutoencoderKL"),Ov.forEach(t),zc.forEach(t),jl=d(o),A=s(o,"DIV",{class:!0});var U=r(A);u(Un.$$.fragment,U),Du=d(U),ui=s(U,"P",{});var Lv=r(ui);Tu=c(Lv,`Variational Autoencoder (VAE) model with KL loss from the paper Auto-Encoding Variational Bayes by Diederik P. Kingma | |
| and Max Welling.`),Lv.forEach(t),ku=d(U),En=s(U,"P",{});var Wc=r(En);Au=c(Wc,"This model inherits from "),Ir=s(Wc,"A",{href:!0});var Vv=r(Ir);Nu=c(Vv,"ModelMixin"),Vv.forEach(t),Uu=c(Wc,`. Check the superclass documentation for the generic methods the library | |
| implements for all the model (such as downloading or saving, etc.)`),Wc.forEach(t),Eu=d(U),Oo=s(U,"DIV",{class:!0});var Xc=r(Oo);u(Cn.$$.fragment,Xc),Cu=d(Xc),Fn=s(Xc,"P",{});var Sc=r(Fn);Fu=c(Sc,"Disable sliced VAE decoding. If "),hi=s(Sc,"CODE",{});var Bv=r(hi);Pu=c(Bv,"enable_slicing"),Bv.forEach(t),ju=c(Sc,` was previously invoked, this method will go back to computing | |
| decoding in one step.`),Sc.forEach(t),Xc.forEach(t),Iu=d(U),Lo=s(U,"DIV",{class:!0});var Gc=r(Lo);u(Pn.$$.fragment,Gc),Ou=d(Gc),jn=s(Gc,"P",{});var Kc=r(jn);Lu=c(Kc,"Disable tiled VAE decoding. If "),_i=s(Kc,"CODE",{});var qv=r(_i);Vu=c(qv,"enable_vae_tiling"),qv.forEach(t),Bu=c(Kc,` was previously invoked, this method will go back to | |
| computing decoding in one step.`),Kc.forEach(t),Gc.forEach(t),qu=d(U),Vo=s(U,"DIV",{class:!0});var Rc=r(Vo);u(In.$$.fragment,Rc),Ju=d(Rc),gi=s(Rc,"P",{});var Jv=r(gi);zu=c(Jv,`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to | |
| compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`),Jv.forEach(t),Rc.forEach(t),Wu=d(U),Bo=s(U,"DIV",{class:!0});var Zc=r(Bo);u(On.$$.fragment,Zc),Xu=d(Zc),bi=s(Zc,"P",{});var zv=r(bi);Su=c(zv,`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to | |
| compute decoding and encoding in several steps. This is useful to save a large amount of memory and to allow | |
| the processing of larger images.`),zv.forEach(t),Zc.forEach(t),Gu=d(U),Or=s(U,"DIV",{class:!0});var Wv=r(Or);u(Ln.$$.fragment,Wv),Wv.forEach(t),Ku=d(U),Lr=s(U,"DIV",{class:!0});var Xv=r(Lr);u(Vn.$$.fragment,Xv),Xv.forEach(t),Ru=d(U),qo=s(U,"DIV",{class:!0});var Yc=r(qo);u(Bn.$$.fragment,Yc),Zu=d(Yc),vi=s(Yc,"P",{});var Sv=r(vi);Yu=c(Sv,"Disables custom attention processors and sets the default attention implementation."),Sv.forEach(t),Yc.forEach(t),Qu=d(U),Jo=s(U,"DIV",{class:!0});var Qc=r(Jo);u(qn.$$.fragment,Qc),Hu=d(Qc),yi=s(Qc,"P",{});var Gv=r(yi);eh=c(Gv,"Decode a batch of images using a tiled decoder."),Gv.forEach(t),Qc.forEach(t),oh=d(U),zo=s(U,"DIV",{class:!0});var Hc=r(zo);u(Jn.$$.fragment,Hc),th=d(Hc),xi=s(Hc,"P",{});var Kv=r(xi);nh=c(Kv,"Encode a batch of images using a tiled encoder."),Kv.forEach(t),Hc.forEach(t),U.forEach(t),Il=d(o),Re=s(o,"H2",{class:!0});var em=r(Re);Wo=s(em,"A",{id:!0,class:!0,href:!0});var Rv=r(Wo);wi=s(Rv,"SPAN",{});var Zv=r(wi);u(zn.$$.fragment,Zv),Zv.forEach(t),Rv.forEach(t),sh=d(em),$i=s(em,"SPAN",{});var Yv=r($i);rh=c(Yv,"Transformer2DModel"),Yv.forEach(t),em.forEach(t),Ol=d(o),O=s(o,"DIV",{class:!0});var ie=r(O);u(Wn.$$.fragment,ie),ah=d(ie),Mi=s(ie,"P",{});var Qv=r(Mi);ih=c(Qv,`Transformer model for image-like data. Takes either discrete (classes of vector embeddings) or continuous (actual | |
| embeddings) inputs.`),Qv.forEach(t),dh=d(ie),Di=s(ie,"P",{});var Hv=r(Di);lh=c(Hv,`When input is continuous: First, project the input (aka embedding) and reshape to b, t, d. Then apply standard | |
| transformer action. Finally, reshape to image.`),Hv.forEach(t),ch=d(ie),Xn=s(ie,"P",{});var om=r(Xn);mh=c(om,`When input is discrete: First, input (classes of latent pixels) is converted to embeddings and has positional | |
| embeddings applied, see `),Ti=s(om,"CODE",{});var ey=r(Ti);ph=c(ey,"ImagePositionalEmbeddings"),ey.forEach(t),fh=c(om,`. Then apply standard transformer action. Finally, predict | |
| classes of unnoised image.`),om.forEach(t),uh=d(ie),ki=s(ie,"P",{});var oy=r(ki);hh=c(oy,`Note that it is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised | |
| image do not contain a prediction for the masked pixel as the unnoised image cannot be masked.`),oy.forEach(t),_h=d(ie),Vr=s(ie,"DIV",{class:!0});var ty=r(Vr);u(Sn.$$.fragment,ty),ty.forEach(t),ie.forEach(t),Ll=d(o),Ze=s(o,"H2",{class:!0});var tm=r(Ze);Xo=s(tm,"A",{id:!0,class:!0,href:!0});var ny=r(Xo);Ai=s(ny,"SPAN",{});var sy=r(Ai);u(Gn.$$.fragment,sy),sy.forEach(t),ny.forEach(t),gh=d(tm),Ni=s(tm,"SPAN",{});var ry=r(Ni);bh=c(ry,"Transformer2DModelOutput"),ry.forEach(t),tm.forEach(t),Vl=d(o),Kn=s(o,"DIV",{class:!0});var ay=r(Kn);u(Rn.$$.fragment,ay),ay.forEach(t),Bl=d(o),Ye=s(o,"H2",{class:!0});var nm=r(Ye);So=s(nm,"A",{id:!0,class:!0,href:!0});var iy=r(So);Ui=s(iy,"SPAN",{});var dy=r(Ui);u(Zn.$$.fragment,dy),dy.forEach(t),iy.forEach(t),vh=d(nm),Ei=s(nm,"SPAN",{});var ly=r(Ei);yh=c(ly,"TransformerTemporalModel"),ly.forEach(t),nm.forEach(t),ql=d(o),ce=s(o,"DIV",{class:!0});var oa=r(ce);u(Yn.$$.fragment,oa),xh=d(oa),Ci=s(oa,"P",{});var cy=r(Ci);wh=c(cy,"Transformer model for video-like data."),cy.forEach(t),$h=d(oa),Br=s(oa,"DIV",{class:!0});var my=r(Br);u(Qn.$$.fragment,my),my.forEach(t),oa.forEach(t),Jl=d(o),Qe=s(o,"H2",{class:!0});var sm=r(Qe);Go=s(sm,"A",{id:!0,class:!0,href:!0});var py=r(Go);Fi=s(py,"SPAN",{});var fy=r(Fi);u(Hn.$$.fragment,fy),fy.forEach(t),py.forEach(t),Mh=d(sm),Pi=s(sm,"SPAN",{});var uy=r(Pi);Dh=c(uy,"Transformer2DModelOutput"),uy.forEach(t),sm.forEach(t),zl=d(o),es=s(o,"DIV",{class:!0});var hy=r(es);u(os.$$.fragment,hy),hy.forEach(t),Wl=d(o),He=s(o,"H2",{class:!0});var rm=r(He);Ko=s(rm,"A",{id:!0,class:!0,href:!0});var _y=r(Ko);ji=s(_y,"SPAN",{});var gy=r(ji);u(ts.$$.fragment,gy),gy.forEach(t),_y.forEach(t),Th=d(rm),Ii=s(rm,"SPAN",{});var by=r(Ii);kh=c(by,"PriorTransformer"),by.forEach(t),rm.forEach(t),Xl=d(o),j=s(o,"DIV",{class:!0});var R=r(j);u(ns.$$.fragment,R),Ah=d(R),Oi=s(R,"P",{});var vy=r(Oi);Nh=c(vy,`The prior transformer from unCLIP is used to predict CLIP image embeddings from CLIP text embeddings. Note that the | |
| transformer predicts the image embeddings through a denoising diffusion process.`),vy.forEach(t),Uh=d(R),ss=s(R,"P",{});var am=r(ss);Eh=c(am,"This model inherits from "),qr=s(am,"A",{href:!0});var yy=r(qr);Ch=c(yy,"ModelMixin"),yy.forEach(t),Fh=c(am,`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),am.forEach(t),Ph=d(R),Jr=s(R,"P",{});var Yb=r(Jr);jh=c(Yb,"For more details, see the original paper: "),rs=s(Yb,"A",{href:!0,rel:!0});var xy=r(rs);Ih=c(xy,"https://arxiv.org/abs/2204.06125"),xy.forEach(t),Yb.forEach(t),Oh=d(R),zr=s(R,"DIV",{class:!0});var wy=r(zr);u(as.$$.fragment,wy),wy.forEach(t),Lh=d(R),Wr=s(R,"DIV",{class:!0});var $y=r(Wr);u(is.$$.fragment,$y),$y.forEach(t),Vh=d(R),Ro=s(R,"DIV",{class:!0});var im=r(Ro);u(ds.$$.fragment,im),Bh=d(im),Li=s(im,"P",{});var My=r(Li);qh=c(My,"Disables custom attention processors and sets the default attention implementation."),My.forEach(t),im.forEach(t),R.forEach(t),Sl=d(o),eo=s(o,"H2",{class:!0});var dm=r(eo);Zo=s(dm,"A",{id:!0,class:!0,href:!0});var Dy=r(Zo);Vi=s(Dy,"SPAN",{});var Ty=r(Vi);u(ls.$$.fragment,Ty),Ty.forEach(t),Dy.forEach(t),Jh=d(dm),Bi=s(dm,"SPAN",{});var ky=r(Bi);zh=c(ky,"PriorTransformerOutput"),ky.forEach(t),dm.forEach(t),Gl=d(o),cs=s(o,"DIV",{class:!0});var Ay=r(cs);u(ms.$$.fragment,Ay),Ay.forEach(t),Kl=d(o),oo=s(o,"H2",{class:!0});var lm=r(oo);Yo=s(lm,"A",{id:!0,class:!0,href:!0});var Ny=r(Yo);qi=s(Ny,"SPAN",{});var Uy=r(qi);u(ps.$$.fragment,Uy),Uy.forEach(t),Ny.forEach(t),Wh=d(lm),Ji=s(lm,"SPAN",{});var Ey=r(Ji);Xh=c(Ey,"ControlNetOutput"),Ey.forEach(t),lm.forEach(t),Rl=d(o),fs=s(o,"DIV",{class:!0});var Cy=r(fs);u(us.$$.fragment,Cy),Cy.forEach(t),Zl=d(o),to=s(o,"H2",{class:!0});var cm=r(to);Qo=s(cm,"A",{id:!0,class:!0,href:!0});var Fy=r(Qo);zi=s(Fy,"SPAN",{});var Py=r(zi);u(hs.$$.fragment,Py),Py.forEach(t),Fy.forEach(t),Sh=d(cm),Wi=s(cm,"SPAN",{});var jy=r(Wi);Gh=c(jy,"ControlNetModel"),jy.forEach(t),cm.forEach(t),Yl=d(o),J=s(o,"DIV",{class:!0});var Me=r(J);u(_s.$$.fragment,Me),Kh=d(Me),Ho=s(Me,"DIV",{class:!0});var mm=r(Ho);u(gs.$$.fragment,mm),Rh=d(mm),Xi=s(mm,"P",{});var Iy=r(Xi);Zh=c(Iy,"Instantiate Controlnet class from UNet2DConditionModel."),Iy.forEach(t),mm.forEach(t),Yh=d(Me),xe=s(Me,"DIV",{class:!0});var 
ta=r(xe);u(bs.$$.fragment,ta),Qh=d(ta),Si=s(ta,"P",{});var Oy=r(Si);Hh=c(Oy,"Enable sliced attention computation."),Oy.forEach(t),e_=d(ta),Gi=s(ta,"P",{});var Ly=r(Gi);o_=c(Ly,`When this option is enabled, the attention module will split the input tensor in slices, to compute attention | |
| in several steps. This is useful to save some memory in exchange for a small speed decrease.`),Ly.forEach(t),ta.forEach(t),t_=d(Me),Xr=s(Me,"DIV",{class:!0});var Vy=r(Xr);u(vs.$$.fragment,Vy),Vy.forEach(t),n_=d(Me),et=s(Me,"DIV",{class:!0});var pm=r(et);u(ys.$$.fragment,pm),s_=d(pm),Ki=s(pm,"P",{});var By=r(Ki);r_=c(By,"Disables custom attention processors and sets the default attention implementation."),By.forEach(t),pm.forEach(t),Me.forEach(t),Ql=d(o),no=s(o,"H2",{class:!0});var fm=r(no);ot=s(fm,"A",{id:!0,class:!0,href:!0});var qy=r(ot);Ri=s(qy,"SPAN",{});var Jy=r(Ri);u(xs.$$.fragment,Jy),Jy.forEach(t),qy.forEach(t),a_=d(fm),Zi=s(fm,"SPAN",{});var zy=r(Zi);i_=c(zy,"FlaxModelMixin"),zy.forEach(t),fm.forEach(t),Hl=d(o),C=s(o,"DIV",{class:!0});var q=r(C);u(ws.$$.fragment,q),d_=d(q),Yi=s(q,"P",{});var Wy=r(Yi);l_=c(Wy,"Base class for all flax models."),Wy.forEach(t),c_=d(q),Sr=s(q,"P",{});var Qb=r(Sr);Gr=s(Qb,"A",{href:!0});var Xy=r(Gr);m_=c(Xy,"FlaxModelMixin"),Xy.forEach(t),p_=c(Qb,` takes care of storing the configuration of the models and handles methods for loading, | |
| downloading and saving models.`),Qb.forEach(t),f_=d(q),X=s(q,"DIV",{class:!0});var De=r(X);u($s.$$.fragment,De),u_=d(De),Qi=s(De,"P",{});var Sy=r(Qi);h_=c(Sy,"Instantiate a pretrained flax model from a pre-trained model configuration."),Sy.forEach(t),__=d(De),Ms=s(De,"P",{});var um=r(Ms);g_=c(um,"The warning "),Hi=s(um,"EM",{});var Gy=r(Hi);b_=c(Gy,"Weights from XXX not initialized from pretrained model"),Gy.forEach(t),v_=c(um,` means that the weights of XXX do not come | |
| pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning | |
| task.`),um.forEach(t),y_=d(De),Ds=s(De,"P",{});var hm=r(Ds);x_=c(hm,"The warning "),ed=s(hm,"EM",{});var Ky=r(ed);w_=c(Ky,"Weights from XXX not used in YYY"),Ky.forEach(t),$_=c(hm,` means that the layer XXX is not used by YYY, therefore those | |
| weights are discarded.`),hm.forEach(t),M_=d(De),u(tt.$$.fragment,De),De.forEach(t),D_=d(q),nt=s(q,"DIV",{class:!0});var _m=r(nt);u(Ts.$$.fragment,_m),T_=d(_m),ks=s(_m,"P",{});var gm=r(ks);k_=c(gm,`Save a model and its configuration file to a directory, so that it can be re-loaded using the | |
| `),od=s(gm,"CODE",{});var Ry=r(od);A_=c(Ry,"[from_pretrained()](/docs/diffusers/main/en/api/models#diffusers.FlaxModelMixin.from_pretrained)"),Ry.forEach(t),N_=c(gm," class method"),gm.forEach(t),_m.forEach(t),U_=d(q),re=s(q,"DIV",{class:!0});var xt=r(re);u(As.$$.fragment,xt),E_=d(xt),oe=s(xt,"P",{});var Te=r(oe);C_=c(Te,"Cast the floating-point "),td=s(Te,"CODE",{});var Zy=r(td);F_=c(Zy,"params"),Zy.forEach(t),P_=c(Te," to "),nd=s(Te,"CODE",{});var Yy=r(nd);j_=c(Yy,"jax.numpy.bfloat16"),Yy.forEach(t),I_=c(Te,". This returns a new "),sd=s(Te,"CODE",{});var Qy=r(sd);O_=c(Qy,"params"),Qy.forEach(t),L_=c(Te,` tree and does not cast | |
| the `),rd=s(Te,"CODE",{});var Hy=r(rd);V_=c(Hy,"params"),Hy.forEach(t),B_=c(Te," in place."),Te.forEach(t),q_=d(xt),ad=s(xt,"P",{});var e1=r(ad);J_=c(e1,`This method can be used on TPU to explicitly convert the model parameters to bfloat16 precision to do full | |
| half-precision training or to save weights in bfloat16 for inference in order to save memory and improve speed.`),e1.forEach(t),z_=d(xt),u(st.$$.fragment,xt),xt.forEach(t),W_=d(q),ae=s(q,"DIV",{class:!0});var wt=r(ae);u(Ns.$$.fragment,wt),X_=d(wt),te=s(wt,"P",{});var ke=r(te);S_=c(ke,"Cast the floating-point "),id=s(ke,"CODE",{});var o1=r(id);G_=c(o1,"params"),o1.forEach(t),K_=c(ke," to "),dd=s(ke,"CODE",{});var t1=r(dd);R_=c(t1,"jax.numpy.float16"),t1.forEach(t),Z_=c(ke,". This returns a new "),ld=s(ke,"CODE",{});var n1=r(ld);Y_=c(n1,"params"),n1.forEach(t),Q_=c(ke,` tree and does not cast the | |
| `),cd=s(ke,"CODE",{});var s1=r(cd);H_=c(s1,"params"),s1.forEach(t),eg=c(ke," in place."),ke.forEach(t),og=d(wt),md=s(wt,"P",{});var r1=r(md);tg=c(r1,`This method can be used on GPU to explicitly convert the model parameters to float16 precision to do full | |
| half-precision training or to save weights in float16 for inference in order to save memory and improve speed.`),r1.forEach(t),ng=d(wt),u(rt.$$.fragment,wt),wt.forEach(t),sg=d(q),we=s(q,"DIV",{class:!0});var na=r(we);u(Us.$$.fragment,na),rg=d(na),ne=s(na,"P",{});var Ae=r(ne);ag=c(Ae,"Cast the floating-point "),pd=s(Ae,"CODE",{});var a1=r(pd);ig=c(a1,"params"),a1.forEach(t),dg=c(Ae," to "),fd=s(Ae,"CODE",{});var i1=r(fd);lg=c(i1,"jax.numpy.float32"),i1.forEach(t),cg=c(Ae,`. This method can be used to explicitly convert the | |
| model parameters to fp32 precision. This returns a new `),ud=s(Ae,"CODE",{});var d1=r(ud);mg=c(d1,"params"),d1.forEach(t),pg=c(Ae," tree and does not cast the "),hd=s(Ae,"CODE",{});var l1=r(hd);fg=c(l1,"params"),l1.forEach(t),ug=c(Ae," in place."),Ae.forEach(t),hg=d(na),u(at.$$.fragment,na),na.forEach(t),q.forEach(t),ec=d(o),so=s(o,"H2",{class:!0});var bm=r(so);it=s(bm,"A",{id:!0,class:!0,href:!0});var c1=r(it);_d=s(c1,"SPAN",{});var m1=r(_d);u(Es.$$.fragment,m1),m1.forEach(t),c1.forEach(t),_g=d(bm),gd=s(bm,"SPAN",{});var p1=r(gd);gg=c(p1,"FlaxUNet2DConditionOutput"),p1.forEach(t),bm.forEach(t),oc=d(o),ro=s(o,"DIV",{class:!0});var vm=r(ro);u(Cs.$$.fragment,vm),bg=d(vm),dt=s(vm,"DIV",{class:!0});var ym=r(dt);u(Fs.$$.fragment,ym),vg=d(ym),bd=s(ym,"P",{});var f1=r(bd);yg=c(f1,"\u201CReturns a new object replacing the specified fields with new values."),f1.forEach(t),ym.forEach(t),vm.forEach(t),tc=d(o),ao=s(o,"H2",{class:!0});var xm=r(ao);lt=s(xm,"A",{id:!0,class:!0,href:!0});var u1=r(lt);vd=s(u1,"SPAN",{});var h1=r(vd);u(Ps.$$.fragment,h1),h1.forEach(t),u1.forEach(t),xg=d(xm),yd=s(xm,"SPAN",{});var _1=r(yd);wg=c(_1,"FlaxUNet2DConditionModel"),_1.forEach(t),xm.forEach(t),nc=d(o),L=s(o,"DIV",{class:!0});var de=r(L);u(js.$$.fragment,de),$g=d(de),xd=s(de,"P",{});var g1=r(xd);Mg=c(g1,`FlaxUNet2DConditionModel is a conditional 2D UNet model that takes in a noisy sample, conditional state, and a | |
| timestep and returns sample shaped output.`),g1.forEach(t),Dg=d(de),Is=s(de,"P",{});var wm=r(Is);Tg=c(wm,"This model inherits from "),Kr=s(wm,"A",{href:!0});var b1=r(Kr);kg=c(b1,"FlaxModelMixin"),b1.forEach(t),Ag=c(wm,`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),wm.forEach(t),Ng=d(de),Os=s(de,"P",{});var $m=r(Os);Ug=c($m,"Also, this model is a Flax Linen "),Ls=s($m,"A",{href:!0,rel:!0});var v1=r(Ls);Eg=c(v1,"flax.linen.Module"),v1.forEach(t),Cg=c($m,` | |
| subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to | |
| general usage and behavior.`),$m.forEach(t),Fg=d(de),wd=s(de,"P",{});var y1=r(wd);Pg=c(y1,"Finally, this model supports inherent JAX features such as:"),y1.forEach(t),jg=d(de),me=s(de,"UL",{});var $t=r(me);$d=s($t,"LI",{});var x1=r($d);Vs=s(x1,"A",{href:!0,rel:!0});var w1=r(Vs);Ig=c(w1,"Just-In-Time (JIT) compilation"),w1.forEach(t),x1.forEach(t),Og=d($t),Md=s($t,"LI",{});var $1=r(Md);Bs=s($1,"A",{href:!0,rel:!0});var M1=r(Bs);Lg=c(M1,"Automatic Differentiation"),M1.forEach(t),$1.forEach(t),Vg=d($t),Dd=s($t,"LI",{});var D1=r(Dd);qs=s(D1,"A",{href:!0,rel:!0});var T1=r(qs);Bg=c(T1,"Vectorization"),T1.forEach(t),D1.forEach(t),qg=d($t),Td=s($t,"LI",{});var k1=r(Td);Js=s(k1,"A",{href:!0,rel:!0});var A1=r(Js);Jg=c(A1,"Parallelization"),A1.forEach(t),k1.forEach(t),$t.forEach(t),de.forEach(t),sc=d(o),io=s(o,"H2",{class:!0});var Mm=r(io);ct=s(Mm,"A",{id:!0,class:!0,href:!0});var N1=r(ct);kd=s(N1,"SPAN",{});var U1=r(kd);u(zs.$$.fragment,U1),U1.forEach(t),N1.forEach(t),zg=d(Mm),Ad=s(Mm,"SPAN",{});var E1=r(Ad);Wg=c(E1,"FlaxDecoderOutput"),E1.forEach(t),Mm.forEach(t),rc=d(o),pe=s(o,"DIV",{class:!0});var sa=r(pe);u(Ws.$$.fragment,sa),Xg=d(sa),Nd=s(sa,"P",{});var C1=r(Nd);Sg=c(C1,"Output of decoding method."),C1.forEach(t),Gg=d(sa),mt=s(sa,"DIV",{class:!0});var Dm=r(mt);u(Xs.$$.fragment,Dm),Kg=d(Dm),Ud=s(Dm,"P",{});var F1=r(Ud);Rg=c(F1,"\u201CReturns a new object replacing the specified fields with new values."),F1.forEach(t),Dm.forEach(t),sa.forEach(t),ac=d(o),lo=s(o,"H2",{class:!0});var Tm=r(lo);pt=s(Tm,"A",{id:!0,class:!0,href:!0});var P1=r(pt);Ed=s(P1,"SPAN",{});var j1=r(Ed);u(Ss.$$.fragment,j1),j1.forEach(t),P1.forEach(t),Zg=d(Tm),Cd=s(Tm,"SPAN",{});var I1=r(Cd);Yg=c(I1,"FlaxAutoencoderKLOutput"),I1.forEach(t),Tm.forEach(t),ic=d(o),fe=s(o,"DIV",{class:!0});var ra=r(fe);u(Gs.$$.fragment,ra),Qg=d(ra),Fd=s(ra,"P",{});var O1=r(Fd);Hg=c(O1,"Output of AutoencoderKL encoding method."),O1.forEach(t),eb=d(ra),ft=s(ra,"DIV",{class:!0});var 
km=r(ft);u(Ks.$$.fragment,km),ob=d(km),Pd=s(km,"P",{});var L1=r(Pd);tb=c(L1,"\u201CReturns a new object replacing the specified fields with new values."),L1.forEach(t),km.forEach(t),ra.forEach(t),dc=d(o),co=s(o,"H2",{class:!0});var Am=r(co);ut=s(Am,"A",{id:!0,class:!0,href:!0});var V1=r(ut);jd=s(V1,"SPAN",{});var B1=r(jd);u(Rs.$$.fragment,B1),B1.forEach(t),V1.forEach(t),nb=d(Am),Id=s(Am,"SPAN",{});var q1=r(Id);sb=c(q1,"FlaxAutoencoderKL"),q1.forEach(t),Am.forEach(t),lc=d(o),z=s(o,"DIV",{class:!0});var Ne=r(z);u(Zs.$$.fragment,Ne),rb=d(Ne),Od=s(Ne,"P",{});var J1=r(Od);ab=c(J1,`Flax Implementation of Variational Autoencoder (VAE) model with KL loss from the paper Auto-Encoding Variational | |
| Bayes by Diederik P. Kingma and Max Welling.`),J1.forEach(t),ib=d(Ne),Ys=s(Ne,"P",{});var Nm=r(Ys);db=c(Nm,"This model is a Flax Linen "),Qs=s(Nm,"A",{href:!0,rel:!0});var z1=r(Qs);lb=c(z1,"flax.linen.Module"),z1.forEach(t),cb=c(Nm,` | |
| subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to | |
| general usage and behavior.`),Nm.forEach(t),mb=d(Ne),Ld=s(Ne,"P",{});var W1=r(Ld);pb=c(W1,"Finally, this model supports inherent JAX features such as:"),W1.forEach(t),fb=d(Ne),ue=s(Ne,"UL",{});var Mt=r(ue);Vd=s(Mt,"LI",{});var X1=r(Vd);Hs=s(X1,"A",{href:!0,rel:!0});var S1=r(Hs);ub=c(S1,"Just-In-Time (JIT) compilation"),S1.forEach(t),X1.forEach(t),hb=d(Mt),Bd=s(Mt,"LI",{});var G1=r(Bd);er=s(G1,"A",{href:!0,rel:!0});var K1=r(er);_b=c(K1,"Automatic Differentiation"),K1.forEach(t),G1.forEach(t),gb=d(Mt),qd=s(Mt,"LI",{});var R1=r(qd);or=s(R1,"A",{href:!0,rel:!0});var Z1=r(or);bb=c(Z1,"Vectorization"),Z1.forEach(t),R1.forEach(t),vb=d(Mt),Jd=s(Mt,"LI",{});var Y1=r(Jd);tr=s(Y1,"A",{href:!0,rel:!0});var Q1=r(tr);yb=c(Q1,"Parallelization"),Q1.forEach(t),Y1.forEach(t),Mt.forEach(t),Ne.forEach(t),cc=d(o),mo=s(o,"H2",{class:!0});var Um=r(mo);ht=s(Um,"A",{id:!0,class:!0,href:!0});var H1=r(ht);zd=s(H1,"SPAN",{});var ex=r(zd);u(nr.$$.fragment,ex),ex.forEach(t),H1.forEach(t),xb=d(Um),Wd=s(Um,"SPAN",{});var ox=r(Wd);wb=c(ox,"FlaxControlNetOutput"),ox.forEach(t),Um.forEach(t),mc=d(o),po=s(o,"DIV",{class:!0});var Em=r(po);u(sr.$$.fragment,Em),$b=d(Em),_t=s(Em,"DIV",{class:!0});var Cm=r(_t);u(rr.$$.fragment,Cm),Mb=d(Cm),Xd=s(Cm,"P",{});var tx=r(Xd);Db=c(tx,"\u201CReturns a new object replacing the specified fields with new values."),tx.forEach(t),Cm.forEach(t),Em.forEach(t),pc=d(o),fo=s(o,"H2",{class:!0});var Fm=r(fo);gt=s(Fm,"A",{id:!0,class:!0,href:!0});var nx=r(gt);Sd=s(nx,"SPAN",{});var sx=r(Sd);u(ar.$$.fragment,sx),sx.forEach(t),nx.forEach(t),Tb=d(Fm),Gd=s(Fm,"SPAN",{});var rx=r(Gd);kb=c(rx,"FlaxControlNetModel"),rx.forEach(t),Fm.forEach(t),fc=d(o),V=s(o,"DIV",{class:!0});var le=r(V);u(ir.$$.fragment,le),Ab=d(le),dr=s(le,"P",{});var Pm=r(dr);Nb=c(Pm,"Quoting from "),lr=s(Pm,"A",{href:!0,rel:!0});var ax=r(lr);Ub=c(ax,"https://arxiv.org/abs/2302.05543"),ax.forEach(t),Eb=c(Pm,`: \u201CStable Diffusion uses a pre-processing method similar to VQ-GAN | |
| [11] to convert the entire dataset of 512 \xD7 512 images into smaller 64 \xD7 64 \u201Clatent images\u201D for stabilized | |
| training. This requires ControlNets to convert image-based conditions to 64 \xD7 64 feature space to match the | |
| convolution size. We use a tiny network E(\xB7) of four convolution layers with 4 \xD7 4 kernels and 2 \xD7 2 strides | |
| (activated by ReLU, channels are 16, 32, 64, 128, initialized with Gaussian weights, trained jointly with the full | |
| model) to encode image-space conditions \u2026 into feature maps \u2026\u201D`),Pm.forEach(t),Cb=d(le),cr=s(le,"P",{});var jm=r(cr);Fb=c(jm,"This model inherits from "),Rr=s(jm,"A",{href:!0});var ix=r(Rr);Pb=c(ix,"FlaxModelMixin"),ix.forEach(t),jb=c(jm,`. Check the superclass documentation for the generic methods the library | |
| implements for all the models (such as downloading or saving, etc.)`),jm.forEach(t),Ib=d(le),mr=s(le,"P",{});var Im=r(mr);Ob=c(Im,"Also, this model is a Flax Linen "),pr=s(Im,"A",{href:!0,rel:!0});var dx=r(pr);Lb=c(dx,"flax.linen.Module"),dx.forEach(t),Vb=c(Im,` | |
| subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to | |
| general usage and behavior.`),Im.forEach(t),Bb=d(le),Kd=s(le,"P",{});var lx=r(Kd);qb=c(lx,"Finally, this model supports inherent JAX features such as:"),lx.forEach(t),Jb=d(le),he=s(le,"UL",{});var Dt=r(he);Rd=s(Dt,"LI",{});var cx=r(Rd);fr=s(cx,"A",{href:!0,rel:!0});var mx=r(fr);zb=c(mx,"Just-In-Time (JIT) compilation"),mx.forEach(t),cx.forEach(t),Wb=d(Dt),Zd=s(Dt,"LI",{});var px=r(Zd);ur=s(px,"A",{href:!0,rel:!0});var fx=r(ur);Xb=c(fx,"Automatic Differentiation"),fx.forEach(t),px.forEach(t),Sb=d(Dt),Yd=s(Dt,"LI",{});var ux=r(Yd);hr=s(ux,"A",{href:!0,rel:!0});var hx=r(hr);Gb=c(hx,"Vectorization"),hx.forEach(t),ux.forEach(t),Kb=d(Dt),Qd=s(Dt,"LI",{});var _x=r(Qd);_r=s(_x,"A",{href:!0,rel:!0});var gx=r(_r);Rb=c(gx,"Parallelization"),gx.forEach(t),_x.forEach(t),Dt.forEach(t),le.forEach(t),this.h()},h(){a(x,"name","hf:doc:metadata"),a(x,"content",JSON.stringify(jx)),a(w,"id","models"),a(w,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(w,"href","#models"),a(M,"class","relative group"),al.a=il,a(ho,"id","diffusers.ModelMixin"),a(ho,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(ho,"href","#diffusers.ModelMixin"),a(Ee,"class","relative group"),a(wr,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a($r,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin.save_pretrained"),a(ge,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(_o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(be,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(I,"class","docstring 
border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(yo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(xo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(wo,"id","diffusers.models.unet_2d.UNet2DOutput"),a(wo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(wo,"href","#diffusers.models.unet_2d.UNet2DOutput"),a(Fe,"class","relative group"),a(Vt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a($o,"id","diffusers.UNet2DModel"),a($o,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a($o,"href","#diffusers.UNet2DModel"),a(Pe,"class","relative group"),a(Mr,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(Dr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Mo,"id","diffusers.models.unet_1d.UNet1DOutput"),a(Mo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Mo,"href","#diffusers.models.unet_1d.UNet1DOutput"),a(je,"class","relative group"),a(St,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Do,"id","diffusers.UNet1DModel"),a(Do,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 
with-hover:right-full"),a(Do,"href","#diffusers.UNet1DModel"),a(Ie,"class","relative group"),a(Tr,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(kr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(To,"id","diffusers.models.unet_2d_condition.UNet2DConditionOutput"),a(To,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(To,"href","#diffusers.models.unet_2d_condition.UNet2DConditionOutput"),a(Oe,"class","relative group"),a(Ht,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ko,"id","diffusers.UNet2DConditionModel"),a(ko,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(ko,"href","#diffusers.UNet2DConditionModel"),a(Le,"class","relative group"),a(Ar,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(Nr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ve,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Ur,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Ao,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(No,"id","diffusers.models.unet_3d_condition.UNet3DConditionOutput"),a(No,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 
with-hover:right-full"),a(No,"href","#diffusers.models.unet_3d_condition.UNet3DConditionOutput"),a(Ve,"class","relative group"),a(cn,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Uo,"id","diffusers.UNet3DConditionModel"),a(Uo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Uo,"href","#diffusers.UNet3DConditionModel"),a(Be,"class","relative group"),a(Er,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(Cr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ye,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Fr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Eo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Co,"id","diffusers.models.vae.DecoderOutput"),a(Co,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Co,"href","#diffusers.models.vae.DecoderOutput"),a(qe,"class","relative group"),a(Je,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Fo,"id","diffusers.models.vq_model.VQEncoderOutput"),a(Fo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Fo,"href","#diffusers.models.vq_model.VQEncoderOutput"),a(ze,"class","relative group"),a(We,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Po,"id","diffusers.VQModel"),a(Po,"class","header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Po,"href","#diffusers.VQModel"),a(Xe,"class","relative group"),a(Pr,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(jr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(jo,"id","diffusers.models.autoencoder_kl.AutoencoderKLOutput"),a(jo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(jo,"href","#diffusers.models.autoencoder_kl.AutoencoderKLOutput"),a(Se,"class","relative group"),a(Ge,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Io,"id","diffusers.AutoencoderKL"),a(Io,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Io,"href","#diffusers.AutoencoderKL"),a(Ke,"class","relative group"),a(Ir,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(Oo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Lo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Vo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Bo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Or,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Lr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(qo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),a(Jo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(zo,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Wo,"id","diffusers.Transformer2DModel"),a(Wo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Wo,"href","#diffusers.Transformer2DModel"),a(Re,"class","relative group"),a(Vr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Xo,"id","diffusers.models.transformer_2d.Transformer2DModelOutput"),a(Xo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Xo,"href","#diffusers.models.transformer_2d.Transformer2DModelOutput"),a(Ze,"class","relative group"),a(Kn,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(So,"id","diffusers.models.transformer_temporal.TransformerTemporalModel"),a(So,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(So,"href","#diffusers.models.transformer_temporal.TransformerTemporalModel"),a(Ye,"class","relative group"),a(Br,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ce,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Go,"id","diffusers.models.transformer_temporal.TransformerTemporalModelOutput"),a(Go,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute 
with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Go,"href","#diffusers.models.transformer_temporal.TransformerTemporalModelOutput"),a(Qe,"class","relative group"),a(es,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Ko,"id","diffusers.PriorTransformer"),a(Ko,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Ko,"href","#diffusers.PriorTransformer"),a(He,"class","relative group"),a(qr,"href","/docs/diffusers/main/en/api/models#diffusers.ModelMixin"),a(rs,"href","https://arxiv.org/abs/2204.06125"),a(rs,"rel","nofollow"),a(zr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Wr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Ro,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Zo,"id","diffusers.models.prior_transformer.PriorTransformerOutput"),a(Zo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Zo,"href","#diffusers.models.prior_transformer.PriorTransformerOutput"),a(eo,"class","relative group"),a(cs,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Yo,"id","diffusers.models.controlnet.ControlNetOutput"),a(Yo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Yo,"href","#diffusers.models.controlnet.ControlNetOutput"),a(oo,"class","relative group"),a(fs,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 
border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Qo,"id","diffusers.ControlNetModel"),a(Qo,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(Qo,"href","#diffusers.ControlNetModel"),a(to,"class","relative group"),a(Ho,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(xe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(Xr,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(et,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ot,"id","diffusers.FlaxModelMixin"),a(ot,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(ot,"href","#diffusers.FlaxModelMixin"),a(no,"class","relative group"),a(Gr,"href","/docs/diffusers/main/en/api/models#diffusers.FlaxModelMixin"),a(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(nt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(re,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ae,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(we,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(it,"id","diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput"),a(it,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full"),a(it,"href","#diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput"),a(so,"class","relative group"),a(dt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ro,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(lt,"id","diffusers.FlaxUNet2DConditionModel"),a(lt,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(lt,"href","#diffusers.FlaxUNet2DConditionModel"),a(ao,"class","relative group"),a(Kr,"href","/docs/diffusers/main/en/api/models#diffusers.FlaxModelMixin"),a(Ls,"href","https://flax.readthedocs.io/en/latest/flax.linen.html#module"),a(Ls,"rel","nofollow"),a(Vs,"href","https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit"),a(Vs,"rel","nofollow"),a(Bs,"href","https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation"),a(Bs,"rel","nofollow"),a(qs,"href","https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap"),a(qs,"rel","nofollow"),a(Js,"href","https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap"),a(Js,"rel","nofollow"),a(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ct,"id","diffusers.models.vae_flax.FlaxDecoderOutput"),a(ct,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(ct,"href","#diffusers.models.vae_flax.FlaxDecoderOutput"),a(io,"class","relative group"),a(mt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(pe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),a(pt,"id","diffusers.models.vae_flax.FlaxAutoencoderKLOutput"),a(pt,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(pt,"href","#diffusers.models.vae_flax.FlaxAutoencoderKLOutput"),a(lo,"class","relative group"),a(ft,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(fe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ut,"id","diffusers.FlaxAutoencoderKL"),a(ut,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(ut,"href","#diffusers.FlaxAutoencoderKL"),a(co,"class","relative group"),a(Qs,"href","https://flax.readthedocs.io/en/latest/flax.linen.html#module"),a(Qs,"rel","nofollow"),a(Hs,"href","https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit"),a(Hs,"rel","nofollow"),a(er,"href","https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation"),a(er,"rel","nofollow"),a(or,"href","https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap"),a(or,"rel","nofollow"),a(tr,"href","https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap"),a(tr,"rel","nofollow"),a(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(ht,"id","diffusers.models.controlnet_flax.FlaxControlNetOutput"),a(ht,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(ht,"href","#diffusers.models.controlnet_flax.FlaxControlNetOutput"),a(mo,"class","relative group"),a(_t,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),a(po,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 
rounded-tl-xl mb-6 mt-8"),a(gt,"id","diffusers.FlaxControlNetModel"),a(gt,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),a(gt,"href","#diffusers.FlaxControlNetModel"),a(fo,"class","relative group"),a(lr,"href","https://arxiv.org/abs/2302.05543"),a(lr,"rel","nofollow"),a(Rr,"href","/docs/diffusers/main/en/api/models#diffusers.FlaxModelMixin"),a(pr,"href","https://flax.readthedocs.io/en/latest/flax.linen.html#module"),a(pr,"rel","nofollow"),a(fr,"href","https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit"),a(fr,"rel","nofollow"),a(ur,"href","https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation"),a(ur,"rel","nofollow"),a(hr,"href","https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap"),a(hr,"rel","nofollow"),a(_r,"href","https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap"),a(_r,"rel","nofollow"),a(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(o,m){e(document.head,x),p(o,T,m),p(o,M,m),e(M,w),e(w,D),h(v,D,null),e(M,$),e(M,se),e(se,Ue),p(o,Z,m),p(o,Y,m),e(Y,yr),al.m(vx,Y),e(Y,il),e(Y,aa),e(aa,Om),e(Y,Lm),p(o,dl,m),p(o,Ee,m),e(Ee,ho),e(ho,ia),h(Tt,ia,null),e(Ee,Vm),e(Ee,da),e(da,Bm),p(o,ll,m),p(o,N,m),h(kt,N,null),e(N,qm),e(N,la),e(la,Jm),e(N,zm),e(N,xr),e(xr,wr),e(wr,Wm),e(xr,Xm),e(N,Sm),e(N,ca),e(ca,_e),e(_e,ma),e(ma,Gm),e(_e,Km),e(_e,pa),e(pa,Rm),e(_e,Zm),e(_e,$r),e($r,Ym),e(_e,Qm),e(N,Hm),e(N,ge),h(At,ge,null),e(ge,ep),e(ge,fa),e(fa,op),e(ge,tp),e(ge,ua),e(ua,np),e(N,sp),e(N,_o),h(Nt,_o,null),e(_o,rp),e(_o,ha),e(ha,ap),e(N,ip),e(N,be),h(Ut,be,null),e(be,dp),e(be,_a),e(_a,lp),e(be,cp),e(be,ga),e(ga,mp),e(N,pp),e(N,W),h(Et,W,null),e(W,fp),e(W,ba),e(ba,up),e(W,hp),e(W,va),e(va,_p),e(W,gp),e(W,ya),e(ya,bp),e(W,vp),h(go,W,null),e(N,yp),e(N,I),h(Ct,I,null),e(I,xp),e(I,xa),e(xa,wp),e(I,$p),e(I,Ce),e(Ce,Mp),e(Ce,wa),e(wa,Dp),e(Ce,Tp),e(Ce,$a),e($a,kp),e(Ce,Ap),e(I,Np),e(I,Ft),e(Ft,Up),e(Ft,Ma),e(Ma,Ep),e(Ft,Cp),e(I,Fp),e(I,Pt),e(Pt,Pp),e(Pt,Da),e(Da,jp),e(Pt,Ip),e(I,Op),h(bo,I,null),e(I,Lp),h(vo,I,null),e(N,Vp),e(N,yo),h(jt,yo,null),e(yo,Bp),e(yo,Ta),e(Ta,qp),e(N,Jp),e(N,xo),h(It,xo,null),e(xo,zp),e(xo,Ot),e(Ot,Wp),e(Ot,ka),e(ka,Xp),e(Ot,Sp),p(o,cl,m),p(o,Fe,m),e(Fe,wo),e(wo,Aa),h(Lt,Aa,null),e(Fe,Gp),e(Fe,Na),e(Na,Kp),p(o,ml,m),p(o,Vt,m),h(Bt,Vt,null),p(o,pl,m),p(o,Pe,m),e(Pe,$o),e($o,Ua),h(qt,Ua,null),e(Pe,Rp),e(Pe,Ea),e(Ea,Zp),p(o,fl,m),p(o,Q,m),h(Jt,Q,null),e(Q,Yp),e(Q,Ca),e(Ca,Qp),e(Q,Hp),e(Q,zt),e(zt,ef),e(zt,Mr),e(Mr,of),e(zt,tf),e(Q,nf),e(Q,Dr),h(Wt,Dr,null),p(o,ul,m),p(o,je,m),e(je,Mo),e(Mo,Fa),h(Xt,Fa,null),e(je,sf),e(je,Pa),e(Pa,rf),p(o,hl,m),p(o,St,m),h(Gt,St,null),p(o,_l,m),p(o,Ie,m),e(Ie,Do),e(Do,ja),h(Kt,ja,null),e(Ie,af),e(Ie,Ia),e(Ia,df),p(o,gl,m),p(o,H,m),h(Rt,H,null),e(H,lf),e(H,Oa),e(Oa,cf),e(H,mf),e(H,Zt),e(Zt,pf),e(Zt,Tr),e(Tr,ff),e(Zt,uf),e(H,hf),e(H,kr),h(Yt,kr,null),p(o,bl,m),p(o,Oe,m),e(Oe,To),e(To,La),h(Qt,La,null),e(Oe,_f),e(Oe,Va),e(Va,gf),p(o,vl,m),p(o,Ht,m),h(en,Ht,null),
p(o,yl,m),p(o,Le,m),e(Le,ko),e(ko,Ba),h(on,Ba,null),e(Le,bf),e(Le,qa),e(qa,vf),p(o,xl,m),p(o,F,m),h(tn,F,null),e(F,yf),e(F,Ja),e(Ja,xf),e(F,wf),e(F,nn),e(nn,$f),e(nn,Ar),e(Ar,Mf),e(nn,Df),e(F,Tf),e(F,Nr),h(sn,Nr,null),e(F,kf),e(F,ve),h(rn,ve,null),e(ve,Af),e(ve,za),e(za,Nf),e(ve,Uf),e(ve,Wa),e(Wa,Ef),e(F,Cf),e(F,Ur),h(an,Ur,null),e(F,Ff),e(F,Ao),h(dn,Ao,null),e(Ao,Pf),e(Ao,Xa),e(Xa,jf),p(o,wl,m),p(o,Ve,m),e(Ve,No),e(No,Sa),h(ln,Sa,null),e(Ve,If),e(Ve,Ga),e(Ga,Of),p(o,$l,m),p(o,cn,m),h(mn,cn,null),p(o,Ml,m),p(o,Be,m),e(Be,Uo),e(Uo,Ka),h(pn,Ka,null),e(Be,Lf),e(Be,Ra),e(Ra,Vf),p(o,Dl,m),p(o,P,m),h(fn,P,null),e(P,Bf),e(P,Za),e(Za,qf),e(P,Jf),e(P,un),e(un,zf),e(un,Er),e(Er,Wf),e(un,Xf),e(P,Sf),e(P,Cr),h(hn,Cr,null),e(P,Gf),e(P,ye),h(_n,ye,null),e(ye,Kf),e(ye,Ya),e(Ya,Rf),e(ye,Zf),e(ye,Qa),e(Qa,Yf),e(P,Qf),e(P,Fr),h(gn,Fr,null),e(P,Hf),e(P,Eo),h(bn,Eo,null),e(Eo,eu),e(Eo,Ha),e(Ha,ou),p(o,Tl,m),p(o,qe,m),e(qe,Co),e(Co,ei),h(vn,ei,null),e(qe,tu),e(qe,oi),e(oi,nu),p(o,kl,m),p(o,Je,m),h(yn,Je,null),e(Je,su),e(Je,ti),e(ti,ru),p(o,Al,m),p(o,ze,m),e(ze,Fo),e(Fo,ni),h(xn,ni,null),e(ze,au),e(ze,si),e(si,iu),p(o,Nl,m),p(o,We,m),h(wn,We,null),e(We,du),e(We,ri),e(ri,lu),p(o,Ul,m),p(o,Xe,m),e(Xe,Po),e(Po,ai),h($n,ai,null),e(Xe,cu),e(Xe,ii),e(ii,mu),p(o,El,m),p(o,ee,m),h(Mn,ee,null),e(ee,pu),e(ee,di),e(di,fu),e(ee,uu),e(ee,Dn),e(Dn,hu),e(Dn,Pr),e(Pr,_u),e(Dn,gu),e(ee,bu),e(ee,jr),h(Tn,jr,null),p(o,Cl,m),p(o,Se,m),e(Se,jo),e(jo,li),h(kn,li,null),e(Se,vu),e(Se,ci),e(ci,yu),p(o,Fl,m),p(o,Ge,m),h(An,Ge,null),e(Ge,xu),e(Ge,mi),e(mi,wu),p(o,Pl,m),p(o,Ke,m),e(Ke,Io),e(Io,pi),h(Nn,pi,null),e(Ke,$u),e(Ke,fi),e(fi,Mu),p(o,jl,m),p(o,A,m),h(Un,A,null),e(A,Du),e(A,ui),e(ui,Tu),e(A,ku),e(A,En),e(En,Au),e(En,Ir),e(Ir,Nu),e(En,Uu),e(A,Eu),e(A,Oo),h(Cn,Oo,null),e(Oo,Cu),e(Oo,Fn),e(Fn,Fu),e(Fn,hi),e(hi,Pu),e(Fn,ju),e(A,Iu),e(A,Lo),h(Pn,Lo,null),e(Lo,Ou),e(Lo,jn),e(jn,Lu),e(jn,_i),e(_i,Vu),e(jn,Bu),e(A,qu),e(A,Vo),h(In,Vo,null),e(Vo,Ju),e(Vo,gi),e(gi,zu),e(A,Wu),e(A,Bo),h(On,Bo,null),e(Bo,Xu),e(Bo,bi),e(
bi,Su),e(A,Gu),e(A,Or),h(Ln,Or,null),e(A,Ku),e(A,Lr),h(Vn,Lr,null),e(A,Ru),e(A,qo),h(Bn,qo,null),e(qo,Zu),e(qo,vi),e(vi,Yu),e(A,Qu),e(A,Jo),h(qn,Jo,null),e(Jo,Hu),e(Jo,yi),e(yi,eh),e(A,oh),e(A,zo),h(Jn,zo,null),e(zo,th),e(zo,xi),e(xi,nh),p(o,Il,m),p(o,Re,m),e(Re,Wo),e(Wo,wi),h(zn,wi,null),e(Re,sh),e(Re,$i),e($i,rh),p(o,Ol,m),p(o,O,m),h(Wn,O,null),e(O,ah),e(O,Mi),e(Mi,ih),e(O,dh),e(O,Di),e(Di,lh),e(O,ch),e(O,Xn),e(Xn,mh),e(Xn,Ti),e(Ti,ph),e(Xn,fh),e(O,uh),e(O,ki),e(ki,hh),e(O,_h),e(O,Vr),h(Sn,Vr,null),p(o,Ll,m),p(o,Ze,m),e(Ze,Xo),e(Xo,Ai),h(Gn,Ai,null),e(Ze,gh),e(Ze,Ni),e(Ni,bh),p(o,Vl,m),p(o,Kn,m),h(Rn,Kn,null),p(o,Bl,m),p(o,Ye,m),e(Ye,So),e(So,Ui),h(Zn,Ui,null),e(Ye,vh),e(Ye,Ei),e(Ei,yh),p(o,ql,m),p(o,ce,m),h(Yn,ce,null),e(ce,xh),e(ce,Ci),e(Ci,wh),e(ce,$h),e(ce,Br),h(Qn,Br,null),p(o,Jl,m),p(o,Qe,m),e(Qe,Go),e(Go,Fi),h(Hn,Fi,null),e(Qe,Mh),e(Qe,Pi),e(Pi,Dh),p(o,zl,m),p(o,es,m),h(os,es,null),p(o,Wl,m),p(o,He,m),e(He,Ko),e(Ko,ji),h(ts,ji,null),e(He,Th),e(He,Ii),e(Ii,kh),p(o,Xl,m),p(o,j,m),h(ns,j,null),e(j,Ah),e(j,Oi),e(Oi,Nh),e(j,Uh),e(j,ss),e(ss,Eh),e(ss,qr),e(qr,Ch),e(ss,Fh),e(j,Ph),e(j,Jr),e(Jr,jh),e(Jr,rs),e(rs,Ih),e(j,Oh),e(j,zr),h(as,zr,null),e(j,Lh),e(j,Wr),h(is,Wr,null),e(j,Vh),e(j,Ro),h(ds,Ro,null),e(Ro,Bh),e(Ro,Li),e(Li,qh),p(o,Sl,m),p(o,eo,m),e(eo,Zo),e(Zo,Vi),h(ls,Vi,null),e(eo,Jh),e(eo,Bi),e(Bi,zh),p(o,Gl,m),p(o,cs,m),h(ms,cs,null),p(o,Kl,m),p(o,oo,m),e(oo,Yo),e(Yo,qi),h(ps,qi,null),e(oo,Wh),e(oo,Ji),e(Ji,Xh),p(o,Rl,m),p(o,fs,m),h(us,fs,null),p(o,Zl,m),p(o,to,m),e(to,Qo),e(Qo,zi),h(hs,zi,null),e(to,Sh),e(to,Wi),e(Wi,Gh),p(o,Yl,m),p(o,J,m),h(_s,J,null),e(J,Kh),e(J,Ho),h(gs,Ho,null),e(Ho,Rh),e(Ho,Xi),e(Xi,Zh),e(J,Yh),e(J,xe),h(bs,xe,null),e(xe,Qh),e(xe,Si),e(Si,Hh),e(xe,e_),e(xe,Gi),e(Gi,o_),e(J,t_),e(J,Xr),h(vs,Xr,null),e(J,n_),e(J,et),h(ys,et,null),e(et,s_),e(et,Ki),e(Ki,r_),p(o,Ql,m),p(o,no,m),e(no,ot),e(ot,Ri),h(xs,Ri,null),e(no,a_),e(no,Zi),e(Zi,i_),p(o,Hl,m),p(o,C,m),h(ws,C,null),e(C,d_),e(C,Yi),e(Yi,l_),e(C,c_),e(C,Sr),e(Sr,Gr),e(Gr,m_),e(Sr,p_),e(C,
f_),e(C,X),h($s,X,null),e(X,u_),e(X,Qi),e(Qi,h_),e(X,__),e(X,Ms),e(Ms,g_),e(Ms,Hi),e(Hi,b_),e(Ms,v_),e(X,y_),e(X,Ds),e(Ds,x_),e(Ds,ed),e(ed,w_),e(Ds,$_),e(X,M_),h(tt,X,null),e(C,D_),e(C,nt),h(Ts,nt,null),e(nt,T_),e(nt,ks),e(ks,k_),e(ks,od),e(od,A_),e(ks,N_),e(C,U_),e(C,re),h(As,re,null),e(re,E_),e(re,oe),e(oe,C_),e(oe,td),e(td,F_),e(oe,P_),e(oe,nd),e(nd,j_),e(oe,I_),e(oe,sd),e(sd,O_),e(oe,L_),e(oe,rd),e(rd,V_),e(oe,B_),e(re,q_),e(re,ad),e(ad,J_),e(re,z_),h(st,re,null),e(C,W_),e(C,ae),h(Ns,ae,null),e(ae,X_),e(ae,te),e(te,S_),e(te,id),e(id,G_),e(te,K_),e(te,dd),e(dd,R_),e(te,Z_),e(te,ld),e(ld,Y_),e(te,Q_),e(te,cd),e(cd,H_),e(te,eg),e(ae,og),e(ae,md),e(md,tg),e(ae,ng),h(rt,ae,null),e(C,sg),e(C,we),h(Us,we,null),e(we,rg),e(we,ne),e(ne,ag),e(ne,pd),e(pd,ig),e(ne,dg),e(ne,fd),e(fd,lg),e(ne,cg),e(ne,ud),e(ud,mg),e(ne,pg),e(ne,hd),e(hd,fg),e(ne,ug),e(we,hg),h(at,we,null),p(o,ec,m),p(o,so,m),e(so,it),e(it,_d),h(Es,_d,null),e(so,_g),e(so,gd),e(gd,gg),p(o,oc,m),p(o,ro,m),h(Cs,ro,null),e(ro,bg),e(ro,dt),h(Fs,dt,null),e(dt,vg),e(dt,bd),e(bd,yg),p(o,tc,m),p(o,ao,m),e(ao,lt),e(lt,vd),h(Ps,vd,null),e(ao,xg),e(ao,yd),e(yd,wg),p(o,nc,m),p(o,L,m),h(js,L,null),e(L,$g),e(L,xd),e(xd,Mg),e(L,Dg),e(L,Is),e(Is,Tg),e(Is,Kr),e(Kr,kg),e(Is,Ag),e(L,Ng),e(L,Os),e(Os,Ug),e(Os,Ls),e(Ls,Eg),e(Os,Cg),e(L,Fg),e(L,wd),e(wd,Pg),e(L,jg),e(L,me),e(me,$d),e($d,Vs),e(Vs,Ig),e(me,Og),e(me,Md),e(Md,Bs),e(Bs,Lg),e(me,Vg),e(me,Dd),e(Dd,qs),e(qs,Bg),e(me,qg),e(me,Td),e(Td,Js),e(Js,Jg),p(o,sc,m),p(o,io,m),e(io,ct),e(ct,kd),h(zs,kd,null),e(io,zg),e(io,Ad),e(Ad,Wg),p(o,rc,m),p(o,pe,m),h(Ws,pe,null),e(pe,Xg),e(pe,Nd),e(Nd,Sg),e(pe,Gg),e(pe,mt),h(Xs,mt,null),e(mt,Kg),e(mt,Ud),e(Ud,Rg),p(o,ac,m),p(o,lo,m),e(lo,pt),e(pt,Ed),h(Ss,Ed,null),e(lo,Zg),e(lo,Cd),e(Cd,Yg),p(o,ic,m),p(o,fe,m),h(Gs,fe,null),e(fe,Qg),e(fe,Fd),e(Fd,Hg),e(fe,eb),e(fe,ft),h(Ks,ft,null),e(ft,ob),e(ft,Pd),e(Pd,tb),p(o,dc,m),p(o,co,m),e(co,ut),e(ut,jd),h(Rs,jd,null),e(co,nb),e(co,Id),e(Id,sb),p(o,lc,m),p(o,z,m),h(Zs,z,null),e(z,rb),e(z,Od),e(Od,ab),e(
z,ib),e(z,Ys),e(Ys,db),e(Ys,Qs),e(Qs,lb),e(Ys,cb),e(z,mb),e(z,Ld),e(Ld,pb),e(z,fb),e(z,ue),e(ue,Vd),e(Vd,Hs),e(Hs,ub),e(ue,hb),e(ue,Bd),e(Bd,er),e(er,_b),e(ue,gb),e(ue,qd),e(qd,or),e(or,bb),e(ue,vb),e(ue,Jd),e(Jd,tr),e(tr,yb),p(o,cc,m),p(o,mo,m),e(mo,ht),e(ht,zd),h(nr,zd,null),e(mo,xb),e(mo,Wd),e(Wd,wb),p(o,mc,m),p(o,po,m),h(sr,po,null),e(po,$b),e(po,_t),h(rr,_t,null),e(_t,Mb),e(_t,Xd),e(Xd,Db),p(o,pc,m),p(o,fo,m),e(fo,gt),e(gt,Sd),h(ar,Sd,null),e(fo,Tb),e(fo,Gd),e(Gd,kb),p(o,fc,m),p(o,V,m),h(ir,V,null),e(V,Ab),e(V,dr),e(dr,Nb),e(dr,lr),e(lr,Ub),e(dr,Eb),e(V,Cb),e(V,cr),e(cr,Fb),e(cr,Rr),e(Rr,Pb),e(cr,jb),e(V,Ib),e(V,mr),e(mr,Ob),e(mr,pr),e(pr,Lb),e(mr,Vb),e(V,Bb),e(V,Kd),e(Kd,qb),e(V,Jb),e(V,he),e(he,Rd),e(Rd,fr),e(fr,zb),e(he,Wb),e(he,Zd),e(Zd,ur),e(ur,Xb),e(he,Sb),e(he,Yd),e(Yd,hr),e(hr,Gb),e(he,Kb),e(he,Qd),e(Qd,_r),e(_r,Rb),uc=!0},p(o,[m]){const gr={};m&2&&(gr.$$scope={dirty:m,ctx:o}),go.$set(gr);const Hd={};m&2&&(Hd.$$scope={dirty:m,ctx:o}),bo.$set(Hd);const el={};m&2&&(el.$$scope={dirty:m,ctx:o}),vo.$set(el);const ol={};m&2&&(ol.$$scope={dirty:m,ctx:o}),tt.$set(ol);const uo={};m&2&&(uo.$$scope={dirty:m,ctx:o}),st.$set(uo);const tl={};m&2&&(tl.$$scope={dirty:m,ctx:o}),rt.$set(tl);const 
br={};m&2&&(br.$$scope={dirty:m,ctx:o}),at.$set(br)},i(o){uc||(_(v.$$.fragment,o),_(Tt.$$.fragment,o),_(kt.$$.fragment,o),_(At.$$.fragment,o),_(Nt.$$.fragment,o),_(Ut.$$.fragment,o),_(Et.$$.fragment,o),_(go.$$.fragment,o),_(Ct.$$.fragment,o),_(bo.$$.fragment,o),_(vo.$$.fragment,o),_(jt.$$.fragment,o),_(It.$$.fragment,o),_(Lt.$$.fragment,o),_(Bt.$$.fragment,o),_(qt.$$.fragment,o),_(Jt.$$.fragment,o),_(Wt.$$.fragment,o),_(Xt.$$.fragment,o),_(Gt.$$.fragment,o),_(Kt.$$.fragment,o),_(Rt.$$.fragment,o),_(Yt.$$.fragment,o),_(Qt.$$.fragment,o),_(en.$$.fragment,o),_(on.$$.fragment,o),_(tn.$$.fragment,o),_(sn.$$.fragment,o),_(rn.$$.fragment,o),_(an.$$.fragment,o),_(dn.$$.fragment,o),_(ln.$$.fragment,o),_(mn.$$.fragment,o),_(pn.$$.fragment,o),_(fn.$$.fragment,o),_(hn.$$.fragment,o),_(_n.$$.fragment,o),_(gn.$$.fragment,o),_(bn.$$.fragment,o),_(vn.$$.fragment,o),_(yn.$$.fragment,o),_(xn.$$.fragment,o),_(wn.$$.fragment,o),_($n.$$.fragment,o),_(Mn.$$.fragment,o),_(Tn.$$.fragment,o),_(kn.$$.fragment,o),_(An.$$.fragment,o),_(Nn.$$.fragment,o),_(Un.$$.fragment,o),_(Cn.$$.fragment,o),_(Pn.$$.fragment,o),_(In.$$.fragment,o),_(On.$$.fragment,o),_(Ln.$$.fragment,o),_(Vn.$$.fragment,o),_(Bn.$$.fragment,o),_(qn.$$.fragment,o),_(Jn.$$.fragment,o),_(zn.$$.fragment,o),_(Wn.$$.fragment,o),_(Sn.$$.fragment,o),_(Gn.$$.fragment,o),_(Rn.$$.fragment,o),_(Zn.$$.fragment,o),_(Yn.$$.fragment,o),_(Qn.$$.fragment,o),_(Hn.$$.fragment,o),_(os.$$.fragment,o),_(ts.$$.fragment,o),_(ns.$$.fragment,o),_(as.$$.fragment,o),_(is.$$.fragment,o),_(ds.$$.fragment,o),_(ls.$$.fragment,o),_(ms.$$.fragment,o),_(ps.$$.fragment,o),_(us.$$.fragment,o),_(hs.$$.fragment,o),_(_s.$$.fragment,o),_(gs.$$.fragment,o),_(bs.$$.fragment,o),_(vs.$$.fragment,o),_(ys.$$.fragment,o),_(xs.$$.fragment,o),_(ws.$$.fragment,o),_($s.$$.fragment,o),_(tt.$$.fragment,o),_(Ts.$$.fragment,o),_(As.$$.fragment,o),_(st.$$.fragment,o),_(Ns.$$.fragment,o),_(rt.$$.fragment,o),_(Us.$$.fragment,o),_(at.$$.fragment,o),_(Es.$$.fragment,o),_(Cs.$$.fragment,o
),_(Fs.$$.fragment,o),_(Ps.$$.fragment,o),_(js.$$.fragment,o),_(zs.$$.fragment,o),_(Ws.$$.fragment,o),_(Xs.$$.fragment,o),_(Ss.$$.fragment,o),_(Gs.$$.fragment,o),_(Ks.$$.fragment,o),_(Rs.$$.fragment,o),_(Zs.$$.fragment,o),_(nr.$$.fragment,o),_(sr.$$.fragment,o),_(rr.$$.fragment,o),_(ar.$$.fragment,o),_(ir.$$.fragment,o),uc=!0)},o(o){g(v.$$.fragment,o),g(Tt.$$.fragment,o),g(kt.$$.fragment,o),g(At.$$.fragment,o),g(Nt.$$.fragment,o),g(Ut.$$.fragment,o),g(Et.$$.fragment,o),g(go.$$.fragment,o),g(Ct.$$.fragment,o),g(bo.$$.fragment,o),g(vo.$$.fragment,o),g(jt.$$.fragment,o),g(It.$$.fragment,o),g(Lt.$$.fragment,o),g(Bt.$$.fragment,o),g(qt.$$.fragment,o),g(Jt.$$.fragment,o),g(Wt.$$.fragment,o),g(Xt.$$.fragment,o),g(Gt.$$.fragment,o),g(Kt.$$.fragment,o),g(Rt.$$.fragment,o),g(Yt.$$.fragment,o),g(Qt.$$.fragment,o),g(en.$$.fragment,o),g(on.$$.fragment,o),g(tn.$$.fragment,o),g(sn.$$.fragment,o),g(rn.$$.fragment,o),g(an.$$.fragment,o),g(dn.$$.fragment,o),g(ln.$$.fragment,o),g(mn.$$.fragment,o),g(pn.$$.fragment,o),g(fn.$$.fragment,o),g(hn.$$.fragment,o),g(_n.$$.fragment,o),g(gn.$$.fragment,o),g(bn.$$.fragment,o),g(vn.$$.fragment,o),g(yn.$$.fragment,o),g(xn.$$.fragment,o),g(wn.$$.fragment,o),g($n.$$.fragment,o),g(Mn.$$.fragment,o),g(Tn.$$.fragment,o),g(kn.$$.fragment,o),g(An.$$.fragment,o),g(Nn.$$.fragment,o),g(Un.$$.fragment,o),g(Cn.$$.fragment,o),g(Pn.$$.fragment,o),g(In.$$.fragment,o),g(On.$$.fragment,o),g(Ln.$$.fragment,o),g(Vn.$$.fragment,o),g(Bn.$$.fragment,o),g(qn.$$.fragment,o),g(Jn.$$.fragment,o),g(zn.$$.fragment,o),g(Wn.$$.fragment,o),g(Sn.$$.fragment,o),g(Gn.$$.fragment,o),g(Rn.$$.fragment,o),g(Zn.$$.fragment,o),g(Yn.$$.fragment,o),g(Qn.$$.fragment,o),g(Hn.$$.fragment,o),g(os.$$.fragment,o),g(ts.$$.fragment,o),g(ns.$$.fragment,o),g(as.$$.fragment,o),g(is.$$.fragment,o),g(ds.$$.fragment,o),g(ls.$$.fragment,o),g(ms.$$.fragment,o),g(ps.$$.fragment,o),g(us.$$.fragment,o),g(hs.$$.fragment,o),g(_s.$$.fragment,o),g(gs.$$.fragment,o),g(bs.$$.fragment,o),g(vs.$$.fragment,o),g(ys.$
$.fragment,o),g(xs.$$.fragment,o),g(ws.$$.fragment,o),g($s.$$.fragment,o),g(tt.$$.fragment,o),g(Ts.$$.fragment,o),g(As.$$.fragment,o),g(st.$$.fragment,o),g(Ns.$$.fragment,o),g(rt.$$.fragment,o),g(Us.$$.fragment,o),g(at.$$.fragment,o),g(Es.$$.fragment,o),g(Cs.$$.fragment,o),g(Fs.$$.fragment,o),g(Ps.$$.fragment,o),g(js.$$.fragment,o),g(zs.$$.fragment,o),g(Ws.$$.fragment,o),g(Xs.$$.fragment,o),g(Ss.$$.fragment,o),g(Gs.$$.fragment,o),g(Ks.$$.fragment,o),g(Rs.$$.fragment,o),g(Zs.$$.fragment,o),g(nr.$$.fragment,o),g(sr.$$.fragment,o),g(rr.$$.fragment,o),g(ar.$$.fragment,o),g(ir.$$.fragment,o),uc=!1},d(o){t(x),o&&t(T),o&&t(M),b(v),o&&t(Z),o&&t(Y),o&&t(dl),o&&t(Ee),b(Tt),o&&t(ll),o&&t(N),b(kt),b(At),b(Nt),b(Ut),b(Et),b(go),b(Ct),b(bo),b(vo),b(jt),b(It),o&&t(cl),o&&t(Fe),b(Lt),o&&t(ml),o&&t(Vt),b(Bt),o&&t(pl),o&&t(Pe),b(qt),o&&t(fl),o&&t(Q),b(Jt),b(Wt),o&&t(ul),o&&t(je),b(Xt),o&&t(hl),o&&t(St),b(Gt),o&&t(_l),o&&t(Ie),b(Kt),o&&t(gl),o&&t(H),b(Rt),b(Yt),o&&t(bl),o&&t(Oe),b(Qt),o&&t(vl),o&&t(Ht),b(en),o&&t(yl),o&&t(Le),b(on),o&&t(xl),o&&t(F),b(tn),b(sn),b(rn),b(an),b(dn),o&&t(wl),o&&t(Ve),b(ln),o&&t($l),o&&t(cn),b(mn),o&&t(Ml),o&&t(Be),b(pn),o&&t(Dl),o&&t(P),b(fn),b(hn),b(_n),b(gn),b(bn),o&&t(Tl),o&&t(qe),b(vn),o&&t(kl),o&&t(Je),b(yn),o&&t(Al),o&&t(ze),b(xn),o&&t(Nl),o&&t(We),b(wn),o&&t(Ul),o&&t(Xe),b($n),o&&t(El),o&&t(ee),b(Mn),b(Tn),o&&t(Cl),o&&t(Se),b(kn),o&&t(Fl),o&&t(Ge),b(An),o&&t(Pl),o&&t(Ke),b(Nn),o&&t(jl),o&&t(A),b(Un),b(Cn),b(Pn),b(In),b(On),b(Ln),b(Vn),b(Bn),b(qn),b(Jn),o&&t(Il),o&&t(Re),b(zn),o&&t(Ol),o&&t(O),b(Wn),b(Sn),o&&t(Ll),o&&t(Ze),b(Gn),o&&t(Vl),o&&t(Kn),b(Rn),o&&t(Bl),o&&t(Ye),b(Zn),o&&t(ql),o&&t(ce),b(Yn),b(Qn),o&&t(Jl),o&&t(Qe),b(Hn),o&&t(zl),o&&t(es),b(os),o&&t(Wl),o&&t(He),b(ts),o&&t(Xl),o&&t(j),b(ns),b(as),b(is),b(ds),o&&t(Sl),o&&t(eo),b(ls),o&&t(Gl),o&&t(cs),b(ms),o&&t(Kl),o&&t(oo),b(ps),o&&t(Rl),o&&t(fs),b(us),o&&t(Zl),o&&t(to),b(hs),o&&t(Yl),o&&t(J),b(_s),b(gs),b(bs),b(vs),b(ys),o&&t(Ql),o&&t(no),b(xs),o&&t(Hl),o&&t(C),b(ws),b($s),b(tt),b(Ts),b(As),
b(st),b(Ns),b(rt),b(Us),b(at),o&&t(ec),o&&t(so),b(Es),o&&t(oc),o&&t(ro),b(Cs),b(Fs),o&&t(tc),o&&t(ao),b(Ps),o&&t(nc),o&&t(L),b(js),o&&t(sc),o&&t(io),b(zs),o&&t(rc),o&&t(pe),b(Ws),b(Xs),o&&t(ac),o&&t(lo),b(Ss),o&&t(ic),o&&t(fe),b(Gs),b(Ks),o&&t(dc),o&&t(co),b(Rs),o&&t(lc),o&&t(z),b(Zs),o&&t(cc),o&&t(mo),b(nr),o&&t(mc),o&&t(po),b(sr),b(rr),o&&t(pc),o&&t(fo),b(ar),o&&t(fc),o&&t(V),b(ir)}}}const jx={local:"models",sections:[{local:"diffusers.ModelMixin",title:"ModelMixin"},{local:"diffusers.models.unet_2d.UNet2DOutput",title:"UNet2DOutput"},{local:"diffusers.UNet2DModel",title:"UNet2DModel"},{local:"diffusers.models.unet_1d.UNet1DOutput",title:"UNet1DOutput"},{local:"diffusers.UNet1DModel",title:"UNet1DModel"},{local:"diffusers.models.unet_2d_condition.UNet2DConditionOutput",title:"UNet2DConditionOutput"},{local:"diffusers.UNet2DConditionModel",title:"UNet2DConditionModel"},{local:"diffusers.models.unet_3d_condition.UNet3DConditionOutput",title:"UNet3DConditionOutput"},{local:"diffusers.UNet3DConditionModel",title:"UNet3DConditionModel"},{local:"diffusers.models.vae.DecoderOutput",title:"DecoderOutput"},{local:"diffusers.models.vq_model.VQEncoderOutput",title:"VQEncoderOutput"},{local:"diffusers.VQModel",title:"VQModel"},{local:"diffusers.models.autoencoder_kl.AutoencoderKLOutput",title:"AutoencoderKLOutput"},{local:"diffusers.AutoencoderKL",title:"AutoencoderKL"},{local:"diffusers.Transformer2DModel",title:"Transformer2DModel"},{local:"diffusers.models.transformer_2d.Transformer2DModelOutput",title:"Transformer2DModelOutput"},{local:"diffusers.models.transformer_temporal.TransformerTemporalModel",title:"TransformerTemporalModel"},{local:"diffusers.models.transformer_temporal.TransformerTemporalModelOutput",title:"Transformer2DModelOutput"},{local:"diffusers.PriorTransformer",title:"PriorTransformer"},{local:"diffusers.models.prior_transformer.PriorTransformerOutput",title:"PriorTransformerOutput"},{local:"diffusers.models.controlnet.ControlNetOutput",title:"ControlNetO
utput"},{local:"diffusers.ControlNetModel",title:"ControlNetModel"},{local:"diffusers.FlaxModelMixin",title:"FlaxModelMixin"},{local:"diffusers.models.unet_2d_condition_flax.FlaxUNet2DConditionOutput",title:"FlaxUNet2DConditionOutput"},{local:"diffusers.FlaxUNet2DConditionModel",title:"FlaxUNet2DConditionModel"},{local:"diffusers.models.vae_flax.FlaxDecoderOutput",title:"FlaxDecoderOutput"},{local:"diffusers.models.vae_flax.FlaxAutoencoderKLOutput",title:"FlaxAutoencoderKLOutput"},{local:"diffusers.FlaxAutoencoderKL",title:"FlaxAutoencoderKL"},{local:"diffusers.models.controlnet_flax.FlaxControlNetOutput",title:"FlaxControlNetOutput"},{local:"diffusers.FlaxControlNetModel",title:"FlaxControlNetModel"}],title:"Models"};function Ix(B){return Tx(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class zx extends yx{constructor(x){super();xx(this,x,Ix,Px,wx,{})}}export{zx as default,jx as metadata}; | |
Xet Storage Details
- Size:
- 264 kB
- Xet hash:
- fd6751a6a6ff54fd939f05c625dda247fe649a9c8bc91dbea68123e2eaf3de1a
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.