Buckets:

hf-doc-build/doc / diffusers /v0.16.0 /en /_app /pages /using-diffusers /write_own_pipeline.mdx-hf-doc-builder.js
rtrm's picture
download
raw
57.8 kB
import{S as Do,i as So,s as To,e as n,k as h,w as c,t as o,M as qo,c as l,d as s,m as d,a as i,x as f,h as r,b as m,N as Po,G as t,g as p,y as g,q as _,o as b,B as v,v as No,L as Co}from"../../chunks/vendor-hf-doc-builder.js";import{T as ti}from"../../chunks/Tip-hf-doc-builder.js";import{I as ge}from"../../chunks/IconCopyLink-hf-doc-builder.js";import{C as k}from"../../chunks/CodeBlock-hf-doc-builder.js";import{D as Mo}from"../../chunks/DocNotebookDropdown-hf-doc-builder.js";function Ao(Q){let u,P,$,j,x;return{c(){u=n("p"),P=o("\u{1F4A1} Read the "),$=n("a"),j=o("How does Stable Diffusion work?"),x=o(" blog for more details about how the VAE, UNet, and text encoder models."),this.h()},l(y){u=l(y,"P",{});var w=i(u);P=r(w,"\u{1F4A1} Read the "),$=l(w,"A",{href:!0,rel:!0});var D=i($);j=r(D,"How does Stable Diffusion work?"),D.forEach(s),x=r(w," blog for more details about how the VAE, UNet, and text encoder models."),w.forEach(s),this.h()},h(){m($,"href","https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work"),m($,"rel","nofollow")},m(y,w){p(y,u,w),t(u,P),t(u,$),t($,j),t(u,x)},d(y){y&&s(u)}}}function Io(Q){let u,P,$,j,x;return{c(){u=n("p"),P=o("\u{1F4A1} The "),$=n("code"),j=o("guidance_scale"),x=o(" parameter determines how much weight should be given to the prompt when generating an image.")},l(y){u=l(y,"P",{});var w=i(u);P=r(w,"\u{1F4A1} The "),$=l(w,"CODE",{});var D=i($);j=r(D,"guidance_scale"),D.forEach(s),x=r(w," parameter determines how much weight should be given to the prompt when generating an image."),w.forEach(s)},m(y,w){p(y,u,w),t(u,P),t(u,$),t($,j),t(u,x)},d(y){y&&s(u)}}}function Lo(Q){let u,P,$,j,x,y,w,D;return w=new k({props:{code:"2 ** (len(vae.config.block_out_channels) - 1) == 8",highlighted:'<span class="hljs-number">2</span> ** (<span class="hljs-built_in">len</span>(vae.config.block_out_channels) - <span class="hljs-number">1</span>) == <span class="hljs-number">8</span>'}}),{c(){u=n("p"),P=o("\u{1F4A1} The height and width 
are divided by 8 because the "),$=n("code"),j=o("vae"),x=o(" model has 3 down-sampling layers. You can check by running the following:"),y=h(),c(w.$$.fragment)},l(E){u=l(E,"P",{});var N=i(u);P=r(N,"\u{1F4A1} The height and width are divided by 8 because the "),$=l(N,"CODE",{});var A=i($);j=r(A,"vae"),A.forEach(s),x=r(N," model has 3 down-sampling layers. You can check by running the following:"),N.forEach(s),y=d(E),f(w.$$.fragment,E)},m(E,N){p(E,u,N),t(u,P),t(u,$),t($,j),t(u,x),p(E,y,N),g(w,E,N),D=!0},p:Co,i(E){D||(_(w.$$.fragment,E),D=!0)},o(E){b(w.$$.fragment,E),D=!1},d(E){E&&s(u),E&&s(y),v(w,E)}}}function zo(Q){let u,P,$,j,x,y,w,D,E,N,A,$t,X,Fa,es,Ha,Ya,wt,ss,Wa,yt,H,Z,Ts,_e,Ga,qs,Ra,jt,ts,Ka,kt,be,Et,ve,as,ai,xt,ns,Ba,Pt,ee,Ja,Ns,Qa,Xa,Dt,ls,Za,St,S,$e,Cs,en,sn,we,tn,ye,Ms,an,nn,je,ln,ke,As,on,rn,Ee,pn,xe,Is,hn,dn,Pe,mn,Y,M,un,is,cn,fn,os,gn,_n,Ls,bn,vn,$n,De,wn,zs,yn,jn,Se,Us,kn,En,Te,Tt,T,xn,Os,Pn,Dn,Vs,Sn,Tn,Fs,qn,Nn,Hs,Cn,Mn,qt,rs,An,Nt,W,se,Ys,qe,In,Ws,Ln,Ct,te,zn,Gs,Un,On,Mt,ps,Vn,At,ae,It,I,Fn,hs,Hn,Yn,Ne,Rs,Wn,Gn,Lt,Ce,zt,L,Rn,ds,Kn,Bn,ms,Jn,Qn,Ut,Me,Ot,us,Xn,Vt,Ae,Ft,G,ne,Ks,Ie,Zn,Bs,el,Ht,cs,sl,Yt,le,Wt,fs,tl,Gt,Le,Rt,gs,al,Kt,ze,Bt,q,nl,Js,ll,il,Qs,ol,rl,Xs,pl,hl,Zs,dl,ml,Jt,Ue,Qt,_s,ul,Xt,Oe,Zt,R,ie,et,Ve,cl,st,fl,ea,oe,gl,tt,_l,bl,sa,re,ta,Fe,aa,K,pe,at,He,vl,nt,$l,na,z,wl,lt,yl,jl,bs,kl,El,la,Ye,ia,he,xl,it,Pl,Dl,oa,U,ot,Sl,Tl,rt,ql,Nl,pt,Cl,ra,We,pa,B,de,ht,Ge,Ml,dt,Al,ha,O,Il,mt,Ll,zl,ut,Ul,Ol,da,Re,ma,me,Vl,ct,Fl,Hl,ua,Ke,ca,Be,ft,ni,fa,J,ue,gt,Je,Yl,_t,Wl,ga,vs,Gl,_a,$s,Rl,ba,ws,Kl,va,ce,Qe,Bl,ys,Jl,Ql,Xl,Xe,Zl,js,ei,si,$a;return y=new ge({}),A=new Mo({props:{classNames:"absolute z-10 right-0 
top-0",options:[{label:"Mixed",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/write_own_pipeline.ipynb"},{label:"PyTorch",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/pytorch/write_own_pipeline.ipynb"},{label:"TensorFlow",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/tensorflow/write_own_pipeline.ipynb"},{label:"Mixed",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/write_own_pipeline.ipynb"},{label:"PyTorch",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/pytorch/write_own_pipeline.ipynb"},{label:"TensorFlow",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/tensorflow/write_own_pipeline.ipynb"}]}}),_e=new ge({}),be=new k({props:{code:`from diffusers import DDPMPipeline
ddpm = DDPMPipeline.from_pretrained("google/ddpm-cat-256").to("cuda")
image = ddpm(num_inference_steps=25).images[0]
image`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DDPMPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>ddpm = DDPMPipeline.from_pretrained(<span class="hljs-string">&quot;google/ddpm-cat-256&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>image = ddpm(num_inference_steps=<span class="hljs-number">25</span>).images[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image`}}),we=new k({props:{code:`from diffusers import DDPMScheduler, UNet2DModel
scheduler = DDPMScheduler.from_pretrained("google/ddpm-cat-256")
model = UNet2DModel.from_pretrained("google/ddpm-cat-256").to("cuda")`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DDPMScheduler, UNet2DModel
<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler = DDPMScheduler.from_pretrained(<span class="hljs-string">&quot;google/ddpm-cat-256&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = UNet2DModel.from_pretrained(<span class="hljs-string">&quot;google/ddpm-cat-256&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)`}}),je=new k({props:{code:"scheduler.set_timesteps(50)",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler.set_timesteps(<span class="hljs-number">50</span>)'}}),Ee=new k({props:{code:"scheduler.timesteps",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler.timesteps
tensor([<span class="hljs-number">980</span>, <span class="hljs-number">960</span>, <span class="hljs-number">940</span>, <span class="hljs-number">920</span>, <span class="hljs-number">900</span>, <span class="hljs-number">880</span>, <span class="hljs-number">860</span>, <span class="hljs-number">840</span>, <span class="hljs-number">820</span>, <span class="hljs-number">800</span>, <span class="hljs-number">780</span>, <span class="hljs-number">760</span>, <span class="hljs-number">740</span>, <span class="hljs-number">720</span>,
<span class="hljs-number">700</span>, <span class="hljs-number">680</span>, <span class="hljs-number">660</span>, <span class="hljs-number">640</span>, <span class="hljs-number">620</span>, <span class="hljs-number">600</span>, <span class="hljs-number">580</span>, <span class="hljs-number">560</span>, <span class="hljs-number">540</span>, <span class="hljs-number">520</span>, <span class="hljs-number">500</span>, <span class="hljs-number">480</span>, <span class="hljs-number">460</span>, <span class="hljs-number">440</span>,
<span class="hljs-number">420</span>, <span class="hljs-number">400</span>, <span class="hljs-number">380</span>, <span class="hljs-number">360</span>, <span class="hljs-number">340</span>, <span class="hljs-number">320</span>, <span class="hljs-number">300</span>, <span class="hljs-number">280</span>, <span class="hljs-number">260</span>, <span class="hljs-number">240</span>, <span class="hljs-number">220</span>, <span class="hljs-number">200</span>, <span class="hljs-number">180</span>, <span class="hljs-number">160</span>,
<span class="hljs-number">140</span>, <span class="hljs-number">120</span>, <span class="hljs-number">100</span>, <span class="hljs-number">80</span>, <span class="hljs-number">60</span>, <span class="hljs-number">40</span>, <span class="hljs-number">20</span>, <span class="hljs-number">0</span>])`}}),Pe=new k({props:{code:`import torch
sample_size = model.config.sample_size
noise = torch.randn((1, 3, sample_size, sample_size)).to("cuda")`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span>sample_size = model.config.sample_size
<span class="hljs-meta">&gt;&gt;&gt; </span>noise = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">3</span>, sample_size, sample_size)).to(<span class="hljs-string">&quot;cuda&quot;</span>)`}}),De=new k({props:{code:`input = noise
for t in scheduler.timesteps:
with torch.no_grad():
noisy_residual = model(input, t).sample
previous_noisy_sample = scheduler.step(noisy_residual, t, input).prev_sample
input = previous_noisy_sample`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">input</span> = noise
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> scheduler.timesteps:
<span class="hljs-meta">... </span> <span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> noisy_residual = model(<span class="hljs-built_in">input</span>, t).sample
<span class="hljs-meta">&gt;&gt;&gt; </span>previous_noisy_sample = scheduler.step(noisy_residual, t, <span class="hljs-built_in">input</span>).prev_sample
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">input</span> = previous_noisy_sample`}}),Te=new k({props:{code:`from PIL import Image
import numpy as np
image = (input / 2 + 0.5).clamp(0, 1)
image = image.cpu().permute(0, 2, 3, 1).numpy()[0]
image = Image.fromarray((image * 255)).round().astype("uint8")
image`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span>image = (<span class="hljs-built_in">input</span> / <span class="hljs-number">2</span> + <span class="hljs-number">0.5</span>).clamp(<span class="hljs-number">0</span>, <span class="hljs-number">1</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>image = image.cpu().permute(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">1</span>).numpy()[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image = Image.fromarray((image * <span class="hljs-number">255</span>)).<span class="hljs-built_in">round</span>().astype(<span class="hljs-string">&quot;uint8&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>image`}}),qe=new ge({}),ae=new ti({props:{$$slots:{default:[Ao]},$$scope:{ctx:Q}}}),Ce=new k({props:{code:`from PIL import Image
import torch
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler
vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae")
tokenizer = CLIPTokenizer.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="text_encoder")
unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet")`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> CLIPTextModel, CLIPTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKL, UNet2DConditionModel, PNDMScheduler
<span class="hljs-meta">&gt;&gt;&gt; </span>vae = AutoencoderKL.from_pretrained(<span class="hljs-string">&quot;CompVis/stable-diffusion-v1-4&quot;</span>, subfolder=<span class="hljs-string">&quot;vae&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = CLIPTokenizer.from_pretrained(<span class="hljs-string">&quot;CompVis/stable-diffusion-v1-4&quot;</span>, subfolder=<span class="hljs-string">&quot;tokenizer&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>text_encoder = CLIPTextModel.from_pretrained(<span class="hljs-string">&quot;CompVis/stable-diffusion-v1-4&quot;</span>, subfolder=<span class="hljs-string">&quot;text_encoder&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>unet = UNet2DConditionModel.from_pretrained(<span class="hljs-string">&quot;CompVis/stable-diffusion-v1-4&quot;</span>, subfolder=<span class="hljs-string">&quot;unet&quot;</span>)`}}),Me=new k({props:{code:`from diffusers import UniPCMultistepScheduler
scheduler = UniPCMultistepScheduler.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="scheduler")`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> UniPCMultistepScheduler
<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler = UniPCMultistepScheduler.from_pretrained(<span class="hljs-string">&quot;CompVis/stable-diffusion-v1-4&quot;</span>, subfolder=<span class="hljs-string">&quot;scheduler&quot;</span>)`}}),Ae=new k({props:{code:`torch_device = "cuda"
vae.to(torch_device)
text_encoder.to(torch_device)
unet.to(torch_device)`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>torch_device = <span class="hljs-string">&quot;cuda&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>vae.to(torch_device)
<span class="hljs-meta">&gt;&gt;&gt; </span>text_encoder.to(torch_device)
<span class="hljs-meta">&gt;&gt;&gt; </span>unet.to(torch_device)`}}),Ie=new ge({}),le=new ti({props:{$$slots:{default:[Io]},$$scope:{ctx:Q}}}),Le=new k({props:{code:`prompt = ["a photograph of an astronaut riding a horse"]
height = 512 # default height of Stable Diffusion
width = 512 # default width of Stable Diffusion
num_inference_steps = 25 # Number of denoising steps
guidance_scale = 7.5 # Scale for classifier-free guidance
generator = torch.manual_seed(0) # Seed generator to create the inital latent noise
batch_size = len(prompt)`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>prompt = [<span class="hljs-string">&quot;a photograph of an astronaut riding a horse&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>height = <span class="hljs-number">512</span> <span class="hljs-comment"># default height of Stable Diffusion</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>width = <span class="hljs-number">512</span> <span class="hljs-comment"># default width of Stable Diffusion</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>num_inference_steps = <span class="hljs-number">25</span> <span class="hljs-comment"># Number of denoising steps</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>guidance_scale = <span class="hljs-number">7.5</span> <span class="hljs-comment"># Scale for classifier-free guidance</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>generator = torch.manual_seed(<span class="hljs-number">0</span>) <span class="hljs-comment"># Seed generator to create the inital latent noise</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>batch_size = <span class="hljs-built_in">len</span>(prompt)`}}),ze=new k({props:{code:`text_input = tokenizer(
prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt"
)
with torch.no_grad():
text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>text_input = tokenizer(
<span class="hljs-meta">... </span> prompt, padding=<span class="hljs-string">&quot;max_length&quot;</span>, max_length=tokenizer.model_max_length, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[<span class="hljs-number">0</span>]`}}),Ue=new k({props:{code:`max_length = text_input.input_ids.shape[-1]
uncond_input = tokenizer([""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt")
uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>max_length = text_input.input_ids.shape[-<span class="hljs-number">1</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>uncond_input = tokenizer([<span class="hljs-string">&quot;&quot;</span>] * batch_size, padding=<span class="hljs-string">&quot;max_length&quot;</span>, max_length=max_length, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[<span class="hljs-number">0</span>]`}}),Oe=new k({props:{code:"text_embeddings = torch.cat([uncond_embeddings, text_embeddings])",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>text_embeddings = torch.cat([uncond_embeddings, text_embeddings])'}}),Ve=new ge({}),re=new ti({props:{$$slots:{default:[Lo]},$$scope:{ctx:Q}}}),Fe=new k({props:{code:`latents = torch.randn(
(batch_size, unet.in_channels, height // 8, width // 8),
generator=generator,
)
latents = latents.to(torch_device)`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>latents = torch.randn(
<span class="hljs-meta">... </span> (batch_size, unet.in_channels, height // <span class="hljs-number">8</span>, width // <span class="hljs-number">8</span>),
<span class="hljs-meta">... </span> generator=generator,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>latents = latents.to(torch_device)`}}),He=new ge({}),Ye=new k({props:{code:"latents = latents * scheduler.init_noise_sigma",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>latents = latents * scheduler.init_noise_sigma'}}),We=new k({props:{code:`from tqdm.auto import tqdm
scheduler.set_timesteps(num_inference_steps)
for t in tqdm(scheduler.timesteps):
# expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
latent_model_input = torch.cat([latents] * 2)
latent_model_input = scheduler.scale_model_input(latent_model_input, timestep=t)
# predict the noise residual
with torch.no_grad():
noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
# perform guidance
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
# compute the previous noisy sample x_t -> x_t-1
latents = scheduler.step(noise_pred, t, latents).prev_sample`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> tqdm.auto <span class="hljs-keyword">import</span> tqdm
<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler.set_timesteps(num_inference_steps)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> tqdm(scheduler.timesteps):
<span class="hljs-meta">... </span> <span class="hljs-comment"># expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.</span>
<span class="hljs-meta">... </span> latent_model_input = torch.cat([latents] * <span class="hljs-number">2</span>)
<span class="hljs-meta">... </span> latent_model_input = scheduler.scale_model_input(latent_model_input, timestep=t)
<span class="hljs-meta">... </span> <span class="hljs-comment"># predict the noise residual</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
<span class="hljs-meta">... </span> <span class="hljs-comment"># perform guidance</span>
<span class="hljs-meta">... </span> noise_pred_uncond, noise_pred_text = noise_pred.chunk(<span class="hljs-number">2</span>)
<span class="hljs-meta">... </span> noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
<span class="hljs-meta">... </span> <span class="hljs-comment"># compute the previous noisy sample x_t -&gt; x_t-1</span>
<span class="hljs-meta">... </span> latents = scheduler.step(noise_pred, t, latents).prev_sample`}}),Ge=new ge({}),Re=new k({props:{code:`# scale and decode the image latents with vae
latents = 1 / 0.18215 * latents
with torch.no_grad():
image = vae.decode(latents).sample`,highlighted:`<span class="hljs-comment"># scale and decode the image latents with vae</span>
latents = <span class="hljs-number">1</span> / <span class="hljs-number">0.18215</span> * latents
<span class="hljs-keyword">with</span> torch.no_grad():
image = vae.decode(latents).sample`}}),Ke=new k({props:{code:`image = (image / 2 + 0.5).clamp(0, 1)
image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
images = (image * 255).round().astype("uint8")
pil_images = [Image.fromarray(image) for image in images]
pil_images[0]`,highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>image = (image / <span class="hljs-number">2</span> + <span class="hljs-number">0.5</span>).clamp(<span class="hljs-number">0</span>, <span class="hljs-number">1</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>image = image.detach().cpu().permute(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">1</span>).numpy()
<span class="hljs-meta">&gt;&gt;&gt; </span>images = (image * <span class="hljs-number">255</span>).<span class="hljs-built_in">round</span>().astype(<span class="hljs-string">&quot;uint8&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>pil_images = [Image.fromarray(image) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> images]
<span class="hljs-meta">&gt;&gt;&gt; </span>pil_images[<span class="hljs-number">0</span>]`}}),Je=new ge({}),{c(){u=n("meta"),P=h(),$=n("h1"),j=n("a"),x=n("span"),c(y.$$.fragment),w=h(),D=n("span"),E=o("Understanding pipelines, models and schedulers"),N=h(),c(A.$$.fragment),$t=h(),X=n("p"),Fa=o("\u{1F9E8} Diffusers is designed to be a user-friendly and flexible toolbox for building diffusion systems tailored to your use-case. At the core of the toolbox are models and schedulers. While the "),es=n("a"),Ha=o("DiffusionPipeline"),Ya=o(" bundles these components together for convenience, you can also unbundle the pipeline and use the models and schedulers separately to create new diffusion systems."),wt=h(),ss=n("p"),Wa=o("In this tutorial, you\u2019ll learn how to use models and schedulers to assemble a diffusion system for inference, starting with a basic pipeline and then progressing to the Stable Diffusion pipeline."),yt=h(),H=n("h2"),Z=n("a"),Ts=n("span"),c(_e.$$.fragment),Ga=h(),qs=n("span"),Ra=o("Deconstruct a basic pipeline"),jt=h(),ts=n("p"),Ka=o("A pipeline is a quick and easy way to run a model for inference, requiring no more than four lines of code to generate an image:"),kt=h(),c(be.$$.fragment),Et=h(),ve=n("div"),as=n("img"),xt=h(),ns=n("p"),Ba=o("That was super easy, but how did the pipeline do that? Let\u2019s breakdown the pipeline and take a look at what\u2019s happening under the hood."),Pt=h(),ee=n("p"),Ja=o("In the example above, the pipeline contains a UNet model and a DDPM scheduler. The pipeline denoises an image by taking random noise the size of the desired output and passing it through the model several times. At each timestep, the model predicts the "),Ns=n("em"),Qa=o("noise residual"),Xa=o(" and the scheduler uses it to predict a less noisy image. 
The pipeline repeats this process until it reaches the end of the specified number of inference steps."),Dt=h(),ls=n("p"),Za=o("To recreate the pipeline with the model and scheduler separately, let\u2019s write our own denoising process."),St=h(),S=n("ol"),$e=n("li"),Cs=n("p"),en=o("Load the model and scheduler:"),sn=h(),c(we.$$.fragment),tn=h(),ye=n("li"),Ms=n("p"),an=o("Set the number of timesteps to run the denoising process for:"),nn=h(),c(je.$$.fragment),ln=h(),ke=n("li"),As=n("p"),on=o("Setting the scheduler timesteps creates a tensor with evenly spaced elements in it, 50 in this example. Each element corresponds to a timestep at which the model denoises an image. When you create the denoising loop later, you\u2019ll iterate over this tensor to denoise an image:"),rn=h(),c(Ee.$$.fragment),pn=h(),xe=n("li"),Is=n("p"),hn=o("Create some random noise with the same shape as the desired output:"),dn=h(),c(Pe.$$.fragment),mn=h(),Y=n("li"),M=n("p"),un=o("Now write a loop to iterate over the timesteps. At each timestep, the model does a "),is=n("a"),cn=o("UNet2DModel.forward()"),fn=o(" pass and returns the noisy residual. The scheduler\u2019s "),os=n("a"),gn=o("step()"),_n=o(" method takes the noisy residual, timestep, and input and it predicts the image at the previous timestep. This output becomes the next input to the model in the denoising loop, and it\u2019ll repeat until it reaches the end of the "),Ls=n("code"),bn=o("timesteps"),vn=o(" array."),$n=h(),c(De.$$.fragment),wn=h(),zs=n("p"),yn=o("This is the entire denoising process, and you can use this same pattern to write any diffusion system."),jn=h(),Se=n("li"),Us=n("p"),kn=o("The last step is to convert the denoised output into an image:"),En=h(),c(Te.$$.fragment),Tt=h(),T=n("p"),xn=o("In the next section, you\u2019ll put your skills to the test and breakdown the more complex Stable Diffusion pipeline. The steps are more or less the same. 
You\u2019ll initialize the necessary components, and set the number of timesteps to create a "),Os=n("code"),Pn=o("timestep"),Dn=o(" array. The "),Vs=n("code"),Sn=o("timestep"),Tn=o(" array is used in the denoising loop, and for each element in this array, the model predicts a less noisy image. The denoising loop iterates over the "),Fs=n("code"),qn=o("timestep"),Nn=o("\u2019s, and at each timestep, it outputs a noisy residual and the scheduler uses it to predict a less noisy image at the previous timestep. This process is repeated until you reach the end of the "),Hs=n("code"),Cn=o("timestep"),Mn=o(" array."),qt=h(),rs=n("p"),An=o("Let\u2019s try it out!"),Nt=h(),W=n("h2"),se=n("a"),Ys=n("span"),c(qe.$$.fragment),In=h(),Ws=n("span"),Ln=o("Deconstruct the Stable Diffusion pipeline"),Ct=h(),te=n("p"),zn=o("Stable Diffusion is a text-to-image "),Gs=n("em"),Un=o("latent diffusion"),On=o(" model. It is called a latent diffusion model because it works with a lower-dimensional representation of the image instead of the actual pixel space, which makes it more memory efficient. The encoder compresses the image into a smaller representation, and a decoder to convert the compressed representation back into an image. For text-to-image models, you\u2019ll need a tokenizer and an encoder to generate text embeddings. From the previous example, you already know you need a UNet model and a scheduler."),Mt=h(),ps=n("p"),Vn=o("As you can see, this is already more complex than the DDPM pipeline which only contains a UNet model. The Stable Diffusion model has three separate pretrained models."),At=h(),c(ae.$$.fragment),It=h(),I=n("p"),Fn=o("Now that you know what you need for the Stable Diffusion pipeline, load all these components with the "),hs=n("a"),Hn=o("from_pretrained()"),Yn=o(" method. 
You can find them in the pretrained "),Ne=n("a"),Rs=n("code"),Wn=o("runwayml/stable-diffusion-v1-5"),Gn=o(" checkpoint, and each component is stored in a separate subfolder:"),Lt=h(),c(Ce.$$.fragment),zt=h(),L=n("p"),Rn=o("Instead of the default "),ds=n("a"),Kn=o("PNDMScheduler"),Bn=o(", exchange it for the "),ms=n("a"),Jn=o("UniPCMultistepScheduler"),Qn=o(" to see how easy it is to plug a different scheduler in:"),Ut=h(),c(Me.$$.fragment),Ot=h(),us=n("p"),Xn=o("To speed up inference, move the models to a GPU since, unlike the scheduler, they have trainable weights:"),Vt=h(),c(Ae.$$.fragment),Ft=h(),G=n("h3"),ne=n("a"),Ks=n("span"),c(Ie.$$.fragment),Zn=h(),Bs=n("span"),el=o("Create text embeddings"),Ht=h(),cs=n("p"),sl=o("The next step is to tokenize the text to generate embeddings. The text is used to condition the UNet model and steer the diffusion process towards something that resembles the input prompt."),Yt=h(),c(le.$$.fragment),Wt=h(),fs=n("p"),tl=o("Feel free to choose any prompt you like if you want to generate something else!"),Gt=h(),c(Le.$$.fragment),Rt=h(),gs=n("p"),al=o("Tokenize the text and generate the embeddings from the prompt:"),Kt=h(),c(ze.$$.fragment),Bt=h(),q=n("p"),nl=o("You\u2019ll also need to generate the "),Js=n("em"),ll=o("unconditional text embeddings"),il=o(" which are the embeddings for the padding token. These need to have the same shape ("),Qs=n("code"),ol=o("batch_size"),rl=o(" and "),Xs=n("code"),pl=o("seq_length"),hl=o(") as the conditional "),Zs=n("code"),dl=o("text_embeddings"),ml=o(":"),Jt=h(),c(Ue.$$.fragment),Qt=h(),_s=n("p"),ul=o("Let\u2019s concatenate the conditional and unconditional embeddings into a batch to avoid doing two forward passes:"),Xt=h(),c(Oe.$$.fragment),Zt=h(),R=n("h3"),ie=n("a"),et=n("span"),c(Ve.$$.fragment),cl=h(),st=n("span"),fl=o("Create random noise"),ea=h(),oe=n("p"),gl=o("Next, generate some initial random noise as a starting point for the diffusion process. 
This is the latent representation of the image, and it\u2019ll be gradually denoised. At this point, the "),tt=n("code"),_l=o("latent"),bl=o(" image is smaller than the final image size but that\u2019s okay though because the model will transform it into the final 512x512 image dimensions later."),sa=h(),c(re.$$.fragment),ta=h(),c(Fe.$$.fragment),aa=h(),K=n("h3"),pe=n("a"),at=n("span"),c(He.$$.fragment),vl=h(),nt=n("span"),$l=o("Denoise the image"),na=h(),z=n("p"),wl=o("Start by scaling the input with the initial noise distribution, "),lt=n("em"),yl=o("sigma"),jl=o(", the noise scale value, which is required for improved schedulers like "),bs=n("a"),kl=o("UniPCMultistepScheduler"),El=o(":"),la=h(),c(Ye.$$.fragment),ia=h(),he=n("p"),xl=o("The last step is to create the denoising loop that\u2019ll progressively transform the pure noise in "),it=n("code"),Pl=o("latents"),Dl=o(" to an image described by your prompt. Remember, the denoising loop needs to do three things:"),oa=h(),U=n("ol"),ot=n("li"),Sl=o("Set the scheduler\u2019s timesteps to use during denoising."),Tl=h(),rt=n("li"),ql=o("Iterate over the timesteps."),Nl=h(),pt=n("li"),Cl=o("At each timestep, call the UNet model to predict the noise residual and pass it to the scheduler to compute the previous noisy sample."),ra=h(),c(We.$$.fragment),pa=h(),B=n("h3"),de=n("a"),ht=n("span"),c(Ge.$$.fragment),Ml=h(),dt=n("span"),Al=o("Decode the image"),ha=h(),O=n("p"),Il=o("The final step is to use the "),mt=n("code"),Ll=o("vae"),zl=o(" to decode the latent representation into an image and get the decoded output with "),ut=n("code"),Ul=o("sample"),Ol=o(":"),da=h(),c(Re.$$.fragment),ma=h(),me=n("p"),Vl=o("Lastly, convert the image to a "),ct=n("code"),Fl=o("PIL.Image"),Hl=o(" to see your generated image!"),ua=h(),c(Ke.$$.fragment),ca=h(),Be=n("div"),ft=n("img"),fa=h(),J=n("h2"),ue=n("a"),gt=n("span"),c(Je.$$.fragment),Yl=h(),_t=n("span"),Wl=o("Next steps"),ga=h(),vs=n("p"),Gl=o("From basic to complex pipelines, 
you\u2019ve seen that all you really need to write your own diffusion system is a denoising loop. The loop should set the scheduler\u2019s timesteps, iterate over them, and alternate between calling the UNet model to predict the noise residual and passing it to the scheduler to compute the previous noisy sample."),_a=h(),$s=n("p"),Rl=o("This is really what \u{1F9E8} Diffusers is designed for: to make it intuitive and easy to write your own diffusion system using models and schedulers."),ba=h(),ws=n("p"),Kl=o("For your next steps, feel free to:"),va=h(),ce=n("ul"),Qe=n("li"),Bl=o("Learn how to "),ys=n("a"),Jl=o("build and contribute a pipeline"),Ql=o(" to \u{1F9E8} Diffusers. We can\u2019t wait and see what you\u2019ll come up with!"),Xl=h(),Xe=n("li"),Zl=o("Explore "),js=n("a"),ei=o("existing pipelines"),si=o(" in the library, and see if you can deconstruct and build a pipeline from scratch using the models and schedulers separately."),this.h()},l(e){const a=qo('[data-svelte="svelte-1phssyn"]',document.head);u=l(a,"META",{name:!0,content:!0}),a.forEach(s),P=d(e),$=l(e,"H1",{class:!0});var Ze=i($);j=l(Ze,"A",{id:!0,class:!0,href:!0});var bt=i(j);x=l(bt,"SPAN",{});var vt=i(x);f(y.$$.fragment,vt),vt.forEach(s),bt.forEach(s),w=d(Ze),D=l(Ze,"SPAN",{});var li=i(D);E=r(li,"Understanding pipelines, models and schedulers"),li.forEach(s),Ze.forEach(s),N=d(e),f(A.$$.fragment,e),$t=d(e),X=l(e,"P",{});var wa=i(X);Fa=r(wa,"\u{1F9E8} Diffusers is designed to be a user-friendly and flexible toolbox for building diffusion systems tailored to your use-case. At the core of the toolbox are models and schedulers. 
While the "),es=l(wa,"A",{href:!0});var ii=i(es);Ha=r(ii,"DiffusionPipeline"),ii.forEach(s),Ya=r(wa," bundles these components together for convenience, you can also unbundle the pipeline and use the models and schedulers separately to create new diffusion systems."),wa.forEach(s),wt=d(e),ss=l(e,"P",{});var oi=i(ss);Wa=r(oi,"In this tutorial, you\u2019ll learn how to use models and schedulers to assemble a diffusion system for inference, starting with a basic pipeline and then progressing to the Stable Diffusion pipeline."),oi.forEach(s),yt=d(e),H=l(e,"H2",{class:!0});var ya=i(H);Z=l(ya,"A",{id:!0,class:!0,href:!0});var ri=i(Z);Ts=l(ri,"SPAN",{});var pi=i(Ts);f(_e.$$.fragment,pi),pi.forEach(s),ri.forEach(s),Ga=d(ya),qs=l(ya,"SPAN",{});var hi=i(qs);Ra=r(hi,"Deconstruct a basic pipeline"),hi.forEach(s),ya.forEach(s),jt=d(e),ts=l(e,"P",{});var di=i(ts);Ka=r(di,"A pipeline is a quick and easy way to run a model for inference, requiring no more than four lines of code to generate an image:"),di.forEach(s),kt=d(e),f(be.$$.fragment,e),Et=d(e),ve=l(e,"DIV",{class:!0});var mi=i(ve);as=l(mi,"IMG",{src:!0,alt:!0}),mi.forEach(s),xt=d(e),ns=l(e,"P",{});var ui=i(ns);Ba=r(ui,"That was super easy, but how did the pipeline do that? Let\u2019s breakdown the pipeline and take a look at what\u2019s happening under the hood."),ui.forEach(s),Pt=d(e),ee=l(e,"P",{});var ja=i(ee);Ja=r(ja,"In the example above, the pipeline contains a UNet model and a DDPM scheduler. The pipeline denoises an image by taking random noise the size of the desired output and passing it through the model several times. At each timestep, the model predicts the "),Ns=l(ja,"EM",{});var ci=i(Ns);Qa=r(ci,"noise residual"),ci.forEach(s),Xa=r(ja," and the scheduler uses it to predict a less noisy image. 
The pipeline repeats this process until it reaches the end of the specified number of inference steps."),ja.forEach(s),Dt=d(e),ls=l(e,"P",{});var fi=i(ls);Za=r(fi,"To recreate the pipeline with the model and scheduler separately, let\u2019s write our own denoising process."),fi.forEach(s),St=d(e),S=l(e,"OL",{});var C=i(S);$e=l(C,"LI",{});var ka=i($e);Cs=l(ka,"P",{});var gi=i(Cs);en=r(gi,"Load the model and scheduler:"),gi.forEach(s),sn=d(ka),f(we.$$.fragment,ka),ka.forEach(s),tn=d(C),ye=l(C,"LI",{});var Ea=i(ye);Ms=l(Ea,"P",{});var _i=i(Ms);an=r(_i,"Set the number of timesteps to run the denoising process for:"),_i.forEach(s),nn=d(Ea),f(je.$$.fragment,Ea),Ea.forEach(s),ln=d(C),ke=l(C,"LI",{});var xa=i(ke);As=l(xa,"P",{});var bi=i(As);on=r(bi,"Setting the scheduler timesteps creates a tensor with evenly spaced elements in it, 50 in this example. Each element corresponds to a timestep at which the model denoises an image. When you create the denoising loop later, you\u2019ll iterate over this tensor to denoise an image:"),bi.forEach(s),rn=d(xa),f(Ee.$$.fragment,xa),xa.forEach(s),pn=d(C),xe=l(C,"LI",{});var Pa=i(xe);Is=l(Pa,"P",{});var vi=i(Is);hn=r(vi,"Create some random noise with the same shape as the desired output:"),vi.forEach(s),dn=d(Pa),f(Pe.$$.fragment,Pa),Pa.forEach(s),mn=d(C),Y=l(C,"LI",{});var ks=i(Y);M=l(ks,"P",{});var fe=i(M);un=r(fe,"Now write a loop to iterate over the timesteps. At each timestep, the model does a "),is=l(fe,"A",{href:!0});var $i=i(is);cn=r($i,"UNet2DModel.forward()"),$i.forEach(s),fn=r(fe," pass and returns the noisy residual. The scheduler\u2019s "),os=l(fe,"A",{href:!0});var wi=i(os);gn=r(wi,"step()"),wi.forEach(s),_n=r(fe," method takes the noisy residual, timestep, and input and it predicts the image at the previous timestep. 
This output becomes the next input to the model in the denoising loop, and it\u2019ll repeat until it reaches the end of the "),Ls=l(fe,"CODE",{});var yi=i(Ls);bn=r(yi,"timesteps"),yi.forEach(s),vn=r(fe," array."),fe.forEach(s),$n=d(ks),f(De.$$.fragment,ks),wn=d(ks),zs=l(ks,"P",{});var ji=i(zs);yn=r(ji,"This is the entire denoising process, and you can use this same pattern to write any diffusion system."),ji.forEach(s),ks.forEach(s),jn=d(C),Se=l(C,"LI",{});var Da=i(Se);Us=l(Da,"P",{});var ki=i(Us);kn=r(ki,"The last step is to convert the denoised output into an image:"),ki.forEach(s),En=d(Da),f(Te.$$.fragment,Da),Da.forEach(s),C.forEach(s),Tt=d(e),T=l(e,"P",{});var V=i(T);xn=r(V,"In the next section, you\u2019ll put your skills to the test and breakdown the more complex Stable Diffusion pipeline. The steps are more or less the same. You\u2019ll initialize the necessary components, and set the number of timesteps to create a "),Os=l(V,"CODE",{});var Ei=i(Os);Pn=r(Ei,"timestep"),Ei.forEach(s),Dn=r(V," array. The "),Vs=l(V,"CODE",{});var xi=i(Vs);Sn=r(xi,"timestep"),xi.forEach(s),Tn=r(V," array is used in the denoising loop, and for each element in this array, the model predicts a less noisy image. The denoising loop iterates over the "),Fs=l(V,"CODE",{});var Pi=i(Fs);qn=r(Pi,"timestep"),Pi.forEach(s),Nn=r(V,"\u2019s, and at each timestep, it outputs a noisy residual and the scheduler uses it to predict a less noisy image at the previous timestep. 
This process is repeated until you reach the end of the "),Hs=l(V,"CODE",{});var Di=i(Hs);Cn=r(Di,"timestep"),Di.forEach(s),Mn=r(V," array."),V.forEach(s),qt=d(e),rs=l(e,"P",{});var Si=i(rs);An=r(Si,"Let\u2019s try it out!"),Si.forEach(s),Nt=d(e),W=l(e,"H2",{class:!0});var Sa=i(W);se=l(Sa,"A",{id:!0,class:!0,href:!0});var Ti=i(se);Ys=l(Ti,"SPAN",{});var qi=i(Ys);f(qe.$$.fragment,qi),qi.forEach(s),Ti.forEach(s),In=d(Sa),Ws=l(Sa,"SPAN",{});var Ni=i(Ws);Ln=r(Ni,"Deconstruct the Stable Diffusion pipeline"),Ni.forEach(s),Sa.forEach(s),Ct=d(e),te=l(e,"P",{});var Ta=i(te);zn=r(Ta,"Stable Diffusion is a text-to-image "),Gs=l(Ta,"EM",{});var Ci=i(Gs);Un=r(Ci,"latent diffusion"),Ci.forEach(s),On=r(Ta," model. It is called a latent diffusion model because it works with a lower-dimensional representation of the image instead of the actual pixel space, which makes it more memory efficient. The encoder compresses the image into a smaller representation, and a decoder to convert the compressed representation back into an image. For text-to-image models, you\u2019ll need a tokenizer and an encoder to generate text embeddings. From the previous example, you already know you need a UNet model and a scheduler."),Ta.forEach(s),Mt=d(e),ps=l(e,"P",{});var Mi=i(ps);Vn=r(Mi,"As you can see, this is already more complex than the DDPM pipeline which only contains a UNet model. The Stable Diffusion model has three separate pretrained models."),Mi.forEach(s),At=d(e),f(ae.$$.fragment,e),It=d(e),I=l(e,"P",{});var Es=i(I);Fn=r(Es,"Now that you know what you need for the Stable Diffusion pipeline, load all these components with the "),hs=l(Es,"A",{href:!0});var Ai=i(hs);Hn=r(Ai,"from_pretrained()"),Ai.forEach(s),Yn=r(Es," method. 
You can find them in the pretrained "),Ne=l(Es,"A",{href:!0,rel:!0});var Ii=i(Ne);Rs=l(Ii,"CODE",{});var Li=i(Rs);Wn=r(Li,"runwayml/stable-diffusion-v1-5"),Li.forEach(s),Ii.forEach(s),Gn=r(Es," checkpoint, and each component is stored in a separate subfolder:"),Es.forEach(s),Lt=d(e),f(Ce.$$.fragment,e),zt=d(e),L=l(e,"P",{});var xs=i(L);Rn=r(xs,"Instead of the default "),ds=l(xs,"A",{href:!0});var zi=i(ds);Kn=r(zi,"PNDMScheduler"),zi.forEach(s),Bn=r(xs,", exchange it for the "),ms=l(xs,"A",{href:!0});var Ui=i(ms);Jn=r(Ui,"UniPCMultistepScheduler"),Ui.forEach(s),Qn=r(xs," to see how easy it is to plug a different scheduler in:"),xs.forEach(s),Ut=d(e),f(Me.$$.fragment,e),Ot=d(e),us=l(e,"P",{});var Oi=i(us);Xn=r(Oi,"To speed up inference, move the models to a GPU since, unlike the scheduler, they have trainable weights:"),Oi.forEach(s),Vt=d(e),f(Ae.$$.fragment,e),Ft=d(e),G=l(e,"H3",{class:!0});var qa=i(G);ne=l(qa,"A",{id:!0,class:!0,href:!0});var Vi=i(ne);Ks=l(Vi,"SPAN",{});var Fi=i(Ks);f(Ie.$$.fragment,Fi),Fi.forEach(s),Vi.forEach(s),Zn=d(qa),Bs=l(qa,"SPAN",{});var Hi=i(Bs);el=r(Hi,"Create text embeddings"),Hi.forEach(s),qa.forEach(s),Ht=d(e),cs=l(e,"P",{});var Yi=i(cs);sl=r(Yi,"The next step is to tokenize the text to generate embeddings. The text is used to condition the UNet model and steer the diffusion process towards something that resembles the input prompt."),Yi.forEach(s),Yt=d(e),f(le.$$.fragment,e),Wt=d(e),fs=l(e,"P",{});var Wi=i(fs);tl=r(Wi,"Feel free to choose any prompt you like if you want to generate something else!"),Wi.forEach(s),Gt=d(e),f(Le.$$.fragment,e),Rt=d(e),gs=l(e,"P",{});var Gi=i(gs);al=r(Gi,"Tokenize the text and generate the embeddings from the prompt:"),Gi.forEach(s),Kt=d(e),f(ze.$$.fragment,e),Bt=d(e),q=l(e,"P",{});var F=i(q);nl=r(F,"You\u2019ll also need to generate the "),Js=l(F,"EM",{});var Ri=i(Js);ll=r(Ri,"unconditional text embeddings"),Ri.forEach(s),il=r(F," which are the embeddings for the padding token. 
These need to have the same shape ("),Qs=l(F,"CODE",{});var Ki=i(Qs);ol=r(Ki,"batch_size"),Ki.forEach(s),rl=r(F," and "),Xs=l(F,"CODE",{});var Bi=i(Xs);pl=r(Bi,"seq_length"),Bi.forEach(s),hl=r(F,") as the conditional "),Zs=l(F,"CODE",{});var Ji=i(Zs);dl=r(Ji,"text_embeddings"),Ji.forEach(s),ml=r(F,":"),F.forEach(s),Jt=d(e),f(Ue.$$.fragment,e),Qt=d(e),_s=l(e,"P",{});var Qi=i(_s);ul=r(Qi,"Let\u2019s concatenate the conditional and unconditional embeddings into a batch to avoid doing two forward passes:"),Qi.forEach(s),Xt=d(e),f(Oe.$$.fragment,e),Zt=d(e),R=l(e,"H3",{class:!0});var Na=i(R);ie=l(Na,"A",{id:!0,class:!0,href:!0});var Xi=i(ie);et=l(Xi,"SPAN",{});var Zi=i(et);f(Ve.$$.fragment,Zi),Zi.forEach(s),Xi.forEach(s),cl=d(Na),st=l(Na,"SPAN",{});var eo=i(st);fl=r(eo,"Create random noise"),eo.forEach(s),Na.forEach(s),ea=d(e),oe=l(e,"P",{});var Ca=i(oe);gl=r(Ca,"Next, generate some initial random noise as a starting point for the diffusion process. This is the latent representation of the image, and it\u2019ll be gradually denoised. 
At this point, the "),tt=l(Ca,"CODE",{});var so=i(tt);_l=r(so,"latent"),so.forEach(s),bl=r(Ca," image is smaller than the final image size but that\u2019s okay though because the model will transform it into the final 512x512 image dimensions later."),Ca.forEach(s),sa=d(e),f(re.$$.fragment,e),ta=d(e),f(Fe.$$.fragment,e),aa=d(e),K=l(e,"H3",{class:!0});var Ma=i(K);pe=l(Ma,"A",{id:!0,class:!0,href:!0});var to=i(pe);at=l(to,"SPAN",{});var ao=i(at);f(He.$$.fragment,ao),ao.forEach(s),to.forEach(s),vl=d(Ma),nt=l(Ma,"SPAN",{});var no=i(nt);$l=r(no,"Denoise the image"),no.forEach(s),Ma.forEach(s),na=d(e),z=l(e,"P",{});var Ps=i(z);wl=r(Ps,"Start by scaling the input with the initial noise distribution, "),lt=l(Ps,"EM",{});var lo=i(lt);yl=r(lo,"sigma"),lo.forEach(s),jl=r(Ps,", the noise scale value, which is required for improved schedulers like "),bs=l(Ps,"A",{href:!0});var io=i(bs);kl=r(io,"UniPCMultistepScheduler"),io.forEach(s),El=r(Ps,":"),Ps.forEach(s),la=d(e),f(Ye.$$.fragment,e),ia=d(e),he=l(e,"P",{});var Aa=i(he);xl=r(Aa,"The last step is to create the denoising loop that\u2019ll progressively transform the pure noise in "),it=l(Aa,"CODE",{});var oo=i(it);Pl=r(oo,"latents"),oo.forEach(s),Dl=r(Aa," to an image described by your prompt. 
Remember, the denoising loop needs to do three things:"),Aa.forEach(s),oa=d(e),U=l(e,"OL",{});var Ds=i(U);ot=l(Ds,"LI",{});var ro=i(ot);Sl=r(ro,"Set the scheduler\u2019s timesteps to use during denoising."),ro.forEach(s),Tl=d(Ds),rt=l(Ds,"LI",{});var po=i(rt);ql=r(po,"Iterate over the timesteps."),po.forEach(s),Nl=d(Ds),pt=l(Ds,"LI",{});var ho=i(pt);Cl=r(ho,"At each timestep, call the UNet model to predict the noise residual and pass it to the scheduler to compute the previous noisy sample."),ho.forEach(s),Ds.forEach(s),ra=d(e),f(We.$$.fragment,e),pa=d(e),B=l(e,"H3",{class:!0});var Ia=i(B);de=l(Ia,"A",{id:!0,class:!0,href:!0});var mo=i(de);ht=l(mo,"SPAN",{});var uo=i(ht);f(Ge.$$.fragment,uo),uo.forEach(s),mo.forEach(s),Ml=d(Ia),dt=l(Ia,"SPAN",{});var co=i(dt);Al=r(co,"Decode the image"),co.forEach(s),Ia.forEach(s),ha=d(e),O=l(e,"P",{});var Ss=i(O);Il=r(Ss,"The final step is to use the "),mt=l(Ss,"CODE",{});var fo=i(mt);Ll=r(fo,"vae"),fo.forEach(s),zl=r(Ss," to decode the latent representation into an image and get the decoded output with "),ut=l(Ss,"CODE",{});var go=i(ut);Ul=r(go,"sample"),go.forEach(s),Ol=r(Ss,":"),Ss.forEach(s),da=d(e),f(Re.$$.fragment,e),ma=d(e),me=l(e,"P",{});var La=i(me);Vl=r(La,"Lastly, convert the image to a "),ct=l(La,"CODE",{});var _o=i(ct);Fl=r(_o,"PIL.Image"),_o.forEach(s),Hl=r(La," to see your generated image!"),La.forEach(s),ua=d(e),f(Ke.$$.fragment,e),ca=d(e),Be=l(e,"DIV",{class:!0});var bo=i(Be);ft=l(bo,"IMG",{src:!0}),bo.forEach(s),fa=d(e),J=l(e,"H2",{class:!0});var za=i(J);ue=l(za,"A",{id:!0,class:!0,href:!0});var vo=i(ue);gt=l(vo,"SPAN",{});var $o=i(gt);f(Je.$$.fragment,$o),$o.forEach(s),vo.forEach(s),Yl=d(za),_t=l(za,"SPAN",{});var wo=i(_t);Wl=r(wo,"Next steps"),wo.forEach(s),za.forEach(s),ga=d(e),vs=l(e,"P",{});var yo=i(vs);Gl=r(yo,"From basic to complex pipelines, you\u2019ve seen that all you really need to write your own diffusion system is a denoising loop. 
The loop should set the scheduler\u2019s timesteps, iterate over them, and alternate between calling the UNet model to predict the noise residual and passing it to the scheduler to compute the previous noisy sample."),yo.forEach(s),_a=d(e),$s=l(e,"P",{});var jo=i($s);Rl=r(jo,"This is really what \u{1F9E8} Diffusers is designed for: to make it intuitive and easy to write your own diffusion system using models and schedulers."),jo.forEach(s),ba=d(e),ws=l(e,"P",{});var ko=i(ws);Kl=r(ko,"For your next steps, feel free to:"),ko.forEach(s),va=d(e),ce=l(e,"UL",{});var Ua=i(ce);Qe=l(Ua,"LI",{});var Oa=i(Qe);Bl=r(Oa,"Learn how to "),ys=l(Oa,"A",{href:!0});var Eo=i(ys);Jl=r(Eo,"build and contribute a pipeline"),Eo.forEach(s),Ql=r(Oa," to \u{1F9E8} Diffusers. We can\u2019t wait and see what you\u2019ll come up with!"),Oa.forEach(s),Xl=d(Ua),Xe=l(Ua,"LI",{});var Va=i(Xe);Zl=r(Va,"Explore "),js=l(Va,"A",{href:!0});var xo=i(js);ei=r(xo,"existing pipelines"),xo.forEach(s),si=r(Va," in the library, and see if you can deconstruct and build a pipeline from scratch using the models and schedulers separately."),Va.forEach(s),Ua.forEach(s),this.h()},h(){m(u,"name","hf:doc:metadata"),m(u,"content",JSON.stringify(Uo)),m(j,"id","understanding-pipelines-models-and-schedulers"),m(j,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(j,"href","#understanding-pipelines-models-and-schedulers"),m($,"class","relative group"),m(es,"href","/docs/diffusers/v0.16.0/en/api/diffusion_pipeline#diffusers.DiffusionPipeline"),m(Z,"id","deconstruct-a-basic-pipeline"),m(Z,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Z,"href","#deconstruct-a-basic-pipeline"),m(H,"class","relative 
group"),Po(as.src,ai="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ddpm-cat.png")||m(as,"src",ai),m(as,"alt","Image of cat created from DDPMPipeline"),m(ve,"class","flex justify-center"),m(is,"href","/docs/diffusers/v0.16.0/en/api/models#diffusers.UNet2DModel.forward"),m(os,"href","/docs/diffusers/v0.16.0/en/api/schedulers/ddpm#diffusers.DDPMScheduler.step"),m(se,"id","deconstruct-the-stable-diffusion-pipeline"),m(se,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(se,"href","#deconstruct-the-stable-diffusion-pipeline"),m(W,"class","relative group"),m(hs,"href","/docs/diffusers/v0.16.0/en/api/models#diffusers.ModelMixin.from_pretrained"),m(Ne,"href","https://huggingface.co/runwayml/stable-diffusion-v1-5"),m(Ne,"rel","nofollow"),m(ds,"href","/docs/diffusers/v0.16.0/en/api/schedulers/pndm#diffusers.PNDMScheduler"),m(ms,"href","/docs/diffusers/v0.16.0/en/api/schedulers/unipc#diffusers.UniPCMultistepScheduler"),m(ne,"id","create-text-embeddings"),m(ne,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(ne,"href","#create-text-embeddings"),m(G,"class","relative group"),m(ie,"id","create-random-noise"),m(ie,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(ie,"href","#create-random-noise"),m(R,"class","relative group"),m(pe,"id","denoise-the-image"),m(pe,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(pe,"href","#denoise-the-image"),m(K,"class","relative 
group"),m(bs,"href","/docs/diffusers/v0.16.0/en/api/schedulers/unipc#diffusers.UniPCMultistepScheduler"),m(de,"id","decode-the-image"),m(de,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(de,"href","#decode-the-image"),m(B,"class","relative group"),Po(ft.src,ni="https://huggingface.co/blog/assets/98_stable_diffusion/stable_diffusion_k_lms.png")||m(ft,"src",ni),m(Be,"class","flex justify-center"),m(ue,"id","next-steps"),m(ue,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(ue,"href","#next-steps"),m(J,"class","relative group"),m(ys,"href","using-diffusers/#contribute_pipeline"),m(js,"href","./api/pipelines/overview")},m(e,a){t(document.head,u),p(e,P,a),p(e,$,a),t($,j),t(j,x),g(y,x,null),t($,w),t($,D),t(D,E),p(e,N,a),g(A,e,a),p(e,$t,a),p(e,X,a),t(X,Fa),t(X,es),t(es,Ha),t(X,Ya),p(e,wt,a),p(e,ss,a),t(ss,Wa),p(e,yt,a),p(e,H,a),t(H,Z),t(Z,Ts),g(_e,Ts,null),t(H,Ga),t(H,qs),t(qs,Ra),p(e,jt,a),p(e,ts,a),t(ts,Ka),p(e,kt,a),g(be,e,a),p(e,Et,a),p(e,ve,a),t(ve,as),p(e,xt,a),p(e,ns,a),t(ns,Ba),p(e,Pt,a),p(e,ee,a),t(ee,Ja),t(ee,Ns),t(Ns,Qa),t(ee,Xa),p(e,Dt,a),p(e,ls,a),t(ls,Za),p(e,St,a),p(e,S,a),t(S,$e),t($e,Cs),t(Cs,en),t($e,sn),g(we,$e,null),t(S,tn),t(S,ye),t(ye,Ms),t(Ms,an),t(ye,nn),g(je,ye,null),t(S,ln),t(S,ke),t(ke,As),t(As,on),t(ke,rn),g(Ee,ke,null),t(S,pn),t(S,xe),t(xe,Is),t(Is,hn),t(xe,dn),g(Pe,xe,null),t(S,mn),t(S,Y),t(Y,M),t(M,un),t(M,is),t(is,cn),t(M,fn),t(M,os),t(os,gn),t(M,_n),t(M,Ls),t(Ls,bn),t(M,vn),t(Y,$n),g(De,Y,null),t(Y,wn),t(Y,zs),t(zs,yn),t(S,jn),t(S,Se),t(Se,Us),t(Us,kn),t(Se,En),g(Te,Se,null),p(e,Tt,a),p(e,T,a),t(T,xn),t(T,Os),t(Os,Pn),t(T,Dn),t(T,Vs),t(Vs,Sn),t(T,Tn),t(T,Fs),t(Fs,qn),t(T,Nn),t(T,Hs),t(Hs,Cn),t(T,Mn),p(e,qt,a),p(e,rs,a),t(rs,An),p(e,Nt,a),p(e,W,a),t(W,se),t(se,Ys),g(qe,Ys,null),t(W,In),t(
W,Ws),t(Ws,Ln),p(e,Ct,a),p(e,te,a),t(te,zn),t(te,Gs),t(Gs,Un),t(te,On),p(e,Mt,a),p(e,ps,a),t(ps,Vn),p(e,At,a),g(ae,e,a),p(e,It,a),p(e,I,a),t(I,Fn),t(I,hs),t(hs,Hn),t(I,Yn),t(I,Ne),t(Ne,Rs),t(Rs,Wn),t(I,Gn),p(e,Lt,a),g(Ce,e,a),p(e,zt,a),p(e,L,a),t(L,Rn),t(L,ds),t(ds,Kn),t(L,Bn),t(L,ms),t(ms,Jn),t(L,Qn),p(e,Ut,a),g(Me,e,a),p(e,Ot,a),p(e,us,a),t(us,Xn),p(e,Vt,a),g(Ae,e,a),p(e,Ft,a),p(e,G,a),t(G,ne),t(ne,Ks),g(Ie,Ks,null),t(G,Zn),t(G,Bs),t(Bs,el),p(e,Ht,a),p(e,cs,a),t(cs,sl),p(e,Yt,a),g(le,e,a),p(e,Wt,a),p(e,fs,a),t(fs,tl),p(e,Gt,a),g(Le,e,a),p(e,Rt,a),p(e,gs,a),t(gs,al),p(e,Kt,a),g(ze,e,a),p(e,Bt,a),p(e,q,a),t(q,nl),t(q,Js),t(Js,ll),t(q,il),t(q,Qs),t(Qs,ol),t(q,rl),t(q,Xs),t(Xs,pl),t(q,hl),t(q,Zs),t(Zs,dl),t(q,ml),p(e,Jt,a),g(Ue,e,a),p(e,Qt,a),p(e,_s,a),t(_s,ul),p(e,Xt,a),g(Oe,e,a),p(e,Zt,a),p(e,R,a),t(R,ie),t(ie,et),g(Ve,et,null),t(R,cl),t(R,st),t(st,fl),p(e,ea,a),p(e,oe,a),t(oe,gl),t(oe,tt),t(tt,_l),t(oe,bl),p(e,sa,a),g(re,e,a),p(e,ta,a),g(Fe,e,a),p(e,aa,a),p(e,K,a),t(K,pe),t(pe,at),g(He,at,null),t(K,vl),t(K,nt),t(nt,$l),p(e,na,a),p(e,z,a),t(z,wl),t(z,lt),t(lt,yl),t(z,jl),t(z,bs),t(bs,kl),t(z,El),p(e,la,a),g(Ye,e,a),p(e,ia,a),p(e,he,a),t(he,xl),t(he,it),t(it,Pl),t(he,Dl),p(e,oa,a),p(e,U,a),t(U,ot),t(ot,Sl),t(U,Tl),t(U,rt),t(rt,ql),t(U,Nl),t(U,pt),t(pt,Cl),p(e,ra,a),g(We,e,a),p(e,pa,a),p(e,B,a),t(B,de),t(de,ht),g(Ge,ht,null),t(B,Ml),t(B,dt),t(dt,Al),p(e,ha,a),p(e,O,a),t(O,Il),t(O,mt),t(mt,Ll),t(O,zl),t(O,ut),t(ut,Ul),t(O,Ol),p(e,da,a),g(Re,e,a),p(e,ma,a),p(e,me,a),t(me,Vl),t(me,ct),t(ct,Fl),t(me,Hl),p(e,ua,a),g(Ke,e,a),p(e,ca,a),p(e,Be,a),t(Be,ft),p(e,fa,a),p(e,J,a),t(J,ue),t(ue,gt),g(Je,gt,null),t(J,Yl),t(J,_t),t(_t,Wl),p(e,ga,a),p(e,vs,a),t(vs,Gl),p(e,_a,a),p(e,$s,a),t($s,Rl),p(e,ba,a),p(e,ws,a),t(ws,Kl),p(e,va,a),p(e,ce,a),t(ce,Qe),t(Qe,Bl),t(Qe,ys),t(ys,Jl),t(Qe,Ql),t(ce,Xl),t(ce,Xe),t(Xe,Zl),t(Xe,js),t(js,ei),t(Xe,si),$a=!0},p(e,[a]){const Ze={};a&2&&(Ze.$$scope={dirty:a,ctx:e}),ae.$set(Ze);const bt={};a&2&&(bt.$$scope={dirty:a,ctx:e}),le.$set(bt);const 
vt={};a&2&&(vt.$$scope={dirty:a,ctx:e}),re.$set(vt)},i(e){$a||(_(y.$$.fragment,e),_(A.$$.fragment,e),_(_e.$$.fragment,e),_(be.$$.fragment,e),_(we.$$.fragment,e),_(je.$$.fragment,e),_(Ee.$$.fragment,e),_(Pe.$$.fragment,e),_(De.$$.fragment,e),_(Te.$$.fragment,e),_(qe.$$.fragment,e),_(ae.$$.fragment,e),_(Ce.$$.fragment,e),_(Me.$$.fragment,e),_(Ae.$$.fragment,e),_(Ie.$$.fragment,e),_(le.$$.fragment,e),_(Le.$$.fragment,e),_(ze.$$.fragment,e),_(Ue.$$.fragment,e),_(Oe.$$.fragment,e),_(Ve.$$.fragment,e),_(re.$$.fragment,e),_(Fe.$$.fragment,e),_(He.$$.fragment,e),_(Ye.$$.fragment,e),_(We.$$.fragment,e),_(Ge.$$.fragment,e),_(Re.$$.fragment,e),_(Ke.$$.fragment,e),_(Je.$$.fragment,e),$a=!0)},o(e){b(y.$$.fragment,e),b(A.$$.fragment,e),b(_e.$$.fragment,e),b(be.$$.fragment,e),b(we.$$.fragment,e),b(je.$$.fragment,e),b(Ee.$$.fragment,e),b(Pe.$$.fragment,e),b(De.$$.fragment,e),b(Te.$$.fragment,e),b(qe.$$.fragment,e),b(ae.$$.fragment,e),b(Ce.$$.fragment,e),b(Me.$$.fragment,e),b(Ae.$$.fragment,e),b(Ie.$$.fragment,e),b(le.$$.fragment,e),b(Le.$$.fragment,e),b(ze.$$.fragment,e),b(Ue.$$.fragment,e),b(Oe.$$.fragment,e),b(Ve.$$.fragment,e),b(re.$$.fragment,e),b(Fe.$$.fragment,e),b(He.$$.fragment,e),b(Ye.$$.fragment,e),b(We.$$.fragment,e),b(Ge.$$.fragment,e),b(Re.$$.fragment,e),b(Ke.$$.fragment,e),b(Je.$$.fragment,e),$a=!1},d(e){s(u),e&&s(P),e&&s($),v(y),e&&s(N),v(A,e),e&&s($t),e&&s(X),e&&s(wt),e&&s(ss),e&&s(yt),e&&s(H),v(_e),e&&s(jt),e&&s(ts),e&&s(kt),v(be,e),e&&s(Et),e&&s(ve),e&&s(xt),e&&s(ns),e&&s(Pt),e&&s(ee),e&&s(Dt),e&&s(ls),e&&s(St),e&&s(S),v(we),v(je),v(Ee),v(Pe),v(De),v(Te),e&&s(Tt),e&&s(T),e&&s(qt),e&&s(rs),e&&s(Nt),e&&s(W),v(qe),e&&s(Ct),e&&s(te),e&&s(Mt),e&&s(ps),e&&s(At),v(ae,e),e&&s(It),e&&s(I),e&&s(Lt),v(Ce,e),e&&s(zt),e&&s(L),e&&s(Ut),v(Me,e),e&&s(Ot),e&&s(us),e&&s(Vt),v(Ae,e),e&&s(Ft),e&&s(G),v(Ie),e&&s(Ht),e&&s(cs),e&&s(Yt),v(le,e),e&&s(Wt),e&&s(fs),e&&s(Gt),v(Le,e),e&&s(Rt),e&&s(gs),e&&s(Kt),v(ze,e),e&&s(Bt),e&&s(q),e&&s(Jt),v(Ue,e),e&&s(Qt),e&&s(_s),e&&s(Xt),v(Oe,e),e&&s(Z
t),e&&s(R),v(Ve),e&&s(ea),e&&s(oe),e&&s(sa),v(re,e),e&&s(ta),v(Fe,e),e&&s(aa),e&&s(K),v(He),e&&s(na),e&&s(z),e&&s(la),v(Ye,e),e&&s(ia),e&&s(he),e&&s(oa),e&&s(U),e&&s(ra),v(We,e),e&&s(pa),e&&s(B),v(Ge),e&&s(ha),e&&s(O),e&&s(da),v(Re,e),e&&s(ma),e&&s(me),e&&s(ua),v(Ke,e),e&&s(ca),e&&s(Be),e&&s(fa),e&&s(J),v(Je),e&&s(ga),e&&s(vs),e&&s(_a),e&&s($s),e&&s(ba),e&&s(ws),e&&s(va),e&&s(ce)}}}
/*
 * NOTE(review): the text above is the tail of the fragment's `d(e)` method, which
 * begins earlier in the file. `s` and `v` are minified vendor imports (presumably
 * Svelte's detach / destroy_component helpers) — confirm against the vendor chunk.
 */

/**
 * Table-of-contents metadata for this documentation page.
 *
 * Each entry pairs a `local` anchor slug with its display `title`; `sections`
 * nests sub-headings under their parent heading. The object is serialized with
 * JSON.stringify into the `content` attribute of the `hf:doc:metadata` META
 * element by the fragment code earlier in this file, and is also exported
 * below under the name `metadata`.
 */
const Uo={local:"understanding-pipelines-models-and-schedulers",sections:[{local:"deconstruct-a-basic-pipeline",title:"Deconstruct a basic pipeline"},{local:"deconstruct-the-stable-diffusion-pipeline",sections:[{local:"create-text-embeddings",title:"Create text embeddings"},{local:"create-random-noise",title:"Create random noise"},{local:"denoise-the-image",title:"Denoise the image"},{local:"decode-the-image",title:"Decode the image"}],title:"Deconstruct the Stable Diffusion pipeline"},{local:"next-steps",title:"Next steps"}],title:"Understanding pipelines, models and schedulers"};
/**
 * Instance function handed to the component initializer in `Go`.
 *
 * Registers a callback through `No` (a minified vendor import — presumably
 * Svelte's onMount; TODO confirm). The callback reads the `fw` query parameter
 * from `window.location.search` and discards the value, so it has no visible
 * effect here.
 *
 * @param {*} Q - Unused by this function.
 * @returns {Array} Always an empty array.
 */
function Oo(Q){return No(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component class; the module's default export.
 *
 * Extends the vendor base class `Do` and, after `super()`, delegates all setup
 * to `So`, passing this instance, the constructor argument `u`, the instance
 * function `Oo`, `zo` (defined outside this excerpt — presumably the fragment
 * factory), `To`, and an empty object.
 * NOTE(review): `Do`, `So` and `To` are minified vendor imports (presumably
 * Svelte's SvelteComponent, init and safe_not_equal) — confirm against the
 * vendor chunk.
 */
class Go extends Do{constructor(u){super();So(this,u,Oo,zo,To,{})}}
/* Public surface of the module: the component as `default`, the page outline as `metadata`. */
export{Go as default,Uo as metadata};

Xet Storage Details

Size:
57.8 kB
·
Xet hash:
5caf49e965dfcbe1eab8a02f9dda4040be9fcb4470b5458f9e423b63300284a6

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.