Buckets:
hf-doc-build/doc / diffusers /main /en /_app /pages /using-diffusers /controlling_generation.mdx-hf-doc-builder.js
| import{S as T1,i as D1,s as k1,e as r,k as f,w as _,t as l,M as S1,c as i,d as a,m as h,a as o,x as w,h as s,b as n,G as e,g as d,y as b,q as x,o as P,B as y,v as I1}from"../../chunks/vendor-hf-doc-builder.js";import{T as $1}from"../../chunks/Tip-hf-doc-builder.js";import{I as A}from"../../chunks/IconCopyLink-hf-doc-builder.js";function N1(ga){let g,D,E,$,Y;return{c(){g=r("p"),D=l(`Pix2Pix Zero is the first model that allows \u201Czero-shot\u201D image editing. This means that the model | |
| can edit an image in less than a minute on a consumer GPU as shown `),E=r("a"),$=l("here"),Y=l("."),this.h()},l(T){g=i(T,"P",{});var V=o(g);D=s(V,`Pix2Pix Zero is the first model that allows \u201Czero-shot\u201D image editing. This means that the model | |
| can edit an image in less than a minute on a consumer GPU as shown `),E=i(V,"A",{href:!0});var ve=o(E);$=s(ve,"here"),ve.forEach(a),Y=s(V,"."),V.forEach(a),this.h()},h(){n(E,"href","../api/pipelines/pix2pix_zero#usage-example")},m(T,V){d(T,g,V),e(g,D),e(g,E),e(E,$),e(g,Y)},d(T){T&&a(g)}}}function G1(ga){let g,D;return{c(){g=r("p"),D=l(`An important distinction between methods like InstructPix2Pix and Pix2Pix Zero is that the former | |
| involves fine-tuning the pre-trained weights while the latter does not. This means that you can | |
| apply Pix2Pix Zero to any of the available Stable Diffusion models.`)},l(E){g=i(E,"P",{});var $=o(g);D=s($,`An important distinction between methods like InstructPix2Pix and Pix2Pix Zero is that the former | |
| involves fine-tuning the pre-trained weights while the latter does not. This means that you can | |
| apply Pix2Pix Zero to any of the available Stable Diffusion models.`),$.forEach(a)},m(E,$){d(E,g,$),e(g,D)},d(E){E&&a(g)}}}function C1(ga){let g,D,E,$,Y,T,V,ve,zs,Pn,Ea,Bs,yn,_a,Zs,An,wa,Ws,$n,k,Us,ro,Os,Vs,Pt,Js,Ys,yt,Ks,Qs,Tn,ba,Xs,Dn,xa,ef,kn,Pa,tf,Sn,u,io,ya,af,rf,oo,Aa,of,nf,no,$a,lf,sf,lo,Ta,ff,hf,so,Da,pf,df,fo,ka,uf,cf,ho,Sa,mf,vf,po,Ia,gf,Ef,uo,Na,_f,wf,co,Ga,bf,xf,mo,Ca,Pf,yf,vo,qa,Af,$f,go,Ra,Tf,Df,Eo,La,kf,Sf,_o,Ma,If,Nf,wo,Fa,Gf,In,ja,Cf,Nn,ge,bo,I,Ha,xo,qf,Rf,za,Po,Lf,Mf,Ba,At,Ff,jf,Hf,zf,Za,yo,Bf,Zf,c,N,Wa,Ua,Wf,Uf,Oa,Of,Vf,Va,Jf,Yf,S,Kf,Qf,Xf,eh,th,ah,rh,ih,G,Ja,Ya,oh,nh,Ka,lh,sh,Qa,fh,hh,Ao,ph,C,Xa,er,dh,uh,tr,ch,mh,ar,vh,gh,$o,Eh,q,rr,ir,_h,wh,or,bh,xh,nr,Ph,yh,To,Ah,R,lr,sr,$h,Th,fr,Dh,kh,hr,Sh,Ih,Do,Nh,L,pr,dr,Gh,Ch,ur,qh,Rh,cr,Lh,Mh,ko,Fh,M,mr,vr,jh,Hh,gr,zh,Bh,Er,Zh,Wh,So,Uh,F,_r,wr,Oh,Vh,br,Jh,Yh,xr,Kh,Qh,Io,Xh,j,Pr,yr,ep,tp,Ar,ap,rp,$r,ip,op,No,np,H,Tr,Dr,lp,sp,kr,fp,hp,Sr,pp,dp,J,up,cp,mp,vp,gp,Ep,z,Ir,Nr,_p,wp,Gr,bp,xp,Cr,Pp,yp,Go,Ap,B,qr,Rr,$p,Tp,Lr,Dp,kp,Mr,Sp,Ip,Co,Np,Z,Fr,jr,Gp,Cp,Hr,qp,Rp,zr,Lp,Mp,qo,Fp,W,Br,Zr,jp,Hp,Wr,zp,Bp,Ur,Zp,Wp,Ro,Up,U,Or,Vr,Op,Vp,Jr,Jp,Yp,Yr,Kp,Qp,Lo,Xp,O,Kr,Qr,ed,td,Xr,ad,rd,ei,id,od,Mo,Gn,K,Ee,Fo,$t,nd,jo,ld,Cn,ti,Tt,sd,qn,Q,ai,fd,hd,Dt,pd,dd,Rn,_e,ud,ri,cd,md,Ln,X,we,Ho,kt,vd,zo,gd,Mn,ii,St,Ed,Fn,It,oi,_d,wd,jn,ni,bd,Hn,li,xd,zn,be,Nt,Pd,Gt,yd,Ad,$d,Ct,Td,qt,Dd,kd,Bn,xe,Zn,Pe,Sd,si,Id,Nd,Wn,ye,Gd,fi,Cd,qd,Un,ee,Ae,Bo,Rt,Rd,Zo,Ld,On,hi,Lt,Md,Vn,Mt,pi,Fd,jd,Jn,di,Hd,Yn,$e,zd,ui,Bd,Zd,Kn,Te,Wd,ci,Ud,Od,Qn,te,De,Wo,Ft,Vd,Uo,Jd,Xn,mi,jt,Yd,el,vi,Kd,tl,gi,Qd,al,Ei,Xd,rl,ke,eu,_i,tu,au,il,ae,Se,Oo,Ht,ru,Vo,iu,ol,wi,zt,ou,nl,Bt,bi,nu,lu,ll,xi,su,sl,Ie,fu,Pi,hu,pu,fl,re,Ne,Jo,Zt,du,Yo,uu,hl,yi,Wt,cu,pl,Ut,Ai,mu,vu,dl,$i,gu,ul,Ge,Eu,Ti,_u,wu,cl,Ce,ml,ie,qe,Ko,Ot,bu,Qo,xu,vl,Di,Vt,Pu,gl,Re,yu,ki,Au,$u,El,Le,Tu,Si,Du,ku,_l,oe,Me,Xo,Jt,Su,en,Iu,wl,Ii,Nu,bl,ne,Fe,tn,Yt,Gu,an,Cu,xl,Kt,Ni,qu,Ru,Pl,je,Lu,Gi,Mu,Fu,yl,le,He,rn,Qt,ju,on,Hu,Al,Xt,Ci,zu,Bu,$l,ze,Zu,qi,Wu,Uu,Tl,se,Be,nn,ea,Ou,ln,Vu,Dl,Ri,ta,Ju,kl,fe,Li,Yu,Ku,Mi,Qu,Xu,Sl,Ze,ec,Fi,tc,ac,Il,he,We,sn,aa,rc,fn,ic,Nl,ji,oc,Gl,Ue,nc,Hi,lc,sc,Cl,pe,Oe,hn,ra,fc,pn,hc,ql,ia,zi,pc,dc,Rl,Ve,uc,Bi,cc,mc,Ll,de,Je,dn,oa,vc,un,gc,Ml,Zi,na,Ec,Fl,Ye,_c,Wi,wc,bc,jl,Ke,xc,Ui,Pc,yc,Hl,ue,Qe,cn,la,Ac,mn,$c,zl,Oi,sa,Tc,Bl,fa,Vi,Dc,kc,Zl,Xe,Sc,Ji,Ic,Nc,Wl,ce,et,vn,ha,Gc,gn,Cc,Ul,Yi,pa,qc,Ol,da,Ki,Rc,Lc,Vl,tt,Mc,Qi,Fc,jc,Jl,me,at,En,ua,Hc,_n,zc,Yl,Xi,ca,Bc,Kl,ma,eo,Zc,Wc,Ql,rt,Uc,to,Oc,Vc,Xl;return T=new A({}),$t=new A({}),kt=new A({}),xe=new $1({props:{$$slots:{default:[N1]},$$scope:{ctx:ga}}}),Rt=new A({}),Ft=new A({}),Ht=new A({}),Zt=new A({}),Ce=new $1({props:{$$slots:{default:[G1]},$$scope:{ctx:ga}}}),Ot=new A({}),Jt=new A({}),Yt=new A({}),Qt=new A({}),ea=new A({}),aa=new A({}),ra=new A({}),oa=new A({}),la=new A({}),ha=new A({}),ua=new A({}),{c(){g=r("meta"),D=f(),E=r("h1"),$=r("a"),Y=r("span"),_(T.$$.fragment),V=f(),ve=r("span"),zs=l("Controlled generation"),Pn=f(),Ea=r("p"),Bs=l("Controlling outputs generated by diffusion models has been long pursued by the community and is now an active research topic. In many popular diffusion models, subtle changes in inputs, both images and text prompts, can drastically change outputs. In an ideal world we want to be able to control how semantics are preserved and changed."),yn=f(),_a=r("p"),Zs=l("Most examples of preserving semantics reduce to being able to accurately map a change in input to a change in output. I.e. adding an adjective to a subject in a prompt preserves the entire image, only modifying the changed subject. Or, image variation of a particular subject preserves the subject\u2019s pose."),An=f(),wa=r("p"),Ws=l("Additionally, there are qualities of generated images that we would like to influence beyond semantic preservation. I.e. in general, we would like our outputs to be of good quality, adhere to a particular style, or be realistic."),$n=f(),k=r("p"),Us=l("We will document some of the techniques "),ro=r("code"),Os=l("diffusers"),Vs=l(" supports to control generation of diffusion models. Much is cutting edge research and can be quite nuanced. If something needs clarifying or you have a suggestion, don\u2019t hesitate to open a discussion on the "),Pt=r("a"),Js=l("forum"),Ys=l(" or a "),yt=r("a"),Ks=l("GitHub issue"),Qs=l("."),Tn=f(),ba=r("p"),Xs=l("We provide a high level explanation of how the generation can be controlled as well as a snippet of the technicals. For more in depth explanations on the technicals, the original papers which are linked from the pipelines are always the best resources."),Dn=f(),xa=r("p"),ef=l("Depending on the use case, one should choose a technique accordingly. In many cases, these techniques can be combined. For example, one can combine Textual Inversion with SEGA to provide more semantic guidance to the outputs generated using Textual Inversion."),kn=f(),Pa=r("p"),tf=l("Unless otherwise mentioned, these are techniques that work with existing models and don\u2019t require their own weights."),Sn=f(),u=r("ol"),io=r("li"),ya=r("a"),af=l("Instruct Pix2Pix"),rf=f(),oo=r("li"),Aa=r("a"),of=l("Pix2Pix Zero"),nf=f(),no=r("li"),$a=r("a"),lf=l("Attend and Excite"),sf=f(),lo=r("li"),Ta=r("a"),ff=l("Semantic Guidance"),hf=f(),so=r("li"),Da=r("a"),pf=l("Self-attention Guidance"),df=f(),fo=r("li"),ka=r("a"),uf=l("Depth2Image"),cf=f(),ho=r("li"),Sa=r("a"),mf=l("MultiDiffusion Panorama"),vf=f(),po=r("li"),Ia=r("a"),gf=l("DreamBooth"),Ef=f(),uo=r("li"),Na=r("a"),_f=l("Textual Inversion"),wf=f(),co=r("li"),Ga=r("a"),bf=l("ControlNet"),xf=f(),mo=r("li"),Ca=r("a"),Pf=l("Prompt Weighting"),yf=f(),vo=r("li"),qa=r("a"),Af=l("Custom Diffusion"),$f=f(),go=r("li"),Ra=r("a"),Tf=l("Model Editing"),Df=f(),Eo=r("li"),La=r("a"),kf=l("DiffEdit"),Sf=f(),_o=r("li"),Ma=r("a"),If=l("T2I-Adapter"),Nf=f(),wo=r("li"),Fa=r("a"),Gf=l("FABRIC"),In=f(),ja=r("p"),Cf=l("For convenience, we provide a table to denote which methods are inference-only and which require fine-tuning/training."),Nn=f(),ge=r("table"),bo=r("thead"),I=r("tr"),Ha=r("th"),xo=r("strong"),qf=l("Method"),Rf=f(),za=r("th"),Po=r("strong"),Lf=l("Inference only"),Mf=f(),Ba=r("th"),At=r("strong"),Ff=l("Requires training /"),jf=r("br"),Hf=l(" fine-tuning"),zf=f(),Za=r("th"),yo=r("strong"),Bf=l("Comments"),Zf=f(),c=r("tbody"),N=r("tr"),Wa=r("td"),Ua=r("a"),Wf=l("Instruct Pix2Pix"),Uf=f(),Oa=r("td"),Of=l("\u2705"),Vf=f(),Va=r("td"),Jf=l("\u274C"),Yf=f(),S=r("td"),Kf=l("Can additionally be"),Qf=r("br"),Xf=l("fine-tuned for better "),eh=r("br"),th=l("performance on specific "),ah=r("br"),rh=l("edit instructions."),ih=f(),G=r("tr"),Ja=r("td"),Ya=r("a"),oh=l("Pix2Pix Zero"),nh=f(),Ka=r("td"),lh=l("\u2705"),sh=f(),Qa=r("td"),fh=l("\u274C"),hh=f(),Ao=r("td"),ph=f(),C=r("tr"),Xa=r("td"),er=r("a"),dh=l("Attend and Excite"),uh=f(),tr=r("td"),ch=l("\u2705"),mh=f(),ar=r("td"),vh=l("\u274C"),gh=f(),$o=r("td"),Eh=f(),q=r("tr"),rr=r("td"),ir=r("a"),_h=l("Semantic Guidance"),wh=f(),or=r("td"),bh=l("\u2705"),xh=f(),nr=r("td"),Ph=l("\u274C"),yh=f(),To=r("td"),Ah=f(),R=r("tr"),lr=r("td"),sr=r("a"),$h=l("Self-attention Guidance"),Th=f(),fr=r("td"),Dh=l("\u2705"),kh=f(),hr=r("td"),Sh=l("\u274C"),Ih=f(),Do=r("td"),Nh=f(),L=r("tr"),pr=r("td"),dr=r("a"),Gh=l("Depth2Image"),Ch=f(),ur=r("td"),qh=l("\u2705"),Rh=f(),cr=r("td"),Lh=l("\u274C"),Mh=f(),ko=r("td"),Fh=f(),M=r("tr"),mr=r("td"),vr=r("a"),jh=l("MultiDiffusion Panorama"),Hh=f(),gr=r("td"),zh=l("\u2705"),Bh=f(),Er=r("td"),Zh=l("\u274C"),Wh=f(),So=r("td"),Uh=f(),F=r("tr"),_r=r("td"),wr=r("a"),Oh=l("DreamBooth"),Vh=f(),br=r("td"),Jh=l("\u274C"),Yh=f(),xr=r("td"),Kh=l("\u2705"),Qh=f(),Io=r("td"),Xh=f(),j=r("tr"),Pr=r("td"),yr=r("a"),ep=l("Textual Inversion"),tp=f(),Ar=r("td"),ap=l("\u274C"),rp=f(),$r=r("td"),ip=l("\u2705"),op=f(),No=r("td"),np=f(),H=r("tr"),Tr=r("td"),Dr=r("a"),lp=l("ControlNet"),sp=f(),kr=r("td"),fp=l("\u2705"),hp=f(),Sr=r("td"),pp=l("\u274C"),dp=f(),J=r("td"),up=l("A ControlNet can be "),cp=r("br"),mp=l("trained/fine-tuned on"),vp=r("br"),gp=l("a custom conditioning."),Ep=f(),z=r("tr"),Ir=r("td"),Nr=r("a"),_p=l("Prompt Weighting"),wp=f(),Gr=r("td"),bp=l("\u2705"),xp=f(),Cr=r("td"),Pp=l("\u274C"),yp=f(),Go=r("td"),Ap=f(),B=r("tr"),qr=r("td"),Rr=r("a"),$p=l("Custom Diffusion"),Tp=f(),Lr=r("td"),Dp=l("\u274C"),kp=f(),Mr=r("td"),Sp=l("\u2705"),Ip=f(),Co=r("td"),Np=f(),Z=r("tr"),Fr=r("td"),jr=r("a"),Gp=l("Model Editing"),Cp=f(),Hr=r("td"),qp=l("\u2705"),Rp=f(),zr=r("td"),Lp=l("\u274C"),Mp=f(),qo=r("td"),Fp=f(),W=r("tr"),Br=r("td"),Zr=r("a"),jp=l("DiffEdit"),Hp=f(),Wr=r("td"),zp=l("\u2705"),Bp=f(),Ur=r("td"),Zp=l("\u274C"),Wp=f(),Ro=r("td"),Up=f(),U=r("tr"),Or=r("td"),Vr=r("a"),Op=l("T2I-Adapter"),Vp=f(),Jr=r("td"),Jp=l("\u2705"),Yp=f(),Yr=r("td"),Kp=l("\u274C"),Qp=f(),Lo=r("td"),Xp=f(),O=r("tr"),Kr=r("td"),Qr=r("a"),ed=l("Fabric"),td=f(),Xr=r("td"),ad=l("\u2705"),rd=f(),ei=r("td"),id=l("\u274C"),od=f(),Mo=r("td"),Gn=f(),K=r("h2"),Ee=r("a"),Fo=r("span"),_($t.$$.fragment),nd=f(),jo=r("span"),ld=l("Instruct Pix2Pix"),Cn=f(),ti=r("p"),Tt=r("a"),sd=l("Paper"),qn=f(),Q=r("p"),ai=r("a"),fd=l("Instruct Pix2Pix"),hd=l(` is fine-tuned from stable diffusion to support editing input images. It takes as inputs an image and a prompt describing an edit, and it outputs the edited image. | |
| Instruct Pix2Pix has been explicitly trained to work well with `),Dt=r("a"),pd=l("InstructGPT"),dd=l("-like prompts."),Rn=f(),_e=r("p"),ud=l("See "),ri=r("a"),cd=l("here"),md=l(" for more information on how to use it."),Ln=f(),X=r("h2"),we=r("a"),Ho=r("span"),_(kt.$$.fragment),vd=f(),zo=r("span"),gd=l("Pix2Pix Zero"),Mn=f(),ii=r("p"),St=r("a"),Ed=l("Paper"),Fn=f(),It=r("p"),oi=r("a"),_d=l("Pix2Pix Zero"),wd=l(" allows modifying an image so that one concept or subject is translated to another one while preserving general image semantics."),jn=f(),ni=r("p"),bd=l("The denoising process is guided from one conceptual embedding towards another conceptual embedding. The intermediate latents are optimized during the denoising process to push the attention maps towards reference attention maps. The reference attention maps are from the denoising process of the input image and are used to encourage semantic preservation."),Hn=f(),li=r("p"),xd=l("Pix2Pix Zero can be used both to edit synthetic images as well as real images."),zn=f(),be=r("ul"),Nt=r("li"),Pd=l(`To edit synthetic images, one first generates an image given a caption. | |
| Next, we generate image captions for the concept that shall be edited and for the new target concept. We can use a model like `),Gt=r("a"),yd=l("Flan-T5"),Ad=l(" for this purpose. Then, \u201Cmean\u201D prompt embeddings for both the source and target concepts are created via the text encoder. Finally, the pix2pix-zero algorithm is used to edit the synthetic image."),$d=f(),Ct=r("li"),Td=l("To edit a real image, one first generates an image caption using a model like "),qt=r("a"),Dd=l("BLIP"),kd=l(". Then one applies ddim inversion on the prompt and image to generate \u201Cinverse\u201D latents. Similar to before, \u201Cmean\u201D prompt embeddings for both source and target concepts are created and finally the pix2pix-zero algorithm in combination with the \u201Cinverse\u201D latents is used to edit the image."),Bn=f(),_(xe.$$.fragment),Zn=f(),Pe=r("p"),Sd=l(`As mentioned above, Pix2Pix Zero includes optimizing the latents (and not any of the UNet, VAE, or the text encoder) to steer the generation toward a specific concept. This means that the overall | |
| pipeline might require more memory than a standard `),si=r("a"),Id=l("StableDiffusionPipeline"),Nd=l("."),Wn=f(),ye=r("p"),Gd=l("See "),fi=r("a"),Cd=l("here"),qd=l(" for more information on how to use it."),Un=f(),ee=r("h2"),Ae=r("a"),Bo=r("span"),_(Rt.$$.fragment),Rd=f(),Zo=r("span"),Ld=l("Attend and Excite"),On=f(),hi=r("p"),Lt=r("a"),Md=l("Paper"),Vn=f(),Mt=r("p"),pi=r("a"),Fd=l("Attend and Excite"),jd=l(" allows subjects in the prompt to be faithfully represented in the final image."),Jn=f(),di=r("p"),Hd=l("A set of token indices are given as input, corresponding to the subjects in the prompt that need to be present in the image. During denoising, each token index is guaranteed to have a minimum attention threshold for at least one patch of the image. The intermediate latents are iteratively optimized during the denoising process to strengthen the attention of the most neglected subject token until the attention threshold is passed for all subject tokens."),Yn=f(),$e=r("p"),zd=l("Like Pix2Pix Zero, Attend and Excite also involves a mini optimization loop (leaving the pre-trained weights untouched) in its pipeline and can require more memory than the usual "),ui=r("a"),Bd=l("StableDiffusionPipeline"),Zd=l("."),Kn=f(),Te=r("p"),Wd=l("See "),ci=r("a"),Ud=l("here"),Od=l(" for more information on how to use it."),Qn=f(),te=r("h2"),De=r("a"),Wo=r("span"),_(Ft.$$.fragment),Vd=f(),Uo=r("span"),Jd=l("Semantic Guidance (SEGA)"),Xn=f(),mi=r("p"),jt=r("a"),Yd=l("Paper"),el=f(),vi=r("p"),Kd=l("SEGA allows applying or removing one or more concepts from an image. The strength of the concept can also be controlled. I.e. the smile concept can be used to incrementally increase or decrease the smile of a portrait."),tl=f(),gi=r("p"),Qd=l("Similar to how classifier free guidance provides guidance via empty prompt inputs, SEGA provides guidance on conceptual prompts. Multiple of these conceptual prompts can be applied simultaneously. Each conceptual prompt can either add or remove their concept depending on if the guidance is applied positively or negatively."),al=f(),Ei=r("p"),Xd=l("Unlike Pix2Pix Zero or Attend and Excite, SEGA directly interacts with the diffusion process instead of performing any explicit gradient-based optimization."),rl=f(),ke=r("p"),eu=l("See "),_i=r("a"),tu=l("here"),au=l(" for more information on how to use it."),il=f(),ae=r("h2"),Se=r("a"),Oo=r("span"),_(Ht.$$.fragment),ru=f(),Vo=r("span"),iu=l("Self-attention Guidance (SAG)"),ol=f(),wi=r("p"),zt=r("a"),ou=l("Paper"),nl=f(),Bt=r("p"),bi=r("a"),nu=l("Self-attention Guidance"),lu=l(" improves the general quality of images."),ll=f(),xi=r("p"),su=l("SAG provides guidance from predictions not conditioned on high-frequency details to fully conditioned images. The high frequency details are extracted out of the UNet self-attention maps."),sl=f(),Ie=r("p"),fu=l("See "),Pi=r("a"),hu=l("here"),pu=l(" for more information on how to use it."),fl=f(),re=r("h2"),Ne=r("a"),Jo=r("span"),_(Zt.$$.fragment),du=f(),Yo=r("span"),uu=l("Depth2Image"),hl=f(),yi=r("p"),Wt=r("a"),cu=l("Project"),pl=f(),Ut=r("p"),Ai=r("a"),mu=l("Depth2Image"),vu=l(" is fine-tuned from Stable Diffusion to better preserve semantics for text guided image variation."),dl=f(),$i=r("p"),gu=l("It conditions on a monocular depth estimate of the original image."),ul=f(),Ge=r("p"),Eu=l("See "),Ti=r("a"),_u=l("here"),wu=l(" for more information on how to use it."),cl=f(),_(Ce.$$.fragment),ml=f(),ie=r("h2"),qe=r("a"),Ko=r("span"),_(Ot.$$.fragment),bu=f(),Qo=r("span"),xu=l("MultiDiffusion Panorama"),vl=f(),Di=r("p"),Vt=r("a"),Pu=l("Paper"),gl=f(),Re=r("p"),yu=l(`MultiDiffusion defines a new generation process over a pre-trained diffusion model. This process binds together multiple diffusion generation methods that can be readily applied to generate high quality and diverse images. Results adhere to user-provided controls, such as desired aspect ratio (e.g., panorama), and spatial guiding signals, ranging from tight segmentation masks to bounding boxes. | |
| `),ki=r("a"),Au=l("MultiDiffusion Panorama"),$u=l(" allows to generate high-quality images at arbitrary aspect ratios (e.g., panoramas)."),El=f(),Le=r("p"),Tu=l("See "),Si=r("a"),Du=l("here"),ku=l(" for more information on how to use it to generate panoramic images."),_l=f(),oe=r("h2"),Me=r("a"),Xo=r("span"),_(Jt.$$.fragment),Su=f(),en=r("span"),Iu=l("Fine-tuning your own models"),wl=f(),Ii=r("p"),Nu=l("In addition to pre-trained models, Diffusers has training scripts for fine-tuning models on user-provided data."),bl=f(),ne=r("h2"),Fe=r("a"),tn=r("span"),_(Yt.$$.fragment),Gu=f(),an=r("span"),Cu=l("DreamBooth"),xl=f(),Kt=r("p"),Ni=r("a"),qu=l("DreamBooth"),Ru=l(" fine-tunes a model to teach it about a new subject. I.e. a few pictures of a person can be used to generate images of that person in different styles."),Pl=f(),je=r("p"),Lu=l("See "),Gi=r("a"),Mu=l("here"),Fu=l(" for more information on how to use it."),yl=f(),le=r("h2"),He=r("a"),rn=r("span"),_(Qt.$$.fragment),ju=f(),on=r("span"),Hu=l("Textual Inversion"),Al=f(),Xt=r("p"),Ci=r("a"),zu=l("Textual Inversion"),Bu=l(" fine-tunes a model to teach it about a new concept. I.e. a few pictures of a style of artwork can be used to generate images in that style."),$l=f(),ze=r("p"),Zu=l("See "),qi=r("a"),Wu=l("here"),Uu=l(" for more information on how to use it."),Tl=f(),se=r("h2"),Be=r("a"),nn=r("span"),_(ea.$$.fragment),Ou=f(),ln=r("span"),Vu=l("ControlNet"),Dl=f(),Ri=r("p"),ta=r("a"),Ju=l("Paper"),kl=f(),fe=r("p"),Li=r("a"),Yu=l("ControlNet"),Ku=l(` is an auxiliary network which adds an extra condition. | |
| `),Mi=r("a"),Qu=l("ControlNet"),Xu=l(` is an auxiliary network which adds an extra condition. | |
| There are 8 canonical pre-trained ControlNets trained on different conditionings such as edge detection, scribbles, | |
| depth maps, and semantic segmentations.`),Sl=f(),Ze=r("p"),ec=l("See "),Fi=r("a"),tc=l("here"),ac=l(" for more information on how to use it."),Il=f(),he=r("h2"),We=r("a"),sn=r("span"),_(aa.$$.fragment),rc=f(),fn=r("span"),ic=l("Prompt Weighting"),Nl=f(),ji=r("p"),oc=l(`Prompt weighting is a simple technique that puts more attention weight on certain parts of the text | |
| input.`),Gl=f(),Ue=r("p"),nc=l("For a more in-detail explanation and examples, see "),Hi=r("a"),lc=l("here"),sc=l("."),Cl=f(),pe=r("h2"),Oe=r("a"),hn=r("span"),_(ra.$$.fragment),fc=f(),pn=r("span"),hc=l("Custom Diffusion"),ql=f(),ia=r("p"),zi=r("a"),pc=l("Custom Diffusion"),dc=l(` only fine-tunes the cross-attention maps of a pre-trained | |
| text-to-image diffusion model. It also allows for additionally performing textual inversion. It supports | |
| multi-concept training by design. Like DreamBooth and Textual Inversion, Custom Diffusion is also used to | |
| teach a pre-trained text-to-image diffusion model about new concepts to generate outputs involving the | |
| concept(s) of interest.`),Rl=f(),Ve=r("p"),uc=l("For more details, check out our "),Bi=r("a"),cc=l("official doc"),mc=l("."),Ll=f(),de=r("h2"),Je=r("a"),dn=r("span"),_(oa.$$.fragment),vc=f(),un=r("span"),gc=l("Model Editing"),Ml=f(),Zi=r("p"),na=r("a"),Ec=l("Paper"),Fl=f(),Ye=r("p"),_c=l("The "),Wi=r("a"),wc=l("text-to-image model editing pipeline"),bc=l(` helps you mitigate some of the incorrect implicit assumptions a pre-trained text-to-image | |
| diffusion model might make about the subjects present in the input prompt. For example, if you prompt Stable Diffusion to generate images for \u201CA pack of roses\u201D, the roses in the generated images | |
| are more likely to be red. This pipeline helps you change that assumption.`),jl=f(),Ke=r("p"),xc=l("To know more details, check out the "),Ui=r("a"),Pc=l("official doc"),yc=l("."),Hl=f(),ue=r("h2"),Qe=r("a"),cn=r("span"),_(la.$$.fragment),Ac=f(),mn=r("span"),$c=l("DiffEdit"),zl=f(),Oi=r("p"),sa=r("a"),Tc=l("Paper"),Bl=f(),fa=r("p"),Vi=r("a"),Dc=l("DiffEdit"),kc=l(` allows for semantic editing of input images along with | |
| input prompts while preserving the original input images as much as possible.`),Zl=f(),Xe=r("p"),Sc=l("To know more details, check out the "),Ji=r("a"),Ic=l("official doc"),Nc=l("."),Wl=f(),ce=r("h2"),et=r("a"),vn=r("span"),_(ha.$$.fragment),Gc=f(),gn=r("span"),Cc=l("T2I-Adapter"),Ul=f(),Yi=r("p"),pa=r("a"),qc=l("Paper"),Ol=f(),da=r("p"),Ki=r("a"),Rc=l("T2I-Adapter"),Lc=l(` is an auxiliary network which adds an extra condition. | |
| There are 8 canonical pre-trained adapters trained on different conditionings such as edge detection, sketch, | |
| depth maps, and semantic segmentations.`),Vl=f(),tt=r("p"),Mc=l("See "),Qi=r("a"),Fc=l("here"),jc=l(" for more information on how to use it."),Jl=f(),me=r("h2"),at=r("a"),En=r("span"),_(ua.$$.fragment),Hc=f(),_n=r("span"),zc=l("Fabric"),Yl=f(),Xi=r("p"),ca=r("a"),Bc=l("Paper"),Kl=f(),ma=r("p"),eo=r("a"),Zc=l("Fabric"),Wc=l(` is a training-free | |
| approach applicable to a wide range of popular diffusion models, which exploits | |
| the self-attention layer present in the most widely used architectures to condition | |
| the diffusion process on a set of feedback images.`),Ql=f(),rt=r("p"),Uc=l("To know more details, check out the "),to=r("a"),Oc=l("official doc"),Vc=l("."),this.h()},l(t){const p=S1('[data-svelte="svelte-1phssyn"]',document.head);g=i(p,"META",{name:!0,content:!0}),p.forEach(a),D=h(t),E=i(t,"H1",{class:!0});var va=o(E);$=i(va,"A",{id:!0,class:!0,href:!0});var wn=o($);Y=i(wn,"SPAN",{});var om=o(Y);w(T.$$.fragment,om),om.forEach(a),wn.forEach(a),V=h(va),ve=i(va,"SPAN",{});var nm=o(ve);zs=s(nm,"Controlled generation"),nm.forEach(a),va.forEach(a),Pn=h(t),Ea=i(t,"P",{});var lm=o(Ea);Bs=s(lm,"Controlling outputs generated by diffusion models has been long pursued by the community and is now an active research topic. In many popular diffusion models, subtle changes in inputs, both images and text prompts, can drastically change outputs. In an ideal world we want to be able to control how semantics are preserved and changed."),lm.forEach(a),yn=h(t),_a=i(t,"P",{});var sm=o(_a);Zs=s(sm,"Most examples of preserving semantics reduce to being able to accurately map a change in input to a change in output. I.e. adding an adjective to a subject in a prompt preserves the entire image, only modifying the changed subject. Or, image variation of a particular subject preserves the subject\u2019s pose."),sm.forEach(a),An=h(t),wa=i(t,"P",{});var fm=o(wa);Ws=s(fm,"Additionally, there are qualities of generated images that we would like to influence beyond semantic preservation. I.e. in general, we would like our outputs to be of good quality, adhere to a particular style, or be realistic."),fm.forEach(a),$n=h(t),k=i(t,"P",{});var it=o(k);Us=s(it,"We will document some of the techniques "),ro=i(it,"CODE",{});var hm=o(ro);Os=s(hm,"diffusers"),hm.forEach(a),Vs=s(it," supports to control generation of diffusion models. Much is cutting edge research and can be quite nuanced. If something needs clarifying or you have a suggestion, don\u2019t hesitate to open a discussion on the "),Pt=i(it,"A",{href:!0,rel:!0});var pm=o(Pt);Js=s(pm,"forum"),pm.forEach(a),Ys=s(it," or a "),yt=i(it,"A",{href:!0,rel:!0});var dm=o(yt);Ks=s(dm,"GitHub issue"),dm.forEach(a),Qs=s(it,"."),it.forEach(a),Tn=h(t),ba=i(t,"P",{});var um=o(ba);Xs=s(um,"We provide a high level explanation of how the generation can be controlled as well as a snippet of the technicals. For more in depth explanations on the technicals, the original papers which are linked from the pipelines are always the best resources."),um.forEach(a),Dn=h(t),xa=i(t,"P",{});var cm=o(xa);ef=s(cm,"Depending on the use case, one should choose a technique accordingly. In many cases, these techniques can be combined. For example, one can combine Textual Inversion with SEGA to provide more semantic guidance to the outputs generated using Textual Inversion."),cm.forEach(a),kn=h(t),Pa=i(t,"P",{});var mm=o(Pa);tf=s(mm,"Unless otherwise mentioned, these are techniques that work with existing models and don\u2019t require their own weights."),mm.forEach(a),Sn=h(t),u=i(t,"OL",{});var m=o(u);io=i(m,"LI",{});var vm=o(io);ya=i(vm,"A",{href:!0});var gm=o(ya);af=s(gm,"Instruct Pix2Pix"),gm.forEach(a),vm.forEach(a),rf=h(m),oo=i(m,"LI",{});var Em=o(oo);Aa=i(Em,"A",{href:!0});var _m=o(Aa);of=s(_m,"Pix2Pix Zero"),_m.forEach(a),Em.forEach(a),nf=h(m),no=i(m,"LI",{});var wm=o(no);$a=i(wm,"A",{href:!0});var bm=o($a);lf=s(bm,"Attend and Excite"),bm.forEach(a),wm.forEach(a),sf=h(m),lo=i(m,"LI",{});var xm=o(lo);Ta=i(xm,"A",{href:!0});var Pm=o(Ta);ff=s(Pm,"Semantic Guidance"),Pm.forEach(a),xm.forEach(a),hf=h(m),so=i(m,"LI",{});var ym=o(so);Da=i(ym,"A",{href:!0});var Am=o(Da);pf=s(Am,"Self-attention Guidance"),Am.forEach(a),ym.forEach(a),df=h(m),fo=i(m,"LI",{});var $m=o(fo);ka=i($m,"A",{href:!0});var Tm=o(ka);uf=s(Tm,"Depth2Image"),Tm.forEach(a),$m.forEach(a),cf=h(m),ho=i(m,"LI",{});var Dm=o(ho);Sa=i(Dm,"A",{href:!0});var km=o(Sa);mf=s(km,"MultiDiffusion Panorama"),km.forEach(a),Dm.forEach(a),vf=h(m),po=i(m,"LI",{});var Sm=o(po);Ia=i(Sm,"A",{href:!0});var Im=o(Ia);gf=s(Im,"DreamBooth"),Im.forEach(a),Sm.forEach(a),Ef=h(m),uo=i(m,"LI",{});var Nm=o(uo);Na=i(Nm,"A",{href:!0});var Gm=o(Na);_f=s(Gm,"Textual Inversion"),Gm.forEach(a),Nm.forEach(a),wf=h(m),co=i(m,"LI",{});var Cm=o(co);Ga=i(Cm,"A",{href:!0});var qm=o(Ga);bf=s(qm,"ControlNet"),qm.forEach(a),Cm.forEach(a),xf=h(m),mo=i(m,"LI",{});var Rm=o(mo);Ca=i(Rm,"A",{href:!0});var Lm=o(Ca);Pf=s(Lm,"Prompt Weighting"),Lm.forEach(a),Rm.forEach(a),yf=h(m),vo=i(m,"LI",{});var Mm=o(vo);qa=i(Mm,"A",{href:!0});var Fm=o(qa);Af=s(Fm,"Custom Diffusion"),Fm.forEach(a),Mm.forEach(a),$f=h(m),go=i(m,"LI",{});var jm=o(go);Ra=i(jm,"A",{href:!0});var Hm=o(Ra);Tf=s(Hm,"Model Editing"),Hm.forEach(a),jm.forEach(a),Df=h(m),Eo=i(m,"LI",{});var zm=o(Eo);La=i(zm,"A",{href:!0});var Bm=o(La);kf=s(Bm,"DiffEdit"),Bm.forEach(a),zm.forEach(a),Sf=h(m),_o=i(m,"LI",{});var Zm=o(_o);Ma=i(Zm,"A",{href:!0});var Wm=o(Ma);If=s(Wm,"T2I-Adapter"),Wm.forEach(a),Zm.forEach(a),Nf=h(m),wo=i(m,"LI",{});var Um=o(wo);Fa=i(Um,"A",{href:!0});var Om=o(Fa);Gf=s(Om,"FABRIC"),Om.forEach(a),Um.forEach(a),m.forEach(a),In=h(t),ja=i(t,"P",{});var Vm=o(ja);Cf=s(Vm,"For convenience, we provide a table to denote which methods are inference-only and which require fine-tuning/training."),Vm.forEach(a),Nn=h(t),ge=i(t,"TABLE",{});var es=o(ge);bo=i(es,"THEAD",{});var Jm=o(bo);I=i(Jm,"TR",{});var ot=o(I);Ha=i(ot,"TH",{align:!0});var Ym=o(Ha);xo=i(Ym,"STRONG",{});var Km=o(xo);qf=s(Km,"Method"),Km.forEach(a),Ym.forEach(a),Rf=h(ot),za=i(ot,"TH",{align:!0});var Qm=o(za);Po=i(Qm,"STRONG",{});var Xm=o(Po);Lf=s(Xm,"Inference only"),Xm.forEach(a),Qm.forEach(a),Mf=h(ot),Ba=i(ot,"TH",{align:!0});var ev=o(Ba);At=i(ev,"STRONG",{});var ts=o(At);Ff=s(ts,"Requires training /"),jf=i(ts,"BR",{}),Hf=s(ts," fine-tuning"),ts.forEach(a),ev.forEach(a),zf=h(ot),Za=i(ot,"TH",{align:!0});var tv=o(Za);yo=i(tv,"STRONG",{});var av=o(yo);Bf=s(av,"Comments"),av.forEach(a),tv.forEach(a),ot.forEach(a),Jm.forEach(a),Zf=h(es),c=i(es,"TBODY",{});var v=o(c);N=i(v,"TR",{});var nt=o(N);Wa=i(nt,"TD",{align:!0});var rv=o(Wa);Ua=i(rv,"A",{href:!0});var iv=o(Ua);Wf=s(iv,"Instruct Pix2Pix"),iv.forEach(a),rv.forEach(a),Uf=h(nt),Oa=i(nt,"TD",{align:!0});var ov=o(Oa);Of=s(ov,"\u2705"),ov.forEach(a),Vf=h(nt),Va=i(nt,"TD",{align:!0});var nv=o(Va);Jf=s(nv,"\u274C"),nv.forEach(a),Yf=h(nt),S=i(nt,"TD",{align:!0});var lt=o(S);Kf=s(lt,"Can additionally be"),Qf=i(lt,"BR",{}),Xf=s(lt,"fine-tuned for better "),eh=i(lt,"BR",{}),th=s(lt,"performance on specific "),ah=i(lt,"BR",{}),rh=s(lt,"edit instructions."),lt.forEach(a),nt.forEach(a),ih=h(v),G=i(v,"TR",{});var st=o(G);Ja=i(st,"TD",{align:!0});var lv=o(Ja);Ya=i(lv,"A",{href:!0});var sv=o(Ya);oh=s(sv,"Pix2Pix Zero"),sv.forEach(a),lv.forEach(a),nh=h(st),Ka=i(st,"TD",{align:!0});var fv=o(Ka);lh=s(fv,"\u2705"),fv.forEach(a),sh=h(st),Qa=i(st,"TD",{align:!0});var hv=o(Qa);fh=s(hv,"\u274C"),hv.forEach(a),hh=h(st),Ao=i(st,"TD",{align:!0}),o(Ao).forEach(a),st.forEach(a),ph=h(v),C=i(v,"TR",{});var ft=o(C);Xa=i(ft,"TD",{align:!0});var pv=o(Xa);er=i(pv,"A",{href:!0});var dv=o(er);dh=s(dv,"Attend and Excite"),dv.forEach(a),pv.forEach(a),uh=h(ft),tr=i(ft,"TD",{align:!0});var uv=o(tr);ch=s(uv,"\u2705"),uv.forEach(a),mh=h(ft),ar=i(ft,"TD",{align:!0});var cv=o(ar);vh=s(cv,"\u274C"),cv.forEach(a),gh=h(ft),$o=i(ft,"TD",{align:!0}),o($o).forEach(a),ft.forEach(a),Eh=h(v),q=i(v,"TR",{});var ht=o(q);rr=i(ht,"TD",{align:!0});var mv=o(rr);ir=i(mv,"A",{href:!0});var vv=o(ir);_h=s(vv,"Semantic Guidance"),vv.forEach(a),mv.forEach(a),wh=h(ht),or=i(ht,"TD",{align:!0});var gv=o(or);bh=s(gv,"\u2705"),gv.forEach(a),xh=h(ht),nr=i(ht,"TD",{align:!0});var Ev=o(nr);Ph=s(Ev,"\u274C"),Ev.forEach(a),yh=h(ht),To=i(ht,"TD",{align:!0}),o(To).forEach(a),ht.forEach(a),Ah=h(v),R=i(v,"TR",{});var pt=o(R);lr=i(pt,"TD",{align:!0});var _v=o(lr);sr=i(_v,"A",{href:!0});var wv=o(sr);$h=s(wv,"Self-attention Guidance"),wv.forEach(a),_v.forEach(a),Th=h(pt),fr=i(pt,"TD",{align:!0});var bv=o(fr);Dh=s(bv,"\u2705"),bv.forEach(a),kh=h(pt),hr=i(pt,"TD",{align:!0});var xv=o(hr);Sh=s(xv,"\u274C"),xv.forEach(a),Ih=h(pt),Do=i(pt,"TD",{align:!0}),o(Do).forEach(a),pt.forEach(a),Nh=h(v),L=i(v,"TR",{});var dt=o(L);pr=i(dt,"TD",{align:!0});var Pv=o(pr);dr=i(Pv,"A",{href:!0});var yv=o(dr);Gh=s(yv,"Depth2Image"),yv.forEach(a),Pv.forEach(a),Ch=h(dt),ur=i(dt,"TD",{align:!0});var Av=o(ur);qh=s(Av,"\u2705"),Av.forEach(a),Rh=h(dt),cr=i(dt,"TD",{align:!0});var $v=o(cr);Lh=s($v,"\u274C"),$v.forEach(a),Mh=h(dt),ko=i(dt,"TD",{align:!0}),o(ko).forEach(a),dt.forEach(a),Fh=h(v),M=i(v,"TR",{});var ut=o(M);mr=i(ut,"TD",{align:!0});var Tv=o(mr);vr=i(Tv,"A",{href:!0});var Dv=o(vr);jh=s(Dv,"MultiDiffusion Panorama"),Dv.forEach(a),Tv.forEach(a),Hh=h(ut),gr=i(ut,"TD",{align:!0});var kv=o(gr);zh=s(kv,"\u2705"),kv.forEach(a),Bh=h(ut),Er=i(ut,"TD",{align:!0});var Sv=o(Er);Zh=s(Sv,"\u274C"),Sv.forEach(a),Wh=h(ut),So=i(ut,"TD",{align:!0}),o(So).forEach(a),ut.forEach(a),Uh=h(v),F=i(v,"TR",{});var ct=o(F);_r=i(ct,"TD",{align:!0});var Iv=o(_r);wr=i(Iv,"A",{href:!0});var Nv=o(wr);Oh=s(Nv,"DreamBooth"),Nv.forEach(a),Iv.forEach(a),Vh=h(ct),br=i(ct,"TD",{align:!0});var Gv=o(br);Jh=s(Gv,"\u274C"),Gv.forEach(a),Yh=h(ct),xr=i(ct,"TD",{align:!0});var Cv=o(xr);Kh=s(Cv,"\u2705"),Cv.forEach(a),Qh=h(ct),Io=i(ct,"TD",{align:!0}),o(Io).forEach(a),ct.forEach(a),Xh=h(v),j=i(v,"TR",{});var mt=o(j);Pr=i(mt,"TD",{align:!0});var qv=o(Pr);yr=i(qv,"A",{href:!0});var Rv=o(yr);ep=s(Rv,"Textual Inversion"),Rv.forEach(a),qv.forEach(a),tp=h(mt),Ar=i(mt,"TD",{align:!0});var Lv=o(Ar);ap=s(Lv,"\u274C"),Lv.forEach(a),rp=h(mt),$r=i(mt,"TD",{align:!0});var Mv=o($r);ip=s(Mv,"\u2705"),Mv.forEach(a),op=h(mt),No=i(mt,"TD",{align:!0}),o(No).forEach(a),mt.forEach(a),np=h(v),H=i(v,"TR",{});var vt=o(H);Tr=i(vt,"TD",{align:!0});var Fv=o(Tr);Dr=i(Fv,"A",{href:!0});var jv=o(Dr);lp=s(jv,"ControlNet"),jv.forEach(a),Fv.forEach(a),sp=h(vt),kr=i(vt,"TD",{align:!0});var Hv=o(kr);fp=s(Hv,"\u2705"),Hv.forEach(a),hp=h(vt),Sr=i(vt,"TD",{align:!0});var zv=o(Sr);pp=s(zv,"\u274C"),zv.forEach(a),dp=h(vt),J=i(vt,"TD",{align:!0});var ao=o(J);up=s(ao,"A ControlNet can be "),cp=i(ao,"BR",{}),mp=s(ao,"trained/fine-tuned on"),vp=i(ao,"BR",{}),gp=s(ao,"a custom conditioning."),ao.forEach(a),vt.forEach(a),Ep=h(v),z=i(v,"TR",{});var gt=o(z);Ir=i(gt,"TD",{align:!0});var Bv=o(Ir);Nr=i(Bv,"A",{href:!0});var Zv=o(Nr);_p=s(Zv,"Prompt Weighting"),Zv.forEach(a),Bv.forEach(a),wp=h(gt),Gr=i(gt,"TD",{align:!0});var Wv=o(Gr);bp=s(Wv,"\u2705"),Wv.forEach(a),xp=h(gt),Cr=i(gt,"TD",{align:!0});var Uv=o(Cr);Pp=s(Uv,"\u274C"),Uv.forEach(a),yp=h(gt),Go=i(gt,"TD",{align:!0}),o(Go).forEach(a),gt.forEach(a),Ap=h(v),B=i(v,"TR",{});var Et=o(B);qr=i(Et,"TD",{align:!0});var Ov=o(qr);Rr=i(Ov,"A",{href:!0});var Vv=o(Rr);$p=s(Vv,"Custom Diffusion"),Vv.forEach(a),Ov.forEach(a),Tp=h(Et),Lr=i(Et,"TD",{align:!0});var Jv=o(Lr);Dp=s(Jv,"\u274C"),Jv.forEach(a),kp=h(Et),Mr=i(Et,"TD",{align:!0});var Yv=o(Mr);Sp=s(Yv,"\u2705"),Yv.forEach(a),Ip=h(Et),Co=i(Et,"TD",{align:!0}),o(Co).forEach(a),Et.forEach(a),Np=h(v),Z=i(v,"TR",{});var _t=o(Z);Fr=i(_t,"TD",{align:!0});var Kv=o(Fr);jr=i(Kv,"A",{href:!0});var Qv=o(jr);Gp=s(Qv,"Model Editing"),Qv.forEach(a),Kv.forEach(a),Cp=h(_t),Hr=i(_t,"TD",{align:!0});var Xv=o(Hr);qp=s(Xv,"\u2705"),Xv.forEach(a),Rp=h(_t),zr=i(_t,"TD",{align:!0});var e2=o(zr);Lp=s(e2,"\u274C"),e2.forEach(a),Mp=h(_t),qo=i(_t,"TD",{align:!0}),o(qo).forEach(a),_t.forEach(a),Fp=h(v),W=i(v,"TR",{});var wt=o(W);Br=i(wt,"TD",{align:!0});var t2=o(Br);Zr=i(t2,"A",{href:!0});var a2=o(Zr);jp=s(a2,"DiffEdit"),a2.forEach(a),t2.forEach(a),Hp=h(wt),Wr=i(wt,"TD",{align:!0});var r2=o(Wr);zp=s(r2,"\u2705"),r2.forEach(a),Bp=h(wt),Ur=i(wt,"TD",{align:!0});var i2=o(Ur);Zp=s(i2,"\u274C"),i2.forEach(a),Wp=h(wt),Ro=i(wt,"TD",{align:!0}),o(Ro).forEach(a),wt.forEach(a),Up=h(v),U=i(v,"TR",{});var bt=o(U);Or=i(bt,"TD",{align:!0});var o2=o(Or);Vr=i(o2,"A",{href:!0});var n2=o(Vr);Op=s(n2,"T2I-Adapter"),n2.forEach(a),o2.forEach(a),Vp=h(bt),Jr=i(bt,"TD",{align:!0});var l2=o(Jr);Jp=s(l2,"\u2705"),l2.forEach(a),Yp=h(bt),Yr=i(bt,"TD",{align:!0});var s2=o(Yr);Kp=s(s2,"\u274C"),s2.forEach(a),Qp=h(bt),Lo=i(bt,"TD",{align:!0}),o(Lo).forEach(a),bt.forEach(a),Xp=h(v),O=i(v,"TR",{});var xt=o(O);Kr=i(xt,"TD",{align:!0});var f2=o(Kr);Qr=i(f2,"A",{href:!0});var h2=o(Qr);ed=s(h2,"Fabric"),h2.forEach(a),f2.forEach(a),td=h(xt),Xr=i(xt,"TD",{align:!0});var p2=o(Xr);ad=s(p2,"\u2705"),p2.forEach(a),rd=h(xt),ei=i(xt,"TD",{align:!0});var d2=o(ei);id=s(d2,"\u274C"),d2.forEach(a),od=h(xt),Mo=i(xt,"TD",{align:!0}),o(Mo).forEach(a),xt.forEach(a),v.forEach(a),es.forEach(a),Gn=h(t),K=i(t,"H2",{class:!0});var as=o(K);Ee=i(as,"A",{id:!0,class:!0,href:!0});var u2=o(Ee);Fo=i(u2,"SPAN",{});var c2=o(Fo);w($t.$$.fragment,c2),c2.forEach(a),u2.forEach(a),nd=h(as),jo=i(as,"SPAN",{});var m2=o(jo);ld=s(m2,"Instruct Pix2Pix"),m2.forEach(a),as.forEach(a),Cn=h(t),ti=i(t,"P",{});var v2=o(ti);Tt=i(v2,"A",{href:!0,rel:!0});var g2=o(Tt);sd=s(g2,"Paper"),g2.forEach(a),v2.forEach(a),qn=h(t),Q=i(t,"P",{});var bn=o(Q);ai=i(bn,"A",{href:!0});var E2=o(ai);fd=s(E2,"Instruct Pix2Pix"),E2.forEach(a),hd=s(bn,` is fine-tuned from stable diffusion to support editing input images. It takes as inputs an image and a prompt describing an edit, and it outputs the edited image. | |
| Instruct Pix2Pix has been explicitly trained to work well with `),Dt=i(bn,"A",{href:!0,rel:!0});var _2=o(Dt);pd=s(_2,"InstructGPT"),_2.forEach(a),dd=s(bn,"-like prompts."),bn.forEach(a),Rn=h(t),_e=i(t,"P",{});var rs=o(_e);ud=s(rs,"See "),ri=i(rs,"A",{href:!0});var w2=o(ri);cd=s(w2,"here"),w2.forEach(a),md=s(rs," for more information on how to use it."),rs.forEach(a),Ln=h(t),X=i(t,"H2",{class:!0});var is=o(X);we=i(is,"A",{id:!0,class:!0,href:!0});var b2=o(we);Ho=i(b2,"SPAN",{});var x2=o(Ho);w(kt.$$.fragment,x2),x2.forEach(a),b2.forEach(a),vd=h(is),zo=i(is,"SPAN",{});var P2=o(zo);gd=s(P2,"Pix2Pix Zero"),P2.forEach(a),is.forEach(a),Mn=h(t),ii=i(t,"P",{});var y2=o(ii);St=i(y2,"A",{href:!0,rel:!0});var A2=o(St);Ed=s(A2,"Paper"),A2.forEach(a),y2.forEach(a),Fn=h(t),It=i(t,"P",{});var Jc=o(It);oi=i(Jc,"A",{href:!0});var $2=o(oi);_d=s($2,"Pix2Pix Zero"),$2.forEach(a),wd=s(Jc," allows modifying an image so that one concept or subject is translated to another one while preserving general image semantics."),Jc.forEach(a),jn=h(t),ni=i(t,"P",{});var T2=o(ni);bd=s(T2,"The denoising process is guided from one conceptual embedding towards another conceptual embedding. The intermediate latents are optimized during the denoising process to push the attention maps towards reference attention maps. The reference attention maps are from the denoising process of the input image and are used to encourage semantic preservation."),T2.forEach(a),Hn=h(t),li=i(t,"P",{});var D2=o(li);xd=s(D2,"Pix2Pix Zero can be used both to edit synthetic images as well as real images."),D2.forEach(a),zn=h(t),be=i(t,"UL",{});var os=o(be);Nt=i(os,"LI",{});var ns=o(Nt);Pd=s(ns,`To edit synthetic images, one first generates an image given a caption. | |
| Next, we generate image captions for the concept that shall be edited and for the new target concept. We can use a model like `),Gt=i(ns,"A",{href:!0,rel:!0});var k2=o(Gt);yd=s(k2,"Flan-T5"),k2.forEach(a),Ad=s(ns," for this purpose. Then, \u201Cmean\u201D prompt embeddings for both the source and target concepts are created via the text encoder. Finally, the pix2pix-zero algorithm is used to edit the synthetic image."),ns.forEach(a),$d=h(os),Ct=i(os,"LI",{});var ls=o(Ct);Td=s(ls,"To edit a real image, one first generates an image caption using a model like "),qt=i(ls,"A",{href:!0,rel:!0});var S2=o(qt);Dd=s(S2,"BLIP"),S2.forEach(a),kd=s(ls,". Then one applies ddim inversion on the prompt and image to generate \u201Cinverse\u201D latents. Similar to before, \u201Cmean\u201D prompt embeddings for both source and target concepts are created and finally the pix2pix-zero algorithm in combination with the \u201Cinverse\u201D latents is used to edit the image."),ls.forEach(a),os.forEach(a),Bn=h(t),w(xe.$$.fragment,t),Zn=h(t),Pe=i(t,"P",{});var ss=o(Pe);Sd=s(ss,`As mentioned above, Pix2Pix Zero includes optimizing the latents (and not any of the UNet, VAE, or the text encoder) to steer the generation toward a specific concept. This means that the overall | |
| pipeline might require more memory than a standard `),si=i(ss,"A",{href:!0});var I2=o(si);Id=s(I2,"StableDiffusionPipeline"),I2.forEach(a),Nd=s(ss,"."),ss.forEach(a),Wn=h(t),ye=i(t,"P",{});var fs=o(ye);Gd=s(fs,"See "),fi=i(fs,"A",{href:!0});var N2=o(fi);Cd=s(N2,"here"),N2.forEach(a),qd=s(fs," for more information on how to use it."),fs.forEach(a),Un=h(t),ee=i(t,"H2",{class:!0});var hs=o(ee);Ae=i(hs,"A",{id:!0,class:!0,href:!0});var G2=o(Ae);Bo=i(G2,"SPAN",{});var C2=o(Bo);w(Rt.$$.fragment,C2),C2.forEach(a),G2.forEach(a),Rd=h(hs),Zo=i(hs,"SPAN",{});var q2=o(Zo);Ld=s(q2,"Attend and Excite"),q2.forEach(a),hs.forEach(a),On=h(t),hi=i(t,"P",{});var R2=o(hi);Lt=i(R2,"A",{href:!0,rel:!0});var L2=o(Lt);Md=s(L2,"Paper"),L2.forEach(a),R2.forEach(a),Vn=h(t),Mt=i(t,"P",{});var Yc=o(Mt);pi=i(Yc,"A",{href:!0});var M2=o(pi);Fd=s(M2,"Attend and Excite"),M2.forEach(a),jd=s(Yc," allows subjects in the prompt to be faithfully represented in the final image."),Yc.forEach(a),Jn=h(t),di=i(t,"P",{});var F2=o(di);Hd=s(F2,"A set of token indices are given as input, corresponding to the subjects in the prompt that need to be present in the image. During denoising, each token index is guaranteed to have a minimum attention threshold for at least one patch of the image. The intermediate latents are iteratively optimized during the denoising process to strengthen the attention of the most neglected subject token until the attention threshold is passed for all subject tokens."),F2.forEach(a),Yn=h(t),$e=i(t,"P",{});var ps=o($e);zd=s(ps,"Like Pix2Pix Zero, Attend and Excite also involves a mini optimization loop (leaving the pre-trained weights untouched) in its pipeline and can require more memory than the usual "),ui=i(ps,"A",{href:!0});var j2=o(ui);Bd=s(j2,"StableDiffusionPipeline"),j2.forEach(a),Zd=s(ps,"."),ps.forEach(a),Kn=h(t),Te=i(t,"P",{});var ds=o(Te);Wd=s(ds,"See "),ci=i(ds,"A",{href:!0});var H2=o(ci);Ud=s(H2,"here"),H2.forEach(a),Od=s(ds," for more information on how to use it."),ds.forEach(a),Qn=h(t),te=i(t,"H2",{class:!0});var us=o(te);De=i(us,"A",{id:!0,class:!0,href:!0});var z2=o(De);Wo=i(z2,"SPAN",{});var B2=o(Wo);w(Ft.$$.fragment,B2),B2.forEach(a),z2.forEach(a),Vd=h(us),Uo=i(us,"SPAN",{});var Z2=o(Uo);Jd=s(Z2,"Semantic Guidance (SEGA)"),Z2.forEach(a),us.forEach(a),Xn=h(t),mi=i(t,"P",{});var W2=o(mi);jt=i(W2,"A",{href:!0,rel:!0});var U2=o(jt);Yd=s(U2,"Paper"),U2.forEach(a),W2.forEach(a),el=h(t),vi=i(t,"P",{});var O2=o(vi);Kd=s(O2,"SEGA allows applying or removing one or more concepts from an image. The strength of the concept can also be controlled. I.e. the smile concept can be used to incrementally increase or decrease the smile of a portrait."),O2.forEach(a),tl=h(t),gi=i(t,"P",{});var V2=o(gi);Qd=s(V2,"Similar to how classifier free guidance provides guidance via empty prompt inputs, SEGA provides guidance on conceptual prompts. Multiple of these conceptual prompts can be applied simultaneously. Each conceptual prompt can either add or remove their concept depending on if the guidance is applied positively or negatively."),V2.forEach(a),al=h(t),Ei=i(t,"P",{});var J2=o(Ei);Xd=s(J2,"Unlike Pix2Pix Zero or Attend and Excite, SEGA directly interacts with the diffusion process instead of performing any explicit gradient-based optimization."),J2.forEach(a),rl=h(t),ke=i(t,"P",{});var cs=o(ke);eu=s(cs,"See "),_i=i(cs,"A",{href:!0});var Y2=o(_i);tu=s(Y2,"here"),Y2.forEach(a),au=s(cs," for more information on how to use it."),cs.forEach(a),il=h(t),ae=i(t,"H2",{class:!0});var ms=o(ae);Se=i(ms,"A",{id:!0,class:!0,href:!0});var K2=o(Se);Oo=i(K2,"SPAN",{});var Q2=o(Oo);w(Ht.$$.fragment,Q2),Q2.forEach(a),K2.forEach(a),ru=h(ms),Vo=i(ms,"SPAN",{});var X2=o(Vo);iu=s(X2,"Self-attention Guidance (SAG)"),X2.forEach(a),ms.forEach(a),ol=h(t),wi=i(t,"P",{});var eg=o(wi);zt=i(eg,"A",{href:!0,rel:!0});var tg=o(zt);ou=s(tg,"Paper"),tg.forEach(a),eg.forEach(a),nl=h(t),Bt=i(t,"P",{});var Kc=o(Bt);bi=i(Kc,"A",{href:!0});var ag=o(bi);nu=s(ag,"Self-attention Guidance"),ag.forEach(a),lu=s(Kc," improves the general quality of images."),Kc.forEach(a),ll=h(t),xi=i(t,"P",{});var rg=o(xi);su=s(rg,"SAG provides guidance from predictions not conditioned on high-frequency details to fully conditioned images. The high frequency details are extracted out of the UNet self-attention maps."),rg.forEach(a),sl=h(t),Ie=i(t,"P",{});var vs=o(Ie);fu=s(vs,"See "),Pi=i(vs,"A",{href:!0});var ig=o(Pi);hu=s(ig,"here"),ig.forEach(a),pu=s(vs," for more information on how to use it."),vs.forEach(a),fl=h(t),re=i(t,"H2",{class:!0});var gs=o(re);Ne=i(gs,"A",{id:!0,class:!0,href:!0});var og=o(Ne);Jo=i(og,"SPAN",{});var ng=o(Jo);w(Zt.$$.fragment,ng),ng.forEach(a),og.forEach(a),du=h(gs),Yo=i(gs,"SPAN",{});var lg=o(Yo);uu=s(lg,"Depth2Image"),lg.forEach(a),gs.forEach(a),hl=h(t),yi=i(t,"P",{});var sg=o(yi);Wt=i(sg,"A",{href:!0,rel:!0});var fg=o(Wt);cu=s(fg,"Project"),fg.forEach(a),sg.forEach(a),pl=h(t),Ut=i(t,"P",{});var Qc=o(Ut);Ai=i(Qc,"A",{href:!0});var hg=o(Ai);mu=s(hg,"Depth2Image"),hg.forEach(a),vu=s(Qc," is fine-tuned from Stable Diffusion to better preserve semantics for text guided image variation."),Qc.forEach(a),dl=h(t),$i=i(t,"P",{});var pg=o($i);gu=s(pg,"It conditions on a monocular depth estimate of the original image."),pg.forEach(a),ul=h(t),Ge=i(t,"P",{});var Es=o(Ge);Eu=s(Es,"See "),Ti=i(Es,"A",{href:!0});var dg=o(Ti);_u=s(dg,"here"),dg.forEach(a),wu=s(Es," for more information on how to use it."),Es.forEach(a),cl=h(t),w(Ce.$$.fragment,t),ml=h(t),ie=i(t,"H2",{class:!0});var _s=o(ie);qe=i(_s,"A",{id:!0,class:!0,href:!0});var ug=o(qe);Ko=i(ug,"SPAN",{});var cg=o(Ko);w(Ot.$$.fragment,cg),cg.forEach(a),ug.forEach(a),bu=h(_s),Qo=i(_s,"SPAN",{});var mg=o(Qo);xu=s(mg,"MultiDiffusion Panorama"),mg.forEach(a),_s.forEach(a),vl=h(t),Di=i(t,"P",{});var vg=o(Di);Vt=i(vg,"A",{href:!0,rel:!0});var gg=o(Vt);Pu=s(gg,"Paper"),gg.forEach(a),vg.forEach(a),gl=h(t),Re=i(t,"P",{});var ws=o(Re);yu=s(ws,`MultiDiffusion defines a new generation process over a pre-trained diffusion model. This process binds together multiple diffusion generation methods that can be readily applied to generate high quality and diverse images. Results adhere to user-provided controls, such as desired aspect ratio (e.g., panorama), and spatial guiding signals, ranging from tight segmentation masks to bounding boxes. | |
| `),ki=i(ws,"A",{href:!0});var Eg=o(ki);Au=s(Eg,"MultiDiffusion Panorama"),Eg.forEach(a),$u=s(ws," allows to generate high-quality images at arbitrary aspect ratios (e.g., panoramas)."),ws.forEach(a),El=h(t),Le=i(t,"P",{});var bs=o(Le);Tu=s(bs,"See "),Si=i(bs,"A",{href:!0});var _g=o(Si);Du=s(_g,"here"),_g.forEach(a),ku=s(bs," for more information on how to use it to generate panoramic images."),bs.forEach(a),_l=h(t),oe=i(t,"H2",{class:!0});var xs=o(oe);Me=i(xs,"A",{id:!0,class:!0,href:!0});var wg=o(Me);Xo=i(wg,"SPAN",{});var bg=o(Xo);w(Jt.$$.fragment,bg),bg.forEach(a),wg.forEach(a),Su=h(xs),en=i(xs,"SPAN",{});var xg=o(en);Iu=s(xg,"Fine-tuning your own models"),xg.forEach(a),xs.forEach(a),wl=h(t),Ii=i(t,"P",{});var Pg=o(Ii);Nu=s(Pg,"In addition to pre-trained models, Diffusers has training scripts for fine-tuning models on user-provided data."),Pg.forEach(a),bl=h(t),ne=i(t,"H2",{class:!0});var Ps=o(ne);Fe=i(Ps,"A",{id:!0,class:!0,href:!0});var yg=o(Fe);tn=i(yg,"SPAN",{});var Ag=o(tn);w(Yt.$$.fragment,Ag),Ag.forEach(a),yg.forEach(a),Gu=h(Ps),an=i(Ps,"SPAN",{});var $g=o(an);Cu=s($g,"DreamBooth"),$g.forEach(a),Ps.forEach(a),xl=h(t),Kt=i(t,"P",{});var Xc=o(Kt);Ni=i(Xc,"A",{href:!0});var Tg=o(Ni);qu=s(Tg,"DreamBooth"),Tg.forEach(a),Ru=s(Xc," fine-tunes a model to teach it about a new subject. I.e. a few pictures of a person can be used to generate images of that person in different styles."),Xc.forEach(a),Pl=h(t),je=i(t,"P",{});var ys=o(je);Lu=s(ys,"See "),Gi=i(ys,"A",{href:!0});var Dg=o(Gi);Mu=s(Dg,"here"),Dg.forEach(a),Fu=s(ys," for more information on how to use it."),ys.forEach(a),yl=h(t),le=i(t,"H2",{class:!0});var As=o(le);He=i(As,"A",{id:!0,class:!0,href:!0});var kg=o(He);rn=i(kg,"SPAN",{});var Sg=o(rn);w(Qt.$$.fragment,Sg),Sg.forEach(a),kg.forEach(a),ju=h(As),on=i(As,"SPAN",{});var Ig=o(on);Hu=s(Ig,"Textual Inversion"),Ig.forEach(a),As.forEach(a),Al=h(t),Xt=i(t,"P",{});var em=o(Xt);Ci=i(em,"A",{href:!0});var Ng=o(Ci);zu=s(Ng,"Textual Inversion"),Ng.forEach(a),Bu=s(em," fine-tunes a model to teach it about a new concept. I.e. a few pictures of a style of artwork can be used to generate images in that style."),em.forEach(a),$l=h(t),ze=i(t,"P",{});var $s=o(ze);Zu=s($s,"See "),qi=i($s,"A",{href:!0});var Gg=o(qi);Wu=s(Gg,"here"),Gg.forEach(a),Uu=s($s," for more information on how to use it."),$s.forEach(a),Tl=h(t),se=i(t,"H2",{class:!0});var Ts=o(se);Be=i(Ts,"A",{id:!0,class:!0,href:!0});var Cg=o(Be);nn=i(Cg,"SPAN",{});var qg=o(nn);w(ea.$$.fragment,qg),qg.forEach(a),Cg.forEach(a),Ou=h(Ts),ln=i(Ts,"SPAN",{});var Rg=o(ln);Vu=s(Rg,"ControlNet"),Rg.forEach(a),Ts.forEach(a),Dl=h(t),Ri=i(t,"P",{});var Lg=o(Ri);ta=i(Lg,"A",{href:!0,rel:!0});var Mg=o(ta);Ju=s(Mg,"Paper"),Mg.forEach(a),Lg.forEach(a),kl=h(t),fe=i(t,"P",{});var xn=o(fe);Li=i(xn,"A",{href:!0});var Fg=o(Li);Yu=s(Fg,"ControlNet"),Fg.forEach(a),Ku=s(xn,` is an auxiliary network which adds an extra condition. | |
| `),Mi=i(xn,"A",{href:!0});var jg=o(Mi);Qu=s(jg,"ControlNet"),jg.forEach(a),Xu=s(xn,` is an auxiliary network which adds an extra condition. | |
| There are 8 canonical pre-trained ControlNets trained on different conditionings such as edge detection, scribbles, | |
| depth maps, and semantic segmentations.`),xn.forEach(a),Sl=h(t),Ze=i(t,"P",{});var Ds=o(Ze);ec=s(Ds,"See "),Fi=i(Ds,"A",{href:!0});var Hg=o(Fi);tc=s(Hg,"here"),Hg.forEach(a),ac=s(Ds," for more information on how to use it."),Ds.forEach(a),Il=h(t),he=i(t,"H2",{class:!0});var ks=o(he);We=i(ks,"A",{id:!0,class:!0,href:!0});var zg=o(We);sn=i(zg,"SPAN",{});var Bg=o(sn);w(aa.$$.fragment,Bg),Bg.forEach(a),zg.forEach(a),rc=h(ks),fn=i(ks,"SPAN",{});var Zg=o(fn);ic=s(Zg,"Prompt Weighting"),Zg.forEach(a),ks.forEach(a),Nl=h(t),ji=i(t,"P",{});var Wg=o(ji);oc=s(Wg,`Prompt weighting is a simple technique that puts more attention weight on certain parts of the text | |
| input.`),Wg.forEach(a),Gl=h(t),Ue=i(t,"P",{});var Ss=o(Ue);nc=s(Ss,"For a more in-detail explanation and examples, see "),Hi=i(Ss,"A",{href:!0});var Ug=o(Hi);lc=s(Ug,"here"),Ug.forEach(a),sc=s(Ss,"."),Ss.forEach(a),Cl=h(t),pe=i(t,"H2",{class:!0});var Is=o(pe);Oe=i(Is,"A",{id:!0,class:!0,href:!0});var Og=o(Oe);hn=i(Og,"SPAN",{});var Vg=o(hn);w(ra.$$.fragment,Vg),Vg.forEach(a),Og.forEach(a),fc=h(Is),pn=i(Is,"SPAN",{});var Jg=o(pn);hc=s(Jg,"Custom Diffusion"),Jg.forEach(a),Is.forEach(a),ql=h(t),ia=i(t,"P",{});var tm=o(ia);zi=i(tm,"A",{href:!0});var Yg=o(zi);pc=s(Yg,"Custom Diffusion"),Yg.forEach(a),dc=s(tm,` only fine-tunes the cross-attention maps of a pre-trained | |
| text-to-image diffusion model. It also allows for additionally performing textual inversion. It supports | |
| multi-concept training by design. Like DreamBooth and Textual Inversion, Custom Diffusion is also used to | |
| teach a pre-trained text-to-image diffusion model about new concepts to generate outputs involving the | |
| concept(s) of interest.`),tm.forEach(a),Rl=h(t),Ve=i(t,"P",{});var Ns=o(Ve);uc=s(Ns,"For more details, check out our "),Bi=i(Ns,"A",{href:!0});var Kg=o(Bi);cc=s(Kg,"official doc"),Kg.forEach(a),mc=s(Ns,"."),Ns.forEach(a),Ll=h(t),de=i(t,"H2",{class:!0});var Gs=o(de);Je=i(Gs,"A",{id:!0,class:!0,href:!0});var Qg=o(Je);dn=i(Qg,"SPAN",{});var Xg=o(dn);w(oa.$$.fragment,Xg),Xg.forEach(a),Qg.forEach(a),vc=h(Gs),un=i(Gs,"SPAN",{});var e1=o(un);gc=s(e1,"Model Editing"),e1.forEach(a),Gs.forEach(a),Ml=h(t),Zi=i(t,"P",{});var t1=o(Zi);na=i(t1,"A",{href:!0,rel:!0});var a1=o(na);Ec=s(a1,"Paper"),a1.forEach(a),t1.forEach(a),Fl=h(t),Ye=i(t,"P",{});var Cs=o(Ye);_c=s(Cs,"The "),Wi=i(Cs,"A",{href:!0});var r1=o(Wi);wc=s(r1,"text-to-image model editing pipeline"),r1.forEach(a),bc=s(Cs,` helps you mitigate some of the incorrect implicit assumptions a pre-trained text-to-image | |
| diffusion model might make about the subjects present in the input prompt. For example, if you prompt Stable Diffusion to generate images for \u201CA pack of roses\u201D, the roses in the generated images | |
| are more likely to be red. This pipeline helps you change that assumption.`),Cs.forEach(a),jl=h(t),Ke=i(t,"P",{});var qs=o(Ke);xc=s(qs,"To know more details, check out the "),Ui=i(qs,"A",{href:!0});var i1=o(Ui);Pc=s(i1,"official doc"),i1.forEach(a),yc=s(qs,"."),qs.forEach(a),Hl=h(t),ue=i(t,"H2",{class:!0});var Rs=o(ue);Qe=i(Rs,"A",{id:!0,class:!0,href:!0});var o1=o(Qe);cn=i(o1,"SPAN",{});var n1=o(cn);w(la.$$.fragment,n1),n1.forEach(a),o1.forEach(a),Ac=h(Rs),mn=i(Rs,"SPAN",{});var l1=o(mn);$c=s(l1,"DiffEdit"),l1.forEach(a),Rs.forEach(a),zl=h(t),Oi=i(t,"P",{});var s1=o(Oi);sa=i(s1,"A",{href:!0,rel:!0});var f1=o(sa);Tc=s(f1,"Paper"),f1.forEach(a),s1.forEach(a),Bl=h(t),fa=i(t,"P",{});var am=o(fa);Vi=i(am,"A",{href:!0});var h1=o(Vi);Dc=s(h1,"DiffEdit"),h1.forEach(a),kc=s(am,` allows for semantic editing of input images along with | |
| input prompts while preserving the original input images as much as possible.`),am.forEach(a),Zl=h(t),Xe=i(t,"P",{});var Ls=o(Xe);Sc=s(Ls,"To know more details, check out the "),Ji=i(Ls,"A",{href:!0});var p1=o(Ji);Ic=s(p1,"official doc"),p1.forEach(a),Nc=s(Ls,"."),Ls.forEach(a),Wl=h(t),ce=i(t,"H2",{class:!0});var Ms=o(ce);et=i(Ms,"A",{id:!0,class:!0,href:!0});var d1=o(et);vn=i(d1,"SPAN",{});var u1=o(vn);w(ha.$$.fragment,u1),u1.forEach(a),d1.forEach(a),Gc=h(Ms),gn=i(Ms,"SPAN",{});var c1=o(gn);Cc=s(c1,"T2I-Adapter"),c1.forEach(a),Ms.forEach(a),Ul=h(t),Yi=i(t,"P",{});var m1=o(Yi);pa=i(m1,"A",{href:!0,rel:!0});var v1=o(pa);qc=s(v1,"Paper"),v1.forEach(a),m1.forEach(a),Ol=h(t),da=i(t,"P",{});var rm=o(da);Ki=i(rm,"A",{href:!0});var g1=o(Ki);Rc=s(g1,"T2I-Adapter"),g1.forEach(a),Lc=s(rm,` is an auxiliary network which adds an extra condition. | |
| There are 8 canonical pre-trained adapters trained on different conditionings such as edge detection, sketch, | |
| depth maps, and semantic segmentations.`),rm.forEach(a),Vl=h(t),tt=i(t,"P",{});var Fs=o(tt);Mc=s(Fs,"See "),Qi=i(Fs,"A",{href:!0});var E1=o(Qi);Fc=s(E1,"here"),E1.forEach(a),jc=s(Fs," for more information on how to use it."),Fs.forEach(a),Jl=h(t),me=i(t,"H2",{class:!0});var js=o(me);at=i(js,"A",{id:!0,class:!0,href:!0});var _1=o(at);En=i(_1,"SPAN",{});var w1=o(En);w(ua.$$.fragment,w1),w1.forEach(a),_1.forEach(a),Hc=h(js),_n=i(js,"SPAN",{});var b1=o(_n);zc=s(b1,"Fabric"),b1.forEach(a),js.forEach(a),Yl=h(t),Xi=i(t,"P",{});var x1=o(Xi);ca=i(x1,"A",{href:!0,rel:!0});var P1=o(ca);Bc=s(P1,"Paper"),P1.forEach(a),x1.forEach(a),Kl=h(t),ma=i(t,"P",{});var im=o(ma);eo=i(im,"A",{href:!0});var y1=o(eo);Zc=s(y1,"Fabric"),y1.forEach(a),Wc=s(im,` is a training-free | |
| approach applicable to a wide range of popular diffusion models, which exploits | |
| the self-attention layer present in the most widely used architectures to condition | |
| the diffusion process on a set of feedback images.`),im.forEach(a),Ql=h(t),rt=i(t,"P",{});var Hs=o(rt);Uc=s(Hs,"To know more details, check out the "),to=i(Hs,"A",{href:!0});var A1=o(to);Oc=s(A1,"official doc"),A1.forEach(a),Vc=s(Hs,"."),Hs.forEach(a),this.h()},h(){n(g,"name","hf:doc:metadata"),n(g,"content",JSON.stringify(q1)),n($,"id","controlled-generation"),n($,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n($,"href","#controlled-generation"),n(E,"class","relative group"),n(Pt,"href","https://discuss.huggingface.co/"),n(Pt,"rel","nofollow"),n(yt,"href","https://github.com/huggingface/diffusers/issues"),n(yt,"rel","nofollow"),n(ya,"href","#instruct-pix2pix"),n(Aa,"href","#pix2pixzero"),n($a,"href","#attend-and-excite"),n(Ta,"href","#semantic-guidance"),n(Da,"href","#self-attention-guidance"),n(ka,"href","#depth2image"),n(Sa,"href","#multidiffusion-panorama"),n(Ia,"href","#dreambooth"),n(Na,"href","#textual-inversion"),n(Ga,"href","#controlnet"),n(Ca,"href","#prompt-weighting"),n(qa,"href","#custom-diffusion"),n(Ra,"href","#model-editing"),n(La,"href","#diffedit"),n(Ma,"href","#t2i-adapter"),n(Fa,"href","#fabric"),n(Ha,"align","center"),n(za,"align","center"),n(Ba,"align","center"),n(Za,"align","center"),n(Ua,"href","#instruct-pix2pix"),n(Wa,"align","center"),n(Oa,"align","center"),n(Va,"align","center"),n(S,"align","center"),n(Ya,"href","#pix2pixzero"),n(Ja,"align","center"),n(Ka,"align","center"),n(Qa,"align","center"),n(Ao,"align","center"),n(er,"href","#attend-and-excite"),n(Xa,"align","center"),n(tr,"align","center"),n(ar,"align","center"),n($o,"align","center"),n(ir,"href","#semantic-guidance"),n(rr,"align","center"),n(or,"align","center"),n(nr,"align","center"),n(To,"align","center"),n(sr,"href","#self-attention-guidance"),n(lr,"align","center"),n(fr,"align","center"),n(hr,"align","center"),n(Do,"align","center"),n(dr,"href","#depth2image"),n(pr,"align","center"),n(ur,"align","center"),n(cr,"align","center"),n(ko,"align","center"),n(vr,"href","#multidiffusion-panorama"),n(mr,"align","center"),n(gr,"align","center"),n(Er,"align","center"),n(So,"align","center"),n(wr,"href","#dreambooth"),n(_r,"align","center"),n(br,"align","center"),n(xr,"align","center"),n(Io,"align","center"),n(yr,"href","#textual-inversion"),n(Pr,"align","center"),n(Ar,"align","center"),n($r,"align","center"),n(No,"align","center"),n(Dr,"href","#controlnet"),n(Tr,"align","center"),n(kr,"align","center"),n(Sr,"align","center"),n(J,"align","center"),n(Nr,"href","#prompt-weighting"),n(Ir,"align","center"),n(Gr,"align","center"),n(Cr,"align","center"),n(Go,"align","center"),n(Rr,"href","#custom-diffusion"),n(qr,"align","center"),n(Lr,"align","center"),n(Mr,"align","center"),n(Co,"align","center"),n(jr,"href","#model-editing"),n(Fr,"align","center"),n(Hr,"align","center"),n(zr,"align","center"),n(qo,"align","center"),n(Zr,"href","#diffedit"),n(Br,"align","center"),n(Wr,"align","center"),n(Ur,"align","center"),n(Ro,"align","center"),n(Vr,"href","#t2i-adapter"),n(Or,"align","center"),n(Jr,"align","center"),n(Yr,"align","center"),n(Lo,"align","center"),n(Qr,"href","#fabric"),n(Kr,"align","center"),n(Xr,"align","center"),n(ei,"align","center"),n(Mo,"align","center"),n(Ee,"id","instruct-pix2pix"),n(Ee,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Ee,"href","#instruct-pix2pix"),n(K,"class","relative group"),n(Tt,"href","https://arxiv.org/abs/2211.09800"),n(Tt,"rel","nofollow"),n(ai,"href","../api/pipelines/pix2pix"),n(Dt,"href","https://openai.com/blog/instruction-following/"),n(Dt,"rel","nofollow"),n(ri,"href","../api/pipelines/pix2pix"),n(we,"id","pix2pix-zero"),n(we,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(we,"href","#pix2pix-zero"),n(X,"class","relative group"),n(St,"href","https://arxiv.org/abs/2302.03027"),n(St,"rel","nofollow"),n(oi,"href","../api/pipelines/pix2pix_zero"),n(Gt,"href","https://huggingface.co/docs/transformers/model_doc/flan-t5"),n(Gt,"rel","nofollow"),n(qt,"href","https://huggingface.co/docs/transformers/model_doc/blip"),n(qt,"rel","nofollow"),n(si,"href","../api/pipelines/stable_diffusion/text2img"),n(fi,"href","../api/pipelines/pix2pix_zero"),n(Ae,"id","attend-and-excite"),n(Ae,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Ae,"href","#attend-and-excite"),n(ee,"class","relative group"),n(Lt,"href","https://arxiv.org/abs/2301.13826"),n(Lt,"rel","nofollow"),n(pi,"href","../api/pipelines/attend_and_excite"),n(ui,"href","../api/pipelines/stable_diffusion/text2img"),n(ci,"href","../api/pipelines/attend_and_excite"),n(De,"id","semantic-guidance-sega"),n(De,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(De,"href","#semantic-guidance-sega"),n(te,"class","relative group"),n(jt,"href","https://arxiv.org/abs/2301.12247"),n(jt,"rel","nofollow"),n(_i,"href","../api/pipelines/semantic_stable_diffusion"),n(Se,"id","selfattention-guidance-sag"),n(Se,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Se,"href","#selfattention-guidance-sag"),n(ae,"class","relative group"),n(zt,"href","https://arxiv.org/abs/2210.00939"),n(zt,"rel","nofollow"),n(bi,"href","../api/pipelines/self_attention_guidance"),n(Pi,"href","../api/pipelines/self_attention_guidance"),n(Ne,"id","depth2image"),n(Ne,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Ne,"href","#depth2image"),n(re,"class","relative group"),n(Wt,"href","https://huggingface.co/stabilityai/stable-diffusion-2-depth"),n(Wt,"rel","nofollow"),n(Ai,"href","../pipelines/stable_diffusion_2#depthtoimage"),n(Ti,"href","../api/pipelines/stable_diffusion_2#depthtoimage"),n(qe,"id","multidiffusion-panorama"),n(qe,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(qe,"href","#multidiffusion-panorama"),n(ie,"class","relative group"),n(Vt,"href","https://arxiv.org/abs/2302.08113"),n(Vt,"rel","nofollow"),n(ki,"href","../api/pipelines/panorama"),n(Si,"href","../api/pipelines/panorama"),n(Me,"id","finetuning-your-own-models"),n(Me,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Me,"href","#finetuning-your-own-models"),n(oe,"class","relative group"),n(Fe,"id","dreambooth"),n(Fe,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Fe,"href","#dreambooth"),n(ne,"class","relative group"),n(Ni,"href","../training/dreambooth"),n(Gi,"href","../training/dreambooth"),n(He,"id","textual-inversion"),n(He,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(He,"href","#textual-inversion"),n(le,"class","relative group"),n(Ci,"href","../training/text_inversion"),n(qi,"href","../training/text_inversion"),n(Be,"id","controlnet"),n(Be,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Be,"href","#controlnet"),n(se,"class","relative group"),n(ta,"href","https://arxiv.org/abs/2302.05543"),n(ta,"rel","nofollow"),n(Li,"href","../api/pipelines/controlnet"),n(Mi,"href","../api/pipelines/controlnet"),n(Fi,"href","../api/pipelines/controlnet"),n(We,"id","prompt-weighting"),n(We,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(We,"href","#prompt-weighting"),n(he,"class","relative group"),n(Hi,"href","../using-diffusers/weighted_prompts"),n(Oe,"id","custom-diffusion"),n(Oe,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Oe,"href","#custom-diffusion"),n(pe,"class","relative group"),n(zi,"href","../training/custom_diffusion"),n(Bi,"href","../training/custom_diffusion"),n(Je,"id","model-editing"),n(Je,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Je,"href","#model-editing"),n(de,"class","relative group"),n(na,"href","https://arxiv.org/abs/2303.08084"),n(na,"rel","nofollow"),n(Wi,"href","../api/pipelines/model_editing"),n(Ui,"href","../api/pipelines/model_editing"),n(Qe,"id","diffedit"),n(Qe,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(Qe,"href","#diffedit"),n(ue,"class","relative group"),n(sa,"href","https://arxiv.org/abs/2210.11427"),n(sa,"rel","nofollow"),n(Vi,"href","../api/pipelines/diffedit"),n(Ji,"href","../api/pipelines/diffedit"),n(et,"id","t2iadapter"),n(et,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(et,"href","#t2iadapter"),n(ce,"class","relative group"),n(pa,"href","https://arxiv.org/abs/2302.08453"),n(pa,"rel","nofollow"),n(Ki,"href","../api/pipelines/stable_diffusion/adapter"),n(Qi,"href","../api/pipelines/stable_diffusion/adapter"),n(at,"id","fabric"),n(at,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),n(at,"href","#fabric"),n(me,"class","relative group"),n(ca,"href","https://arxiv.org/abs/2307.10159"),n(ca,"rel","nofollow"),n(eo,"href","../api/pipelines/fabric"),n(to,"href","../api/pipelines/fabric")},m(t,p){e(document.head,g),d(t,D,p),d(t,E,p),e(E,$),e($,Y),b(T,Y,null),e(E,V),e(E,ve),e(ve,zs),d(t,Pn,p),d(t,Ea,p),e(Ea,Bs),d(t,yn,p),d(t,_a,p),e(_a,Zs),d(t,An,p),d(t,wa,p),e(wa,Ws),d(t,$n,p),d(t,k,p),e(k,Us),e(k,ro),e(ro,Os),e(k,Vs),e(k,Pt),e(Pt,Js),e(k,Ys),e(k,yt),e(yt,Ks),e(k,Qs),d(t,Tn,p),d(t,ba,p),e(ba,Xs),d(t,Dn,p),d(t,xa,p),e(xa,ef),d(t,kn,p),d(t,Pa,p),e(Pa,tf),d(t,Sn,p),d(t,u,p),e(u,io),e(io,ya),e(ya,af),e(u,rf),e(u,oo),e(oo,Aa),e(Aa,of),e(u,nf),e(u,no),e(no,$a),e($a,lf),e(u,sf),e(u,lo),e(lo,Ta),e(Ta,ff),e(u,hf),e(u,so),e(so,Da),e(Da,pf),e(u,df),e(u,fo),e(fo,ka),e(ka,uf),e(u,cf),e(u,ho),e(ho,Sa),e(Sa,mf),e(u,vf),e(u,po),e(po,Ia),e(Ia,gf),e(u,Ef),e(u,uo),e(uo,Na),e(Na,_f),e(u,wf),e(u,co),e(co,Ga),e(Ga,bf),e(u,xf),e(u,mo),e(mo,Ca),e(Ca,Pf),e(u,yf),e(u,vo),e(vo,qa),e(qa,Af),e(u,$f),e(u,go),e(go,Ra),e(Ra,Tf),e(u,Df),e(u,Eo),e(Eo,La),e(La,kf),e(u,Sf),e(u,_o),e(_o,Ma),e(Ma,If),e(u,Nf),e(u,wo),e(wo,Fa),e(Fa,Gf),d(t,In,p),d(t,ja,p),e(ja,Cf),d(t,Nn,p),d(t,ge,p),e(ge,bo),e(bo,I),e(I,Ha),e(Ha,xo),e(xo,qf),e(I,Rf),e(I,za),e(za,Po),e(Po,Lf),e(I,Mf),e(I,Ba),e(Ba,At),e(At,Ff),e(At,jf),e(At,Hf),e(I,zf),e(I,Za),e(Za,yo),e(yo,Bf),e(ge,Zf),e(ge,c),e(c,N),e(N,Wa),e(Wa,Ua),e(Ua,Wf),e(N,Uf),e(N,Oa),e(Oa,Of),e(N,Vf),e(N,Va),e(Va,Jf),e(N,Yf),e(N,S),e(S,Kf),e(S,Qf),e(S,Xf),e(S,eh),e(S,th),e(S,ah),e(S,rh),e(c,ih),e(c,G),e(G,Ja),e(Ja,Ya),e(Ya,oh),e(G,nh),e(G,Ka),e(Ka,lh),e(G,sh),e(G,Qa),e(Qa,fh),e(G,hh),e(G,Ao),e(c,ph),e(c,C),e(C,Xa),e(Xa,er),e(er,dh),e(C,uh),e(C,tr),e(tr,ch),e(C,mh),e(C,ar),e(ar,vh),e(C,gh),e(C,$o),e(c,Eh),e(c,q),e(q,rr),e(rr,ir),e(ir,_h),e(q,wh),e(q,or),e(or,bh),e(q,xh),e(q,nr),e(nr,Ph),e(q,yh),e(q,To),e(c,Ah),e(c,R),e(R,lr),e(lr,sr),e(sr,$h),e(R,Th),e(R,fr),e(fr,Dh),e(R,kh),e(R,hr),e(hr,Sh),e(R,Ih),e(R,Do),e(c,Nh),e(c,L),e(L,pr),e(pr,dr),e(dr,Gh),e(L,Ch),e(L,ur),e(ur,qh),e(L,Rh),e(L,cr),e(cr,Lh),e(L,Mh),e(L,ko),e(c,Fh),e(c,M),e(M,mr),e(mr,vr),e(vr,jh),e(M,Hh),e(M,gr),e(gr,zh),e(M,Bh),e(M,Er),e(Er,Zh),e(M,Wh),e(M,So),e(c,Uh),e(c,F),e(F,_r),e(_r,wr),e(wr,Oh),e(F,Vh),e(F,br),e(br,Jh),e(F,Yh),e(F,xr),e(xr,Kh),e(F,Qh),e(F,Io),e(c,Xh),e(c,j),e(j,Pr),e(Pr,yr),e(yr,ep),e(j,tp),e(j,Ar),e(Ar,ap),e(j,rp),e(j,$r),e($r,ip),e(j,op),e(j,No),e(c,np),e(c,H),e(H,Tr),e(Tr,Dr),e(Dr,lp),e(H,sp),e(H,kr),e(kr,fp),e(H,hp),e(H,Sr),e(Sr,pp),e(H,dp),e(H,J),e(J,up),e(J,cp),e(J,mp),e(J,vp),e(J,gp),e(c,Ep),e(c,z),e(z,Ir),e(Ir,Nr),e(Nr,_p),e(z,wp),e(z,Gr),e(Gr,bp),e(z,xp),e(z,Cr),e(Cr,Pp),e(z,yp),e(z,Go),e(c,Ap),e(c,B),e(B,qr),e(qr,Rr),e(Rr,$p),e(B,Tp),e(B,Lr),e(Lr,Dp),e(B,kp),e(B,Mr),e(Mr,Sp),e(B,Ip),e(B,Co),e(c,Np),e(c,Z),e(Z,Fr),e(Fr,jr),e(jr,Gp),e(Z,Cp),e(Z,Hr),e(Hr,qp),e(Z,Rp),e(Z,zr),e(zr,Lp),e(Z,Mp),e(Z,qo),e(c,Fp),e(c,W),e(W,Br),e(Br,Zr),e(Zr,jp),e(W,Hp),e(W,Wr),e(Wr,zp),e(W,Bp),e(W,Ur),e(Ur,Zp),e(W,Wp),e(W,Ro),e(c,Up),e(c,U),e(U,Or),e(Or,Vr),e(Vr,Op),e(U,Vp),e(U,Jr),e(Jr,Jp),e(U,Yp),e(U,Yr),e(Yr,Kp),e(U,Qp),e(U,Lo),e(c,Xp),e(c,O),e(O,Kr),e(Kr,Qr),e(Qr,ed),e(O,td),e(O,Xr),e(Xr,ad),e(O,rd),e(O,ei),e(ei,id),e(O,od),e(O,Mo),d(t,Gn,p),d(t,K,p),e(K,Ee),e(Ee,Fo),b($t,Fo,null),e(K,nd),e(K,jo),e(jo,ld),d(t,Cn,p),d(t,ti,p),e(ti,Tt),e(Tt,sd),d(t,qn,p),d(t,Q,p),e(Q,ai),e(ai,fd),e(Q,hd),e(Q,Dt),e(Dt,pd),e(Q,dd),d(t,Rn,p),d(t,_e,p),e(_e,ud),e(_e,ri),e(ri,cd),e(_e,md),d(t,Ln,p),d(t,X,p),e(X,we),e(we,Ho),b(kt,Ho,null),e(X,vd),e(X,zo),e(zo,gd),d(t,Mn,p),d(t,ii,p),e(ii,St),e(St,Ed),d(t,Fn,p),d(t,It,p),e(It,oi),e(oi,_d),e(It,wd),d(t,jn,p),d(t,ni,p),e(ni,bd),d(t,Hn,p),d(t,li,p),e(li,xd),d(t,zn,p),d(t,be,p),e(be,Nt),e(Nt,Pd),e(Nt,Gt),e(Gt,yd),e(Nt,Ad),e(be,$d),e(be,Ct),e(Ct,Td),e(Ct,qt),e(qt,Dd),e(Ct,kd),d(t,Bn,p),b(xe,t,p),d(t,Zn,p),d(t,Pe,p),e(Pe,Sd),e(Pe,si),e(si,Id),e(Pe,Nd),d(t,Wn,p),d(t,ye,p),e(ye,Gd),e(ye,fi),e(fi,Cd),e(ye,qd),d(t,Un,p),d(t,ee,p),e(ee,Ae),e(Ae,Bo),b(Rt,Bo,null),e(ee,Rd),e(ee,Zo),e(Zo,Ld),d(t,On,p),d(t,hi,p),e(hi,Lt),e(Lt,Md),d(t,Vn,p),d(t,Mt,p),e(Mt,pi),e(pi,Fd),e(Mt,jd),d(t,Jn,p),d(t,di,p),e(di,Hd),d(t,Yn,p),d(t,$e,p),e($e,zd),e($e,ui),e(ui,Bd),e($e,Zd),d(t,Kn,p),d(t,Te,p),e(Te,Wd),e(Te,ci),e(ci,Ud),e(Te,Od),d(t,Qn,p),d(t,te,p),e(te,De),e(De,Wo),b(Ft,Wo,null),e(te,Vd),e(te,Uo),e(Uo,Jd),d(t,Xn,p),d(t,mi,p),e(mi,jt),e(jt,Yd),d(t,el,p),d(t,vi,p),e(vi,Kd),d(t,tl,p),d(t,gi,p),e(gi,Qd),d(t,al,p),d(t,Ei,p),e(Ei,Xd),d(t,rl,p),d(t,ke,p),e(ke,eu),e(ke,_i),e(_i,tu),e(ke,au),d(t,il,p),d(t,ae,p),e(ae,Se),e(Se,Oo),b(Ht,Oo,null),e(ae,ru),e(ae,Vo),e(Vo,iu),d(t,ol,p),d(t,wi,p),e(wi,zt),e(zt,ou),d(t,nl,p),d(t,Bt,p),e(Bt,bi),e(bi,nu),e(Bt,lu),d(t,ll,p),d(t,xi,p),e(xi,su),d(t,sl,p),d(t,Ie,p),e(Ie,fu),e(Ie,Pi),e(Pi,hu),e(Ie,pu),d(t,fl,p),d(t,re,p),e(re,Ne),e(Ne,Jo),b(Zt,Jo,null),e(re,du),e(re,Yo),e(Yo,uu),d(t,hl,p),d(t,yi,p),e(yi,Wt),e(Wt,cu),d(t,pl,p),d(t,Ut,p),e(Ut,Ai),e(Ai,mu),e(Ut,vu),d(t,dl,p),d(t,$i,p),e($i,gu),d(t,ul,p),d(t,Ge,p),e(Ge,Eu),e(Ge,Ti),e(Ti,_u),e(Ge,wu),d(t,cl,p),b(Ce,t,p),d(t,ml,p),d(t,ie,p),e(ie,qe),e(qe,Ko),b(Ot,Ko,null),e(ie,bu),e(ie,Qo),e(Qo,xu),d(t,vl,p),d(t,Di,p),e(Di,Vt),e(Vt,Pu),d(t,gl,p),d(t,Re,p),e(Re,yu),e(Re,ki),e(ki,Au),e(Re,$u),d(t,El,p),d(t,Le,p),e(Le,Tu),e(Le,Si),e(Si,Du),e(Le,ku),d(t,_l,p),d(t,oe,p),e(oe,Me),e(Me,Xo),b(Jt,Xo,null),e(oe,Su),e(oe,en),e(en,Iu),d(t,wl,p),d(t,Ii,p),e(Ii,Nu),d(t,bl,p),d(t,ne,p),e(ne,Fe),e(Fe,tn),b(Yt,tn,null),e(ne,Gu),e(ne,an),e(an,Cu),d(t,xl,p),d(t,Kt,p),e(Kt,Ni),e(Ni,qu),e(Kt,Ru),d(t,Pl,p),d(t,je,p),e(je,Lu),e(je,Gi),e(Gi,Mu),e(je,Fu),d(t,yl,p),d(t,le,p),e(le,He),e(He,rn),b(Qt,rn,null),e(le,ju),e(le,on),e(on,Hu),d(t,Al,p),d(t,Xt,p),e(Xt,Ci),e(Ci,zu),e(Xt,Bu),d(t,$l,p),d(t,ze,p),e(ze,Zu),e(ze,qi),e(qi,Wu),e(ze,Uu),d(t,Tl,p),d(t,se,p),e(se,Be),e(Be,nn),b(ea,nn,null),e(se,Ou),e(se,ln),e(ln,Vu),d(t,Dl,p),d(t,Ri,p),e(Ri,ta),e(ta,Ju),d(t,kl,p),d(t,fe,p),e(fe,Li),e(Li,Yu),e(fe,Ku),e(fe,Mi),e(Mi,Qu),e(fe,Xu),d(t,Sl,p),d(t,Ze,p),e(Ze,ec),e(Ze,Fi),e(Fi,tc),e(Ze,ac),d(t,Il,p),d(t,he,p),e(he,We),e(We,sn),b(aa,sn,null),e(he,rc),e(he,fn),e(fn,ic),d(t,Nl,p),d(t,ji,p),e(ji,oc),d(t,Gl,p),d(t,Ue,p),e(Ue,nc),e(Ue,Hi),e(Hi,lc),e(Ue,sc),d(t,Cl,p),d(t,pe,p),e(pe,Oe),e(Oe,hn),b(ra,hn,null),e(pe,fc),e(pe,pn),e(pn,hc),d(t,ql,p),d(t,ia,p),e(ia,zi),e(zi,pc),e(ia,dc),d(t,Rl,p),d(t,Ve,p),e(Ve,uc),e(Ve,Bi),e(Bi,cc),e(Ve,mc),d(t,Ll,p),d(t,de,p),e(de,Je),e(Je,dn),b(oa,dn,null),e(de,vc),e(de,un),e(un,gc),d(t,Ml,p),d(t,Zi,p),e(Zi,na),e(na,Ec),d(t,Fl,p),d(t,Ye,p),e(Ye,_c),e(Ye,Wi),e(Wi,wc),e(Ye,bc),d(t,jl,p),d(t,Ke,p),e(Ke,xc),e(Ke,Ui),e(Ui,Pc),e(Ke,yc),d(t,Hl,p),d(t,ue,p),e(ue,Qe),e(Qe,cn),b(la,cn,null),e(ue,Ac),e(ue,mn),e(mn,$c),d(t,zl,p),d(t,Oi,p),e(Oi,sa),e(sa,Tc),d(t,Bl,p),d(t,fa,p),e(fa,Vi),e(Vi,Dc),e(fa,kc),d(t,Zl,p),d(t,Xe,p),e(Xe,Sc),e(Xe,Ji),e(Ji,Ic),e(Xe,Nc),d(t,Wl,p),d(t,ce,p),e(ce,et),e(et,vn),b(ha,vn,null),e(ce,Gc),e(ce,gn),e(gn,Cc),d(t,Ul,p),d(t,Yi,p),e(Yi,pa),e(pa,qc),d(t,Ol,p),d(t,da,p),e(da,Ki),e(Ki,Rc),e(da,Lc),d(t,Vl,p),d(t,tt,p),e(tt,Mc),e(tt,Qi),e(Qi,Fc),e(tt,jc),d(t,Jl,p),d(t,me,p),e(me,at),e(at,En),b(ua,En,null),e(me,Hc),e(me,_n),e(_n,zc),d(t,Yl,p),d(t,Xi,p),e(Xi,ca),e(ca,Bc),d(t,Kl,p),d(t,ma,p),e(ma,eo),e(eo,Zc),e(ma,Wc),d(t,Ql,p),d(t,rt,p),e(rt,Uc),e(rt,to),e(to,Oc),e(rt,Vc),Xl=!0},p(t,[p]){const va={};p&2&&(va.$$scope={dirty:p,ctx:t}),xe.$set(va);const wn={};p&2&&(wn.$$scope={dirty:p,ctx:t}),Ce.$set(wn)},i(t){Xl||(x(T.$$.fragment,t),x($t.$$.fragment,t),x(kt.$$.fragment,t),x(xe.$$.fragment,t),x(Rt.$$.fragment,t),x(Ft.$$.fragment,t),x(Ht.$$.fragment,t),x(Zt.$$.fragment,t),x(Ce.$$.fragment,t),x(Ot.$$.fragment,t),x(Jt.$$.fragment,t),x(Yt.$$.fragment,t),x(Qt.$$.fragment,t),x(ea.$$.fragment,t),x(aa.$$.fragment,t),x(ra.$$.fragment,t),x(oa.$$.fragment,t),x(la.$$.fragment,t),x(ha.$$.fragment,t),x(ua.$$.fragment,t),Xl=!0)},o(t){P(T.$$.fragment,t),P($t.$$.fragment,t),P(kt.$$.fragment,t),P(xe.$$.fragment,t),P(Rt.$$.fragment,t),P(Ft.$$.fragment,t),P(Ht.$$.fragment,t),P(Zt.$$.fragment,t),P(Ce.$$.fragment,t),P(Ot.$$.fragment,t),P(Jt.$$.fragment,t),P(Yt.$$.fragment,t),P(Qt.$$.fragment,t),P(ea.$$.fragment,t),P(aa.$$.fragment,t),P(ra.$$.fragment,t),P(oa.$$.fragment,t),P(la.$$.fragment,t),P(ha.$$.fragment,t),P(ua.$$.fragment,t),Xl=!1},d(t){a(g),t&&a(D),t&&a(E),y(T),t&&a(Pn),t&&a(Ea),t&&a(yn),t&&a(_a),t&&a(An),t&&a(wa),t&&a($n),t&&a(k),t&&a(Tn),t&&a(ba),t&&a(Dn),t&&a(xa),t&&a(kn),t&&a(Pa),t&&a(Sn),t&&a(u),t&&a(In),t&&a(ja),t&&a(Nn),t&&a(ge),t&&a(Gn),t&&a(K),y($t),t&&a(Cn),t&&a(ti),t&&a(qn),t&&a(Q),t&&a(Rn),t&&a(_e),t&&a(Ln),t&&a(X),y(kt),t&&a(Mn),t&&a(ii),t&&a(Fn),t&&a(It),t&&a(jn),t&&a(ni),t&&a(Hn),t&&a(li),t&&a(zn),t&&a(be),t&&a(Bn),y(xe,t),t&&a(Zn),t&&a(Pe),t&&a(Wn),t&&a(ye),t&&a(Un),t&&a(ee),y(Rt),t&&a(On),t&&a(hi),t&&a(Vn),t&&a(Mt),t&&a(Jn),t&&a(di),t&&a(Yn),t&&a($e),t&&a(Kn),t&&a(Te),t&&a(Qn),t&&a(te),y(Ft),t&&a(Xn),t&&a(mi),t&&a(el),t&&a(vi),t&&a(tl),t&&a(gi),t&&a(al),t&&a(Ei),t&&a(rl),t&&a(ke),t&&a(il),t&&a(ae),y(Ht),t&&a(ol),t&&a(wi),t&&a(nl),t&&a(Bt),t&&a(ll),t&&a(xi),t&&a(sl),t&&a(Ie),t&&a(fl),t&&a(re),y(Zt),t&&a(hl),t&&a(yi),t&&a(pl),t&&a(Ut),t&&a(dl),t&&a($i),t&&a(ul),t&&a(Ge),t&&a(cl),y(Ce,t),t&&a(ml),t&&a(ie),y(Ot),t&&a(vl),t&&a(Di),t&&a(gl),t&&a(Re),t&&a(El),t&&a(Le),t&&a(_l),t&&a(oe),y(Jt),t&&a(wl),t&&a(Ii),t&&a(bl),t&&a(ne),y(Yt),t&&a(xl),t&&a(Kt),t&&a(Pl),t&&a(je),t&&a(yl),t&&a(le),y(Qt),t&&a(Al),t&&a(Xt),t&&a($l),t&&a(ze),t&&a(Tl),t&&a(se),y(ea),t&&a(Dl),t&&a(Ri),t&&a(kl),t&&a(fe),t&&a(Sl),t&&a(Ze),t&&a(Il),t&&a(he),y(aa),t&&a(Nl),t&&a(ji),t&&a(Gl),t&&a(Ue),t&&a(Cl),t&&a(pe),y(ra),t&&a(ql),t&&a(ia),t&&a(Rl),t&&a(Ve),t&&a(Ll),t&&a(de),y(oa),t&&a(Ml),t&&a(Zi),t&&a(Fl),t&&a(Ye),t&&a(jl),t&&a(Ke),t&&a(Hl),t&&a(ue),y(la),t&&a(zl),t&&a(Oi),t&&a(Bl),t&&a(fa),t&&a(Zl),t&&a(Xe),t&&a(Wl),t&&a(ce),y(ha),t&&a(Ul),t&&a(Yi),t&&a(Ol),t&&a(da),t&&a(Vl),t&&a(tt),t&&a(Jl),t&&a(me),y(ua),t&&a(Yl),t&&a(Xi),t&&a(Kl),t&&a(ma),t&&a(Ql),t&&a(rt)}}}const q1={local:"controlled-generation",sections:[{local:"instruct-pix2pix",title:"Instruct Pix2Pix"},{local:"pix2pix-zero",title:"Pix2Pix Zero"},{local:"attend-and-excite",title:"Attend and Excite"},{local:"semantic-guidance-sega",title:"Semantic Guidance (SEGA)"},{local:"selfattention-guidance-sag",title:"Self-attention Guidance (SAG)"},{local:"depth2image",title:"Depth2Image"},{local:"multidiffusion-panorama",title:"MultiDiffusion Panorama"},{local:"finetuning-your-own-models",title:"Fine-tuning your own models"},{local:"dreambooth",title:"DreamBooth"},{local:"textual-inversion",title:"Textual Inversion"},{local:"controlnet",title:"ControlNet"},{local:"prompt-weighting",title:"Prompt Weighting"},{local:"custom-diffusion",title:"Custom Diffusion"},{local:"model-editing",title:"Model Editing"},{local:"diffedit",title:"DiffEdit"},{local:"t2iadapter",title:"T2I-Adapter"},{local:"fabric",title:"Fabric"}],title:"Controlled generation"};function R1(ga){return I1(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class j1 extends T1{constructor(g){super();D1(this,g,R1,C1,k1,{})}}export{j1 as default,q1 as metadata}; | |
Xet Storage Details
- Size:
- 74.4 kB
- Xet hash:
- ae90a5799d15fc26c389d02cdc717122ba5ec1a603f8e5a16626b444f715e329
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.