Buckets:
hf-doc-build/doc / diffusers /main /en /_app /pages /using-diffusers /diffedit.mdx-hf-doc-builder.js
| import{S as ei,i as ti,s as li,e as n,k as m,w as f,t as s,M as ai,c as o,d as t,m as c,a as p,x as u,h as i,b as d,N as La,G as l,g as r,y as h,q as J,o as M,B as y,v as si}from"../../chunks/vendor-hf-doc-builder.js";import{T as ii}from"../../chunks/Tip-hf-doc-builder.js";import{I as Ka}from"../../chunks/IconCopyLink-hf-doc-builder.js";import{C as g}from"../../chunks/CodeBlock-hf-doc-builder.js";import{D as ni}from"../../chunks/DocNotebookDropdown-hf-doc-builder.js";function oi(Jt){let w,Y,T,_,k;return{c(){w=n("p"),Y=s("Check out the "),T=n("a"),_=s("generation strategy"),k=s(" guide if you\u2019re interested in learning more about strategies for generating different quality text."),this.h()},l(j){w=o(j,"P",{});var G=p(w);Y=i(G,"Check out the "),T=o(G,"A",{href:!0,rel:!0});var N=p(T);_=i(N,"generation strategy"),N.forEach(t),k=i(G," guide if you\u2019re interested in learning more about strategies for generating different quality text."),G.forEach(t),this.h()},h(){d(T,"href","https://huggingface.co/docs/transformers/main/en/generation_strategies"),d(T,"rel","nofollow")},m(j,G){r(j,w,G),l(w,Y),l(w,T),l(T,_),l(w,k)},d(j){j&&t(w)}}}function ri(Jt){let w,Y,T,_,k,j,G,N,Ml,Mt,q,yt,be,yl,wt,E,Ae,wl,bl,qe,Tl,gl,Pe,Il,bt,Te,Ul,Tt,ge,Zl,gt,P,It,b,jl,Ie,_l,vl,Ue,Gl,El,Le,Wl,Rl,Ke,Bl,Xl,Oe,kl,Vl,et,Cl,$l,Ut,L,Zt,I,Yl,Ze,Nl,zl,tt,Hl,Fl,lt,Sl,xl,at,Ql,Dl,jt,je,Al,_t,K,vt,_e,ql,Gt,O,Et,v,Pl,ve,Ll,Kl,st,Ol,ea,it,ta,la,Wt,ee,Rt,Ge,aa,Bt,te,Xt,U,sa,nt,ia,na,ot,oa,ra,rt,pa,ma,pt,ca,da,kt,le,Vt,V,ae,Ee,Oa,fa,We,ua,ha,se,Re,es,Ja,Be,Ma,Ct,C,z,mt,ie,ya,ct,wa,$t,H,ba,ne,Ta,ga,Yt,Xe,Ia,Nt,oe,zt,ke,Ua,Ht,re,Ft,Ve,Za,St,pe,xt,F,Qt,S,ja,Ce,_a,va,Dt,me,At,W,Ga,$e,Ea,Wa,Ye,Ra,Ba,qt,ce,Pt,$,x,dt,de,Xa,ft,ka,Lt,R,Va,ut,Ca,$a,fe,Ya,Na,Kt,Ne,za,Ot,ue,el,ze,Ha,tl,he,ll,Q,Fa,ht,Sa,xa,al,Je,sl,Me,ye,He,ts,Qa,Fe,Da,il,D,Aa,Se,qa,Pa,nl;return j=new Ka({}),q=new ni({props:{classNames:"absolute z-10 right-0 
top-0",options:[{label:"Mixed",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/diffedit.ipynb"},{label:"PyTorch",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/pytorch/diffedit.ipynb"},{label:"TensorFlow",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/tensorflow/diffedit.ipynb"},{label:"Mixed",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/diffedit.ipynb"},{label:"PyTorch",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/pytorch/diffedit.ipynb"},{label:"TensorFlow",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/tensorflow/diffedit.ipynb"}]}}),P=new g({props:{code:"JTIzJTIwdW5jb21tZW50JTIwdG8lMjBpbnN0YWxsJTIwdGhlJTIwbmVjZXNzYXJ5JTIwbGlicmFyaWVzJTIwaW4lMjBDb2xhYiUwQSUyMyFwaXAlMjBpbnN0YWxsJTIwZGlmZnVzZXJzJTIwdHJhbnNmb3JtZXJzJTIwYWNjZWxlcmF0ZSUyMHNhZmV0ZW5zb3Jz",highlighted:`<span class="hljs-comment"># uncomment to install the necessary libraries in Colab</span> | |
| <span class="hljs-comment">#!pip install diffusers transformers accelerate safetensors</span>`}}),L=new g({props:{code:"c291cmNlX3Byb21wdCUyMCUzRCUyMCUyMmElMjBib3dsJTIwb2YlMjBmcnVpdHMlMjIlMEF0YXJnZXRfcHJvbXB0JTIwJTNEJTIwJTIyYSUyMGJvd2wlMjBvZiUyMHBlYXJzJTIy",highlighted:`source_prompt = <span class="hljs-string">"a bowl of fruits"</span> | |
| target_prompt = <span class="hljs-string">"a bowl of pears"</span>`}}),K=new g({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRERJTVNjaGVkdWxlciUyQyUyMERESU1JbnZlcnNlU2NoZWR1bGVyJTJDJTIwU3RhYmxlRGlmZnVzaW9uRGlmZkVkaXRQaXBlbGluZSUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uRGlmZkVkaXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLTItMSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSUyMCUyMCUyMCUyMHNhZmV0eV9jaGVja2VyJTNETm9uZSUyQyUwQSUyMCUyMCUyMCUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMkMlMEEpJTBBcGlwZWxpbmUuc2NoZWR1bGVyJTIwJTNEJTIwRERJTVNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlbGluZS5zY2hlZHVsZXIuY29uZmlnKSUwQXBpcGVsaW5lLmludmVyc2Vfc2NoZWR1bGVyJTIwJTNEJTIwRERJTUludmVyc2VTY2hlZHVsZXIuZnJvbV9jb25maWcocGlwZWxpbmUuc2NoZWR1bGVyLmNvbmZpZyklMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQXBpcGVsaW5lLmVuYWJsZV92YWVfc2xpY2luZygp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DDIMScheduler, DDIMInverseScheduler, StableDiffusionDiffEditPipeline | |
| pipeline = StableDiffusionDiffEditPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-2-1"</span>, | |
| torch_dtype=torch.float16, | |
| safety_checker=<span class="hljs-literal">None</span>, | |
| use_safetensors=<span class="hljs-literal">True</span>, | |
| ) | |
| pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config) | |
| pipeline.inverse_scheduler = DDIMInverseScheduler.from_config(pipeline.scheduler.config) | |
| pipeline.enable_model_cpu_offload() | |
| pipeline.enable_vae_slicing()`}}),O=new g({props:{code:"ZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMEElMEFpbWdfdXJsJTIwJTNEJTIwJTIyaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGWGlhbmctY2QlMkZEaWZmRWRpdC1zdGFibGUtZGlmZnVzaW9uJTJGcmF3JTJGbWFpbiUyRmFzc2V0cyUyRm9yaWdpbi5wbmclMjIlMEFyYXdfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKGltZ191cmwpLmNvbnZlcnQoJTIyUkdCJTIyKS5yZXNpemUoKDc2OCUyQyUyMDc2OCkp",highlighted:`<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| img_url = <span class="hljs-string">"https://github.com/Xiang-cd/DiffEdit-stable-diffusion/raw/main/assets/origin.png"</span> | |
| raw_image = load_image(img_url).convert(<span class="hljs-string">"RGB"</span>).resize((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>))`}}),ee=new g({props:{code:"c291cmNlX3Byb21wdCUyMCUzRCUyMCUyMmElMjBib3dsJTIwb2YlMjBmcnVpdHMlMjIlMEF0YXJnZXRfcHJvbXB0JTIwJTNEJTIwJTIyYSUyMGJhc2tldCUyMG9mJTIwcGVhcnMlMjIlMEFtYXNrX2ltYWdlJTIwJTNEJTIwcGlwZWxpbmUuZ2VuZXJhdGVfbWFzayglMEElMjAlMjAlMjAlMjBpbWFnZSUzRHJhd19pbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHNvdXJjZV9wcm9tcHQlM0Rzb3VyY2VfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwdGFyZ2V0X3Byb21wdCUzRHRhcmdldF9wcm9tcHQlMkMlMEEp",highlighted:`source_prompt = <span class="hljs-string">"a bowl of fruits"</span> | |
| target_prompt = <span class="hljs-string">"a basket of pears"</span> | |
| mask_image = pipeline.generate_mask( | |
| image=raw_image, | |
| source_prompt=source_prompt, | |
| target_prompt=target_prompt, | |
| )`}}),te=new g({props:{code:"aW52X2xhdGVudHMlMjAlM0QlMjBwaXBlbGluZS5pbnZlcnQocHJvbXB0JTNEc291cmNlX3Byb21wdCUyQyUyMGltYWdlJTNEcmF3X2ltYWdlKS5sYXRlbnRz",highlighted:"inv_latents = pipeline.invert(prompt=source_prompt, image=raw_image).latents"}}),le=new g({props:{code:"aW1hZ2UlMjAlM0QlMjBwaXBlbGluZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0R0YXJnZXRfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbWFza19pbWFnZSUzRG1hc2tfaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBpbWFnZV9sYXRlbnRzJTNEaW52X2xhdGVudHMlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0Rzb3VyY2VfcHJvbXB0JTJDJTBBKS5pbWFnZXMlNUIwJTVEJTBBaW1hZ2Uuc2F2ZSglMjJlZGl0ZWRfaW1hZ2UucG5nJTIyKQ==",highlighted:`image = pipeline( | |
| prompt=target_prompt, | |
| mask_image=mask_image, | |
| image_latents=inv_latents, | |
| negative_prompt=source_prompt, | |
| ).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"edited_image.png"</span>)`}}),ie=new Ka({}),oe=new g({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFQ1Rm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyZ29vZ2xlJTJGZmxhbi10NS14bCUyMiklMEFtb2RlbCUyMCUzRCUyMFQ1Rm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUlMkZmbGFuLXQ1LXhsJTIyJTJDJTIwZGV2aWNlX21hcCUzRCUyMmF1dG8lMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, T5ForConditionalGeneration | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"google/flan-t5-xl"</span>) | |
| model = T5ForConditionalGeneration.from_pretrained(<span class="hljs-string">"google/flan-t5-xl"</span>, device_map=<span class="hljs-string">"auto"</span>, torch_dtype=torch.float16)`}}),re=new g({props:{code:"c291cmNlX2NvbmNlcHQlMjAlM0QlMjAlMjJib3dsJTIyJTBBdGFyZ2V0X2NvbmNlcHQlMjAlM0QlMjAlMjJiYXNrZXQlMjIlMEElMEFzb3VyY2VfdGV4dCUyMCUzRCUyMGYlMjJQcm92aWRlJTIwYSUyMGNhcHRpb24lMjBmb3IlMjBpbWFnZXMlMjBjb250YWluaW5nJTIwYSUyMCU3QnNvdXJjZV9jb25jZXB0JTdELiUyMCUyMiUwQSUyMlRoZSUyMGNhcHRpb25zJTIwc2hvdWxkJTIwYmUlMjBpbiUyMEVuZ2xpc2glMjBhbmQlMjBzaG91bGQlMjBiZSUyMG5vJTIwbG9uZ2VyJTIwdGhhbiUyMDE1MCUyMGNoYXJhY3RlcnMuJTIyJTBBJTBBdGFyZ2V0X3RleHQlMjAlM0QlMjBmJTIyUHJvdmlkZSUyMGElMjBjYXB0aW9uJTIwZm9yJTIwaW1hZ2VzJTIwY29udGFpbmluZyUyMGElMjAlN0J0YXJnZXRfY29uY2VwdCU3RC4lMjAlMjIlMEElMjJUaGUlMjBjYXB0aW9ucyUyMHNob3VsZCUyMGJlJTIwaW4lMjBFbmdsaXNoJTIwYW5kJTIwc2hvdWxkJTIwYmUlMjBubyUyMGxvbmdlciUyMHRoYW4lMjAxNTAlMjBjaGFyYWN0ZXJzLiUyMg==",highlighted:`source_concept = <span class="hljs-string">"bowl"</span> | |
| target_concept = <span class="hljs-string">"basket"</span> | |
| source_text = <span class="hljs-string">f"Provide a caption for images containing a <span class="hljs-subst">{source_concept}</span>. "</span> | |
| <span class="hljs-string">"The captions should be in English and should be no longer than 150 characters."</span> | |
| target_text = <span class="hljs-string">f"Provide a caption for images containing a <span class="hljs-subst">{target_concept}</span>. "</span> | |
| <span class="hljs-string">"The captions should be in English and should be no longer than 150 characters."</span>`}}),pe=new g({props:{code:"JTQwdG9yY2gubm9fZ3JhZCUwQWRlZiUyMGdlbmVyYXRlX3Byb21wdHMoaW5wdXRfcHJvbXB0KSUzQSUwQSUyMCUyMCUyMCUyMGlucHV0X2lkcyUyMCUzRCUyMHRva2VuaXplcihpbnB1dF9wcm9tcHQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKS5pbnB1dF9pZHMudG8oJTIyY3VkYSUyMiklMEElMEElMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW5wdXRfaWRzJTJDJTIwdGVtcGVyYXR1cmUlM0QwLjglMkMlMjBudW1fcmV0dXJuX3NlcXVlbmNlcyUzRDE2JTJDJTIwZG9fc2FtcGxlJTNEVHJ1ZSUyQyUyMG1heF9uZXdfdG9rZW5zJTNEMTI4JTJDJTIwdG9wX2slM0QxMCUwQSUyMCUyMCUyMCUyMCklMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIuYmF0Y2hfZGVjb2RlKG91dHB1dHMlMkMlMjBza2lwX3NwZWNpYWxfdG9rZW5zJTNEVHJ1ZSklMEElMEFzb3VyY2VfcHJvbXB0cyUyMCUzRCUyMGdlbmVyYXRlX3Byb21wdHMoc291cmNlX3RleHQpJTBBdGFyZ2V0X3Byb21wdHMlMjAlM0QlMjBnZW5lcmF0ZV9wcm9tcHRzKHRhcmdldF90ZXh0KSUwQXByaW50KHNvdXJjZV9wcm9tcHRzKSUwQXByaW50KHRhcmdldF9wcm9tcHRzKQ==",highlighted:`<span class="hljs-meta">@torch.no_grad</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_prompts</span>(<span class="hljs-params">input_prompt</span>): | |
| input_ids = tokenizer(input_prompt, return_tensors=<span class="hljs-string">"pt"</span>).input_ids.to(<span class="hljs-string">"cuda"</span>) | |
| outputs = model.generate( | |
| input_ids, temperature=<span class="hljs-number">0.8</span>, num_return_sequences=<span class="hljs-number">16</span>, do_sample=<span class="hljs-literal">True</span>, max_new_tokens=<span class="hljs-number">128</span>, top_k=<span class="hljs-number">10</span> | |
| ) | |
| <span class="hljs-keyword">return</span> tokenizer.batch_decode(outputs, skip_special_tokens=<span class="hljs-literal">True</span>) | |
| source_prompts = generate_prompts(source_text) | |
| target_prompts = generate_prompts(target_text) | |
| <span class="hljs-built_in">print</span>(source_prompts) | |
| <span class="hljs-built_in">print</span>(target_prompts)`}}),F=new ii({props:{$$slots:{default:[oi]},$$scope:{ctx:Jt}}}),me=new g({props:{code:"aW1wb3J0JTIwdG9yY2glMjAlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uRGlmZkVkaXRQaXBlbGluZSUyMCUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uRGlmZkVkaXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLTItMSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMEEpLnRvKCUyMmN1ZGElMjIpJTBBcGlwZWxpbmUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEFwaXBlbGluZS5lbmFibGVfdmFlX3NsaWNpbmcoKSUwQSUwQSU0MHRvcmNoLm5vX2dyYWQoKSUwQWRlZiUyMGVtYmVkX3Byb21wdHMoc2VudGVuY2VzJTJDJTIwdG9rZW5pemVyJTJDJTIwdGV4dF9lbmNvZGVyJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiklM0ElMEElMjAlMjAlMjAlMjBlbWJlZGRpbmdzJTIwJTNEJTIwJTVCJTVEJTBBJTIwJTIwJTIwJTIwZm9yJTIwc2VudCUyMGluJTIwc2VudGVuY2VzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdGV4dF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VudCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhZGRpbmclM0QlMjJtYXhfbGVuZ3RoJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbWF4X2xlbmd0aCUzRHRva2VuaXplci5tb2RlbF9tYXhfbGVuZ3RoJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRleHRfaW5wdXRfaWRzJTIwJTNEJTIwdGV4dF9pbnB1dHMuaW5wdXRfaWRzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJvbXB0X2VtYmVkcyUyMCUzRCUyMHRleHRfZW5jb2Rlcih0ZXh0X2lucHV0X2lkcy50byhkZXZpY2UpJTJDJTIwYXR0ZW50aW9uX21hc2slM0ROb25lKSU1QjAlNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBlbWJlZGRpbmdzLmFwcGVuZChwcm9tcHRfZW1iZWRzKSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMHRvcmNoLmNvbmNhdGVuYXRlKGVtYmVkZGluZ3MlMkMlMjBkaW0lM0QwKS5tZWFuKGRpbSUzRDApLnVuc3F1ZWV6ZSgwKSUwQSUwQXNvdXJjZV9lbWJlZHMlMjAlM0QlMjBlbWJlZF9wcm9tcHRzKHNvdXJjZV9wcm9tcHRzJTJDJTIwcGlwZWxpbmUudG9rZW5pem
VyJTJDJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyKSUwQXRhcmdldF9lbWJlZHMlMjAlM0QlMjBlbWJlZF9wcm9tcHRzKHRhcmdldF9wcm9tcHRzJTJDJTIwcGlwZWxpbmUudG9rZW5pemVyJTJDJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionDiffEditPipeline | |
| pipeline = StableDiffusionDiffEditPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-2-1"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline.enable_model_cpu_offload() | |
| pipeline.enable_vae_slicing() | |
| <span class="hljs-meta">@torch.no_grad()</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">embed_prompts</span>(<span class="hljs-params">sentences, tokenizer, text_encoder, device=<span class="hljs-string">"cuda"</span></span>): | |
| embeddings = [] | |
| <span class="hljs-keyword">for</span> sent <span class="hljs-keyword">in</span> sentences: | |
| text_inputs = tokenizer( | |
| sent, | |
| padding=<span class="hljs-string">"max_length"</span>, | |
| max_length=tokenizer.model_max_length, | |
| truncation=<span class="hljs-literal">True</span>, | |
| return_tensors=<span class="hljs-string">"pt"</span>, | |
| ) | |
| text_input_ids = text_inputs.input_ids | |
| prompt_embeds = text_encoder(text_input_ids.to(device), attention_mask=<span class="hljs-literal">None</span>)[<span class="hljs-number">0</span>] | |
| embeddings.append(prompt_embeds) | |
| <span class="hljs-keyword">return</span> torch.concatenate(embeddings, dim=<span class="hljs-number">0</span>).mean(dim=<span class="hljs-number">0</span>).unsqueeze(<span class="hljs-number">0</span>) | |
| source_embeds = embed_prompts(source_prompts, pipeline.tokenizer, pipeline.text_encoder) | |
| target_embeds = embed_prompts(target_prompts, pipeline.tokenizer, pipeline.text_encoder)`}}),ce=new g({props:{code:"JTIwJTIwZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMERESU1JbnZlcnNlU2NoZWR1bGVyJTJDJTIwRERJTVNjaGVkdWxlciUwQSUyMCUyMGZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBsb2FkX2ltYWdlJTBBJTBBJTIwJTIwcGlwZWxpbmUuc2NoZWR1bGVyJTIwJTNEJTIwRERJTVNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlbGluZS5zY2hlZHVsZXIuY29uZmlnKSUwQSUyMCUyMHBpcGVsaW5lLmludmVyc2Vfc2NoZWR1bGVyJTIwJTNEJTIwRERJTUludmVyc2VTY2hlZHVsZXIuZnJvbV9jb25maWcocGlwZWxpbmUuc2NoZWR1bGVyLmNvbmZpZyklMEElMEElMjAlMjBpbWdfdXJsJTIwJTNEJTIwJTIyaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGWGlhbmctY2QlMkZEaWZmRWRpdC1zdGFibGUtZGlmZnVzaW9uJTJGcmF3JTJGbWFpbiUyRmFzc2V0cyUyRm9yaWdpbi5wbmclMjIlMEElMjAlMjByYXdfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKGltZ191cmwpLmNvbnZlcnQoJTIyUkdCJTIyKS5yZXNpemUoKDc2OCUyQyUyMDc2OCkpJTBBJTBBJTBBJTIwJTIwbWFza19pbWFnZSUyMCUzRCUyMHBpcGVsaW5lLmdlbmVyYXRlX21hc2soJTBBJTIwJTIwJTIwJTIwJTIwJTIwaW1hZ2UlM0RyYXdfaW1hZ2UlMkMlMEElMkIlMjAlMjAlMjAlMjAlMjBzb3VyY2VfcHJvbXB0X2VtYmVkcyUzRHNvdXJjZV9lbWJlZHMlMkMlMEElMkIlMjAlMjAlMjAlMjAlMjB0YXJnZXRfcHJvbXB0X2VtYmVkcyUzRHRhcmdldF9lbWJlZHMlMkMlMEElMjAlMjApJTBBJTBBJTIwJTIwaW52X2xhdGVudHMlMjAlM0QlMjBwaXBlbGluZS5pbnZlcnQoJTBBJTJCJTIwJTIwJTIwJTIwJTIwcHJvbXB0X2VtYmVkcyUzRHNvdXJjZV9lbWJlZHMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjBpbWFnZSUzRHJhd19pbWFnZSUyQyUwQSUyMCUyMCkubGF0ZW50cyUwQSUwQSUyMCUyMGltYWdlcyUyMCUzRCUyMHBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMCUyMG1hc2tfaW1hZ2UlM0RtYXNrX2ltYWdlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwaW1hZ2VfbGF0ZW50cyUzRGludl9sYXRlbnRzJTJDJTBBJTJCJTIwJTIwJTIwJTIwJTIwcHJvbXB0X2VtYmVkcyUzRHRhcmdldF9lbWJlZHMlMkMlMEElMkIlMjAlMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHRfZW1iZWRzJTNEc291cmNlX2VtYmVkcyUyQyUwQSUyMCUyMCkuaW1hZ2VzJTBBJTIwJTIwaW1hZ2VzJTVCMCU1RC5zYXZlKCUyMmVkaXRlZF9pbWFnZS5wbmclMjIp",highlighted:` from diffusers import DDIMInverseScheduler, DDIMScheduler | |
| from diffusers.utils import load_image | |
| pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config) | |
| pipeline.inverse_scheduler = DDIMInverseScheduler.from_config(pipeline.scheduler.config) | |
| img_url = "https://github.com/Xiang-cd/DiffEdit-stable-diffusion/raw/main/assets/origin.png" | |
| raw_image = load_image(img_url).convert("RGB").resize((768, 768)) | |
| mask_image = pipeline.generate_mask( | |
| image=raw_image, | |
| <span class="hljs-addition">+ source_prompt_embeds=source_embeds,</span> | |
| <span class="hljs-addition">+ target_prompt_embeds=target_embeds,</span> | |
| ) | |
| inv_latents = pipeline.invert( | |
| <span class="hljs-addition">+ prompt_embeds=source_embeds,</span> | |
| image=raw_image, | |
| ).latents | |
| images = pipeline( | |
| mask_image=mask_image, | |
| image_latents=inv_latents, | |
| <span class="hljs-addition">+ prompt_embeds=target_embeds,</span> | |
| <span class="hljs-addition">+ negative_prompt_embeds=source_embeds,</span> | |
| ).images | |
| images[0].save("edited_image.png")`}}),de=new Ka({}),ue=new g({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQmxpcEZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbiUyQyUyMEJsaXBQcm9jZXNzb3IlMEElMEFwcm9jZXNzb3IlMjAlM0QlMjBCbGlwUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMjJTYWxlc2ZvcmNlJTJGYmxpcC1pbWFnZS1jYXB0aW9uaW5nLWJhc2UlMjIpJTBBbW9kZWwlMjAlM0QlMjBCbGlwRm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uLmZyb21fcHJldHJhaW5lZCglMjJTYWxlc2ZvcmNlJTJGYmxpcC1pbWFnZS1jYXB0aW9uaW5nLWJhc2UlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjBsb3dfY3B1X21lbV91c2FnZSUzRFRydWUp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BlipForConditionalGeneration, BlipProcessor | |
| processor = BlipProcessor.from_pretrained(<span class="hljs-string">"Salesforce/blip-image-captioning-base"</span>) | |
| model = BlipForConditionalGeneration.from_pretrained(<span class="hljs-string">"Salesforce/blip-image-captioning-base"</span>, torch_dtype=torch.float16, low_cpu_mem_usage=<span class="hljs-literal">True</span>)`}}),he=new g({props:{code:"JTQwdG9yY2gubm9fZ3JhZCgpJTBBZGVmJTIwZ2VuZXJhdGVfY2FwdGlvbihpbWFnZXMlMkMlMjBjYXB0aW9uX2dlbmVyYXRvciUyQyUyMGNhcHRpb25fcHJvY2Vzc29yKSUzQSUwQSUyMCUyMCUyMCUyMHRleHQlMjAlM0QlMjAlMjJhJTIwcGhvdG9ncmFwaCUyMG9mJTIyJTBBJTBBJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwY2FwdGlvbl9wcm9jZXNzb3IoaW1hZ2VzJTJDJTIwdGV4dCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpLnRvKGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRGNhcHRpb25fZ2VuZXJhdG9yLmR0eXBlKSUwQSUyMCUyMCUyMCUyMGNhcHRpb25fZ2VuZXJhdG9yLnRvKCUyMmN1ZGElMjIpJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMGNhcHRpb25fZ2VuZXJhdG9yLmdlbmVyYXRlKCoqaW5wdXRzJTJDJTIwbWF4X25ld190b2tlbnMlM0QxMjgpJTBBJTBBJTIwJTIwJTIwJTIwJTIzJTIwb2ZmbG9hZCUyMGNhcHRpb24lMjBnZW5lcmF0b3IlMEElMjAlMjAlMjAlMjBjYXB0aW9uX2dlbmVyYXRvci50byglMjJjcHUlMjIpJTBBJTBBJTIwJTIwJTIwJTIwY2FwdGlvbiUyMCUzRCUyMGNhcHRpb25fcHJvY2Vzc29yLmJhdGNoX2RlY29kZShvdXRwdXRzJTJDJTIwc2tpcF9zcGVjaWFsX3Rva2VucyUzRFRydWUpJTVCMCU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGNhcHRpb24=",highlighted:`<span class="hljs-meta">@torch.no_grad()</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_caption</span>(<span class="hljs-params">images, caption_generator, caption_processor</span>): | |
| text = <span class="hljs-string">"a photograph of"</span> | |
| inputs = caption_processor(images, text, return_tensors=<span class="hljs-string">"pt"</span>).to(device=<span class="hljs-string">"cuda"</span>, dtype=caption_generator.dtype) | |
| caption_generator.to(<span class="hljs-string">"cuda"</span>) | |
| outputs = caption_generator.generate(**inputs, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-comment"># offload caption generator</span> | |
| caption_generator.to(<span class="hljs-string">"cpu"</span>) | |
| caption = caption_processor.batch_decode(outputs, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> caption`}}),Je=new g({props:{code:"ZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMEElMEFpbWdfdXJsJTIwJTNEJTIwJTIyaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGWGlhbmctY2QlMkZEaWZmRWRpdC1zdGFibGUtZGlmZnVzaW9uJTJGcmF3JTJGbWFpbiUyRmFzc2V0cyUyRm9yaWdpbi5wbmclMjIlMEFyYXdfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKGltZ191cmwpLmNvbnZlcnQoJTIyUkdCJTIyKS5yZXNpemUoKDc2OCUyQyUyMDc2OCkpJTBBY2FwdGlvbiUyMCUzRCUyMGdlbmVyYXRlX2NhcHRpb24ocmF3X2ltYWdlJTJDJTIwbW9kZWwlMkMlMjBwcm9jZXNzb3Ip",highlighted:`<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| img_url = <span class="hljs-string">"https://github.com/Xiang-cd/DiffEdit-stable-diffusion/raw/main/assets/origin.png"</span> | |
| raw_image = load_image(img_url).convert(<span class="hljs-string">"RGB"</span>).resize((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>)) | |
| caption = generate_caption(raw_image, model, processor)`}}),{c(){w=n("meta"),Y=m(),T=n("h1"),_=n("a"),k=n("span"),f(j.$$.fragment),G=m(),N=n("span"),Ml=s("DiffEdit"),Mt=m(),f(q.$$.fragment),yt=m(),be=n("p"),yl=s("Image editing typically requires providing a mask of the area to be edited. DiffEdit automatically generates the mask for you based on a text query, making it easier overall to create a mask without image editing software. The DiffEdit algorithm works in three steps:"),wt=m(),E=n("ol"),Ae=n("li"),wl=s("the diffusion model denoises an image conditioned on some query text and reference text which produces different noise estimates for different areas of the image; the difference is used to infer a mask to identify which area of the image needs to be changed to match the query text"),bl=m(),qe=n("li"),Tl=s("the input image is encoded into latent space with DDIM"),gl=m(),Pe=n("li"),Il=s("the latents are decoded with the diffusion model conditioned on the text query, using the mask as a guide such that pixels outside the mask remain the same as in the input image"),bt=m(),Te=n("p"),Ul=s("This guide will show you how to use DiffEdit to edit images without manually creating a mask."),Tt=m(),ge=n("p"),Zl=s("Before you begin, make sure you have the following libraries installed:"),gt=m(),f(P.$$.fragment),It=m(),b=n("p"),jl=s("The "),Ie=n("a"),_l=s("StableDiffusionDiffEditPipeline"),vl=s(" requires an image mask and a set of partially inverted latents. The image mask is generated from the "),Ue=n("a"),Gl=s("generate_mask()"),El=s(" function, and includes two parameters, "),Le=n("code"),Wl=s("source_prompt"),Rl=s(" and "),Ke=n("code"),Bl=s("target_prompt"),Xl=s(". These parameters determine what to edit in the image. 
For example, if you want to change a bowl of "),Oe=n("em"),kl=s("fruits"),Vl=s(" to a bowl of "),et=n("em"),Cl=s("pears"),$l=s(", then:"),Ut=m(),f(L.$$.fragment),Zt=m(),I=n("p"),Yl=s("The partially inverted latents are generated from the "),Ze=n("a"),Nl=s("invert()"),zl=s(" function, and it is generally a good idea to include a "),tt=n("code"),Hl=s("prompt"),Fl=s(" or "),lt=n("em"),Sl=s("caption"),xl=s(" describing the image to help guide the inverse latent sampling process. The caption can often be your "),at=n("code"),Ql=s("source_prompt"),Dl=s(", but feel free to experiment with other text descriptions!"),jt=m(),je=n("p"),Al=s("Let\u2019s load the pipeline, scheduler, inverse scheduler, and enable some optimizations to reduce memory usage:"),_t=m(),f(K.$$.fragment),vt=m(),_e=n("p"),ql=s("Load the image to edit:"),Gt=m(),f(O.$$.fragment),Et=m(),v=n("p"),Pl=s("Use the "),ve=n("a"),Ll=s("generate_mask()"),Kl=s(" function to generate the image mask. You\u2019ll need to pass it the "),st=n("code"),Ol=s("source_prompt"),ea=s(" and "),it=n("code"),ta=s("target_prompt"),la=s(" to specify what to edit in the image:"),Wt=m(),f(ee.$$.fragment),Rt=m(),Ge=n("p"),aa=s("Next, create the inverted latents and pass it a caption describing the image:"),Bt=m(),f(te.$$.fragment),Xt=m(),U=n("p"),sa=s("Finally, pass the image mask and inverted latents to the pipeline. 
The "),nt=n("code"),ia=s("target_prompt"),na=s(" becomes the "),ot=n("code"),oa=s("prompt"),ra=s(" now, and the "),rt=n("code"),pa=s("source_prompt"),ma=s(" is used as the "),pt=n("code"),ca=s("negative_prompt"),da=s(":"),kt=m(),f(le.$$.fragment),Vt=m(),V=n("div"),ae=n("div"),Ee=n("img"),fa=m(),We=n("figcaption"),ua=s("original image"),ha=m(),se=n("div"),Re=n("img"),Ja=m(),Be=n("figcaption"),Ma=s("edited image"),Ct=m(),C=n("h2"),z=n("a"),mt=n("span"),f(ie.$$.fragment),ya=m(),ct=n("span"),wa=s("Generate source and target embeddings"),$t=m(),H=n("p"),ba=s("The source and target embeddings can be automatically generated with the "),ne=n("a"),Ta=s("Flan-T5"),ga=s(" model instead of creating them manually."),Yt=m(),Xe=n("p"),Ia=s("Load the Flan-T5 model and tokenizer from the \u{1F917} Transformers library:"),Nt=m(),f(oe.$$.fragment),zt=m(),ke=n("p"),Ua=s("Provide some initial text to prompt the model to generate the source and target prompts."),Ht=m(),f(re.$$.fragment),Ft=m(),Ve=n("p"),Za=s("Next, create a utility function to generate the prompts:"),St=m(),f(pe.$$.fragment),xt=m(),f(F.$$.fragment),Qt=m(),S=n("p"),ja=s("Load the text encoder model used by the "),Ce=n("a"),_a=s("StableDiffusionDiffEditPipeline"),va=s(" to encode the text. 
You\u2019ll use the text encoder to compute the text embeddings:"),Dt=m(),f(me.$$.fragment),At=m(),W=n("p"),Ga=s("Finally, pass the embeddings to the "),$e=n("a"),Ea=s("generate_mask()"),Wa=s(" and "),Ye=n("a"),Ra=s("invert()"),Ba=s(" functions, and pipeline to generate the image:"),qt=m(),f(ce.$$.fragment),Pt=m(),$=n("h2"),x=n("a"),dt=n("span"),f(de.$$.fragment),Xa=m(),ft=n("span"),ka=s("Generate a caption for inversion"),Lt=m(),R=n("p"),Va=s("While you can use the "),ut=n("code"),Ca=s("source_prompt"),$a=s(" as a caption to help generate the partially inverted latents, you can also use the "),fe=n("a"),Ya=s("BLIP"),Na=s(" model to automatically generate a caption."),Kt=m(),Ne=n("p"),za=s("Load the BLIP model and processor from the \u{1F917} Transformers library:"),Ot=m(),f(ue.$$.fragment),el=m(),ze=n("p"),Ha=s("Create a utility function to generate a caption from the input image:"),tl=m(),f(he.$$.fragment),ll=m(),Q=n("p"),Fa=s("Load an input image and generate a caption for it using the "),ht=n("code"),Sa=s("generate_caption"),xa=s(" function:"),al=m(),f(Je.$$.fragment),sl=m(),Me=n("div"),ye=n("figure"),He=n("img"),Qa=m(),Fe=n("figcaption"),Da=s('generated caption: "a photograph of a bowl of fruit on a table"'),il=m(),D=n("p"),Aa=s("Now you can drop the caption into the "),Se=n("a"),qa=s("invert()"),Pa=s(" function to generate the partially inverted latents!"),this.h()},l(e){const a=ai('[data-svelte="svelte-1phssyn"]',document.head);w=o(a,"META",{name:!0,content:!0}),a.forEach(t),Y=c(e),T=o(e,"H1",{class:!0});var we=p(T);_=o(we,"A",{id:!0,class:!0,href:!0});var ls=p(_);k=o(ls,"SPAN",{});var as=p(k);u(j.$$.fragment,as),as.forEach(t),ls.forEach(t),G=c(we),N=o(we,"SPAN",{});var ss=p(N);Ml=i(ss,"DiffEdit"),ss.forEach(t),we.forEach(t),Mt=c(e),u(q.$$.fragment,e),yt=c(e),be=o(e,"P",{});var is=p(be);yl=i(is,"Image editing typically requires providing a mask of the area to be edited. 
DiffEdit automatically generates the mask for you based on a text query, making it easier overall to create a mask without image editing software. The DiffEdit algorithm works in three steps:"),is.forEach(t),wt=c(e),E=o(e,"OL",{});var xe=p(E);Ae=o(xe,"LI",{});var ns=p(Ae);wl=i(ns,"the diffusion model denoises an image conditioned on some query text and reference text which produces different noise estimates for different areas of the image; the difference is used to infer a mask to identify which area of the image needs to be changed to match the query text"),ns.forEach(t),bl=c(xe),qe=o(xe,"LI",{});var os=p(qe);Tl=i(os,"the input image is encoded into latent space with DDIM"),os.forEach(t),gl=c(xe),Pe=o(xe,"LI",{});var rs=p(Pe);Il=i(rs,"the latents are decoded with the diffusion model conditioned on the text query, using the mask as a guide such that pixels outside the mask remain the same as in the input image"),rs.forEach(t),xe.forEach(t),bt=c(e),Te=o(e,"P",{});var ps=p(Te);Ul=i(ps,"This guide will show you how to use DiffEdit to edit images without manually creating a mask."),ps.forEach(t),Tt=c(e),ge=o(e,"P",{});var ms=p(ge);Zl=i(ms,"Before you begin, make sure you have the following libraries installed:"),ms.forEach(t),gt=c(e),u(P.$$.fragment,e),It=c(e),b=o(e,"P",{});var Z=p(b);jl=i(Z,"The "),Ie=o(Z,"A",{href:!0});var cs=p(Ie);_l=i(cs,"StableDiffusionDiffEditPipeline"),cs.forEach(t),vl=i(Z," requires an image mask and a set of partially inverted latents. The image mask is generated from the "),Ue=o(Z,"A",{href:!0});var ds=p(Ue);Gl=i(ds,"generate_mask()"),ds.forEach(t),El=i(Z," function, and includes two parameters, "),Le=o(Z,"CODE",{});var fs=p(Le);Wl=i(fs,"source_prompt"),fs.forEach(t),Rl=i(Z," and "),Ke=o(Z,"CODE",{});var us=p(Ke);Bl=i(us,"target_prompt"),us.forEach(t),Xl=i(Z,". These parameters determine what to edit in the image. 
For example, if you want to change a bowl of "),Oe=o(Z,"EM",{});var hs=p(Oe);kl=i(hs,"fruits"),hs.forEach(t),Vl=i(Z," to a bowl of "),et=o(Z,"EM",{});var Js=p(et);Cl=i(Js,"pears"),Js.forEach(t),$l=i(Z,", then:"),Z.forEach(t),Ut=c(e),u(L.$$.fragment,e),Zt=c(e),I=o(e,"P",{});var B=p(I);Yl=i(B,"The partially inverted latents are generated from the "),Ze=o(B,"A",{href:!0});var Ms=p(Ze);Nl=i(Ms,"invert()"),Ms.forEach(t),zl=i(B," function, and it is generally a good idea to include a "),tt=o(B,"CODE",{});var ys=p(tt);Hl=i(ys,"prompt"),ys.forEach(t),Fl=i(B," or "),lt=o(B,"EM",{});var ws=p(lt);Sl=i(ws,"caption"),ws.forEach(t),xl=i(B," describing the image to help guide the inverse latent sampling process. The caption can often be your "),at=o(B,"CODE",{});var bs=p(at);Ql=i(bs,"source_prompt"),bs.forEach(t),Dl=i(B,", but feel free to experiment with other text descriptions!"),B.forEach(t),jt=c(e),je=o(e,"P",{});var Ts=p(je);Al=i(Ts,"Let\u2019s load the pipeline, scheduler, inverse scheduler, and enable some optimizations to reduce memory usage:"),Ts.forEach(t),_t=c(e),u(K.$$.fragment,e),vt=c(e),_e=o(e,"P",{});var gs=p(_e);ql=i(gs,"Load the image to edit:"),gs.forEach(t),Gt=c(e),u(O.$$.fragment,e),Et=c(e),v=o(e,"P",{});var A=p(v);Pl=i(A,"Use the "),ve=o(A,"A",{href:!0});var Is=p(ve);Ll=i(Is,"generate_mask()"),Is.forEach(t),Kl=i(A," function to generate the image mask. You\u2019ll need to pass it the "),st=o(A,"CODE",{});var Us=p(st);Ol=i(Us,"source_prompt"),Us.forEach(t),ea=i(A," and "),it=o(A,"CODE",{});var Zs=p(it);ta=i(Zs,"target_prompt"),Zs.forEach(t),la=i(A," to specify what to edit in the image:"),A.forEach(t),Wt=c(e),u(ee.$$.fragment,e),Rt=c(e),Ge=o(e,"P",{});var js=p(Ge);aa=i(js,"Next, create the inverted latents and pass it a caption describing the image:"),js.forEach(t),Bt=c(e),u(te.$$.fragment,e),Xt=c(e),U=o(e,"P",{});var X=p(U);sa=i(X,"Finally, pass the image mask and inverted latents to the pipeline. 
The "),nt=o(X,"CODE",{});var _s=p(nt);ia=i(_s,"target_prompt"),_s.forEach(t),na=i(X," becomes the "),ot=o(X,"CODE",{});var vs=p(ot);oa=i(vs,"prompt"),vs.forEach(t),ra=i(X," now, and the "),rt=o(X,"CODE",{});var Gs=p(rt);pa=i(Gs,"source_prompt"),Gs.forEach(t),ma=i(X," is used as the "),pt=o(X,"CODE",{});var Es=p(pt);ca=i(Es,"negative_prompt"),Es.forEach(t),da=i(X,":"),X.forEach(t),kt=c(e),u(le.$$.fragment,e),Vt=c(e),V=o(e,"DIV",{class:!0});var ol=p(V);ae=o(ol,"DIV",{});var rl=p(ae);Ee=o(rl,"IMG",{class:!0,src:!0}),fa=c(rl),We=o(rl,"FIGCAPTION",{class:!0});var Ws=p(We);ua=i(Ws,"original image"),Ws.forEach(t),rl.forEach(t),ha=c(ol),se=o(ol,"DIV",{});var pl=p(se);Re=o(pl,"IMG",{class:!0,src:!0}),Ja=c(pl),Be=o(pl,"FIGCAPTION",{class:!0});var Rs=p(Be);Ma=i(Rs,"edited image"),Rs.forEach(t),pl.forEach(t),ol.forEach(t),Ct=c(e),C=o(e,"H2",{class:!0});var ml=p(C);z=o(ml,"A",{id:!0,class:!0,href:!0});var Bs=p(z);mt=o(Bs,"SPAN",{});var Xs=p(mt);u(ie.$$.fragment,Xs),Xs.forEach(t),Bs.forEach(t),ya=c(ml),ct=o(ml,"SPAN",{});var ks=p(ct);wa=i(ks,"Generate source and target embeddings"),ks.forEach(t),ml.forEach(t),$t=c(e),H=o(e,"P",{});var cl=p(H);ba=i(cl,"The source and target embeddings can be automatically generated with the "),ne=o(cl,"A",{href:!0,rel:!0});var Vs=p(ne);Ta=i(Vs,"Flan-T5"),Vs.forEach(t),ga=i(cl," model instead of creating them manually."),cl.forEach(t),Yt=c(e),Xe=o(e,"P",{});var Cs=p(Xe);Ia=i(Cs,"Load the Flan-T5 model and tokenizer from the \u{1F917} Transformers library:"),Cs.forEach(t),Nt=c(e),u(oe.$$.fragment,e),zt=c(e),ke=o(e,"P",{});var $s=p(ke);Ua=i($s,"Provide some initial text to prompt the model to generate the source and target prompts."),$s.forEach(t),Ht=c(e),u(re.$$.fragment,e),Ft=c(e),Ve=o(e,"P",{});var Ys=p(Ve);Za=i(Ys,"Next, create a utility function to generate the prompts:"),Ys.forEach(t),St=c(e),u(pe.$$.fragment,e),xt=c(e),u(F.$$.fragment,e),Qt=c(e),S=o(e,"P",{});var dl=p(S);ja=i(dl,"Load the text encoder model used by the 
"),Ce=o(dl,"A",{href:!0});var Ns=p(Ce);_a=i(Ns,"StableDiffusionDiffEditPipeline"),Ns.forEach(t),va=i(dl," to encode the text. You\u2019ll use the text encoder to compute the text embeddings:"),dl.forEach(t),Dt=c(e),u(me.$$.fragment,e),At=c(e),W=o(e,"P",{});var Qe=p(W);Ga=i(Qe,"Finally, pass the embeddings to the "),$e=o(Qe,"A",{href:!0});var zs=p($e);Ea=i(zs,"generate_mask()"),zs.forEach(t),Wa=i(Qe," and "),Ye=o(Qe,"A",{href:!0});var Hs=p(Ye);Ra=i(Hs,"invert()"),Hs.forEach(t),Ba=i(Qe," functions, and pipeline to generate the image:"),Qe.forEach(t),qt=c(e),u(ce.$$.fragment,e),Pt=c(e),$=o(e,"H2",{class:!0});var fl=p($);x=o(fl,"A",{id:!0,class:!0,href:!0});var Fs=p(x);dt=o(Fs,"SPAN",{});var Ss=p(dt);u(de.$$.fragment,Ss),Ss.forEach(t),Fs.forEach(t),Xa=c(fl),ft=o(fl,"SPAN",{});var xs=p(ft);ka=i(xs,"Generate a caption for inversion"),xs.forEach(t),fl.forEach(t),Lt=c(e),R=o(e,"P",{});var De=p(R);Va=i(De,"While you can use the "),ut=o(De,"CODE",{});var Qs=p(ut);Ca=i(Qs,"source_prompt"),Qs.forEach(t),$a=i(De," as a caption to help generate the partially inverted latents, you can also use the "),fe=o(De,"A",{href:!0,rel:!0});var Ds=p(fe);Ya=i(Ds,"BLIP"),Ds.forEach(t),Na=i(De," model to automatically generate a caption."),De.forEach(t),Kt=c(e),Ne=o(e,"P",{});var As=p(Ne);za=i(As,"Load the BLIP model and processor from the \u{1F917} Transformers library:"),As.forEach(t),Ot=c(e),u(ue.$$.fragment,e),el=c(e),ze=o(e,"P",{});var qs=p(ze);Ha=i(qs,"Create a utility function to generate a caption from the input image:"),qs.forEach(t),tl=c(e),u(he.$$.fragment,e),ll=c(e),Q=o(e,"P",{});var ul=p(Q);Fa=i(ul,"Load an input image and generate a caption for it using the "),ht=o(ul,"CODE",{});var Ps=p(ht);Sa=i(Ps,"generate_caption"),Ps.forEach(t),xa=i(ul," function:"),ul.forEach(t),al=c(e),u(Je.$$.fragment,e),sl=c(e),Me=o(e,"DIV",{class:!0});var Ls=p(Me);ye=o(Ls,"FIGURE",{});var hl=p(ye);He=o(hl,"IMG",{class:!0,src:!0}),Qa=c(hl),Fe=o(hl,"FIGCAPTION",{class:!0});var Ks=p(Fe);Da=i(Ks,'generated 
caption: "a photograph of a bowl of fruit on a table"'),Ks.forEach(t),hl.forEach(t),Ls.forEach(t),il=c(e),D=o(e,"P",{});var Jl=p(D);Aa=i(Jl,"Now you can drop the caption into the "),Se=o(Jl,"A",{href:!0});var Os=p(Se);qa=i(Os,"invert()"),Os.forEach(t),Pa=i(Jl," function to generate the partially inverted latents!"),Jl.forEach(t),this.h()},h(){d(w,"name","hf:doc:metadata"),d(w,"content",JSON.stringify(pi)),d(_,"id","diffedit"),d(_,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),d(_,"href","#diffedit"),d(T,"class","relative group"),d(Ie,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline"),d(Ue,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline.generate_mask"),d(Ze,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline.invert"),d(ve,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline.generate_mask"),d(Ee,"class","rounded-xl"),La(Ee.src,Oa="https://github.com/Xiang-cd/DiffEdit-stable-diffusion/raw/main/assets/origin.png")||d(Ee,"src",Oa),d(We,"class","mt-2 text-center text-sm text-gray-500"),d(Re,"class","rounded-xl"),La(Re.src,es="https://github.com/Xiang-cd/DiffEdit-stable-diffusion/blob/main/assets/target.png?raw=true")||d(Re,"src",es),d(Be,"class","mt-2 text-center text-sm text-gray-500"),d(V,"class","flex gap-4"),d(z,"id","generate-source-and-target-embeddings"),d(z,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),d(z,"href","#generate-source-and-target-embeddings"),d(C,"class","relative 
group"),d(ne,"href","https://huggingface.co/docs/transformers/model_doc/flan-t5"),d(ne,"rel","nofollow"),d(Ce,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline"),d($e,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline.generate_mask"),d(Ye,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline.invert"),d(x,"id","generate-a-caption-for-inversion"),d(x,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),d(x,"href","#generate-a-caption-for-inversion"),d($,"class","relative group"),d(fe,"href","https://huggingface.co/docs/transformers/model_doc/blip"),d(fe,"rel","nofollow"),d(He,"class","rounded-xl"),La(He.src,ts="https://github.com/Xiang-cd/DiffEdit-stable-diffusion/raw/main/assets/origin.png")||d(He,"src",ts),d(Fe,"class","text-center"),d(Me,"class","flex 
justify-center"),d(Se,"href","/docs/diffusers/main/en/api/pipelines/diffedit#diffusers.StableDiffusionDiffEditPipeline.invert")},m(e,a){l(document.head,w),r(e,Y,a),r(e,T,a),l(T,_),l(_,k),h(j,k,null),l(T,G),l(T,N),l(N,Ml),r(e,Mt,a),h(q,e,a),r(e,yt,a),r(e,be,a),l(be,yl),r(e,wt,a),r(e,E,a),l(E,Ae),l(Ae,wl),l(E,bl),l(E,qe),l(qe,Tl),l(E,gl),l(E,Pe),l(Pe,Il),r(e,bt,a),r(e,Te,a),l(Te,Ul),r(e,Tt,a),r(e,ge,a),l(ge,Zl),r(e,gt,a),h(P,e,a),r(e,It,a),r(e,b,a),l(b,jl),l(b,Ie),l(Ie,_l),l(b,vl),l(b,Ue),l(Ue,Gl),l(b,El),l(b,Le),l(Le,Wl),l(b,Rl),l(b,Ke),l(Ke,Bl),l(b,Xl),l(b,Oe),l(Oe,kl),l(b,Vl),l(b,et),l(et,Cl),l(b,$l),r(e,Ut,a),h(L,e,a),r(e,Zt,a),r(e,I,a),l(I,Yl),l(I,Ze),l(Ze,Nl),l(I,zl),l(I,tt),l(tt,Hl),l(I,Fl),l(I,lt),l(lt,Sl),l(I,xl),l(I,at),l(at,Ql),l(I,Dl),r(e,jt,a),r(e,je,a),l(je,Al),r(e,_t,a),h(K,e,a),r(e,vt,a),r(e,_e,a),l(_e,ql),r(e,Gt,a),h(O,e,a),r(e,Et,a),r(e,v,a),l(v,Pl),l(v,ve),l(ve,Ll),l(v,Kl),l(v,st),l(st,Ol),l(v,ea),l(v,it),l(it,ta),l(v,la),r(e,Wt,a),h(ee,e,a),r(e,Rt,a),r(e,Ge,a),l(Ge,aa),r(e,Bt,a),h(te,e,a),r(e,Xt,a),r(e,U,a),l(U,sa),l(U,nt),l(nt,ia),l(U,na),l(U,ot),l(ot,oa),l(U,ra),l(U,rt),l(rt,pa),l(U,ma),l(U,pt),l(pt,ca),l(U,da),r(e,kt,a),h(le,e,a),r(e,Vt,a),r(e,V,a),l(V,ae),l(ae,Ee),l(ae,fa),l(ae,We),l(We,ua),l(V,ha),l(V,se),l(se,Re),l(se,Ja),l(se,Be),l(Be,Ma),r(e,Ct,a),r(e,C,a),l(C,z),l(z,mt),h(ie,mt,null),l(C,ya),l(C,ct),l(ct,wa),r(e,$t,a),r(e,H,a),l(H,ba),l(H,ne),l(ne,Ta),l(H,ga),r(e,Yt,a),r(e,Xe,a),l(Xe,Ia),r(e,Nt,a),h(oe,e,a),r(e,zt,a),r(e,ke,a),l(ke,Ua),r(e,Ht,a),h(re,e,a),r(e,Ft,a),r(e,Ve,a),l(Ve,Za),r(e,St,a),h(pe,e,a),r(e,xt,a),h(F,e,a),r(e,Qt,a),r(e,S,a),l(S,ja),l(S,Ce),l(Ce,_a),l(S,va),r(e,Dt,a),h(me,e,a),r(e,At,a),r(e,W,a),l(W,Ga),l(W,$e),l($e,Ea),l(W,Wa),l(W,Ye),l(Ye,Ra),l(W,Ba),r(e,qt,a),h(ce,e,a),r(e,Pt,a),r(e,$,a),l($,x),l(x,dt),h(de,dt,null),l($,Xa),l($,ft),l(ft,ka),r(e,Lt,a),r(e,R,a),l(R,Va),l(R,ut),l(ut,Ca),l(R,$a),l(R,fe),l(fe,Ya),l(R,Na),r(e,Kt,a),r(e,Ne,a),l(Ne,za),r(e,Ot,a),h(ue,e,a),r(e,el,a),r(e,ze,a),l(ze,Ha),r(e,tl,a),h(he,e,a),r(e,ll,a
),r(e,Q,a),l(Q,Fa),l(Q,ht),l(ht,Sa),l(Q,xa),r(e,al,a),h(Je,e,a),r(e,sl,a),r(e,Me,a),l(Me,ye),l(ye,He),l(ye,Qa),l(ye,Fe),l(Fe,Da),r(e,il,a),r(e,D,a),l(D,Aa),l(D,Se),l(Se,qa),l(D,Pa),nl=!0},p(e,[a]){const we={};a&2&&(we.$$scope={dirty:a,ctx:e}),F.$set(we)},i(e){nl||(J(j.$$.fragment,e),J(q.$$.fragment,e),J(P.$$.fragment,e),J(L.$$.fragment,e),J(K.$$.fragment,e),J(O.$$.fragment,e),J(ee.$$.fragment,e),J(te.$$.fragment,e),J(le.$$.fragment,e),J(ie.$$.fragment,e),J(oe.$$.fragment,e),J(re.$$.fragment,e),J(pe.$$.fragment,e),J(F.$$.fragment,e),J(me.$$.fragment,e),J(ce.$$.fragment,e),J(de.$$.fragment,e),J(ue.$$.fragment,e),J(he.$$.fragment,e),J(Je.$$.fragment,e),nl=!0)},o(e){M(j.$$.fragment,e),M(q.$$.fragment,e),M(P.$$.fragment,e),M(L.$$.fragment,e),M(K.$$.fragment,e),M(O.$$.fragment,e),M(ee.$$.fragment,e),M(te.$$.fragment,e),M(le.$$.fragment,e),M(ie.$$.fragment,e),M(oe.$$.fragment,e),M(re.$$.fragment,e),M(pe.$$.fragment,e),M(F.$$.fragment,e),M(me.$$.fragment,e),M(ce.$$.fragment,e),M(de.$$.fragment,e),M(ue.$$.fragment,e),M(he.$$.fragment,e),M(Je.$$.fragment,e),nl=!1},d(e){t(w),e&&t(Y),e&&t(T),y(j),e&&t(Mt),y(q,e),e&&t(yt),e&&t(be),e&&t(wt),e&&t(E),e&&t(bt),e&&t(Te),e&&t(Tt),e&&t(ge),e&&t(gt),y(P,e),e&&t(It),e&&t(b),e&&t(Ut),y(L,e),e&&t(Zt),e&&t(I),e&&t(jt),e&&t(je),e&&t(_t),y(K,e),e&&t(vt),e&&t(_e),e&&t(Gt),y(O,e),e&&t(Et),e&&t(v),e&&t(Wt),y(ee,e),e&&t(Rt),e&&t(Ge),e&&t(Bt),y(te,e),e&&t(Xt),e&&t(U),e&&t(kt),y(le,e),e&&t(Vt),e&&t(V),e&&t(Ct),e&&t(C),y(ie),e&&t($t),e&&t(H),e&&t(Yt),e&&t(Xe),e&&t(Nt),y(oe,e),e&&t(zt),e&&t(ke),e&&t(Ht),y(re,e),e&&t(Ft),e&&t(Ve),e&&t(St),y(pe,e),e&&t(xt),y(F,e),e&&t(Qt),e&&t(S),e&&t(Dt),y(me,e),e&&t(At),e&&t(W),e&&t(qt),y(ce,e),e&&t(Pt),e&&t($),y(de),e&&t(Lt),e&&t(R),e&&t(Kt),e&&t(Ne),e&&t(Ot),y(ue,e),e&&t(el),e&&t(ze),e&&t(tl),y(he,e),e&&t(ll),e&&t(Q),e&&t(al),y(Je,e),e&&t(sl),e&&t(Me),e&&t(il),e&&t(D)}}}const pi={local:"diffedit",sections:[{local:"generate-source-and-target-embeddings",title:"Generate source and target 
embeddings"},{local:"generate-a-caption-for-inversion",title:"Generate a caption for inversion"}],title:"DiffEdit"};/*
 * The statement that ends just above is `pi`: the page metadata object
 * (`local`, `sections`, `title`). It is exported below under the name
 * `metadata`, and the fragment's `h()` hook serialises it with JSON.stringify
 * into the `content` attribute of the element named "hf:doc:metadata".
 *
 * mi(Jt): instance function handed to `ti` by the component constructor.
 *   - `Jt` is accepted but never read.
 *   - Registers a callback through `si` (imported from the vendor chunk;
 *     presumably Svelte's onMount -- TODO confirm). The callback reads the
 *     "fw" query parameter from window.location.search and discards the value,
 *     so as written it has no observable effect.
 *   - Uses the comma operator, so the value actually returned is the empty
 *     array `[]`, not the result of `si(...)`.
 */function mi(Jt){return si(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/*
 * Ji: the page component, subclassing `ei` (vendor import; presumably
 * SvelteComponent -- TODO confirm). The constructor calls super() and then
 * delegates all setup to `ti`, passing the instance, the constructor argument
 * `w`, the instance function `mi`, the fragment factory `ri`, the vendor
 * helper `li`, and an empty object literal as the last argument.
 */class Ji extends ei{constructor(w){super();ti(this,w,mi,ri,li,{})}}/*
 * Module exports: the component class is the default export and the `pi`
 * metadata object is re-exported as `metadata`.
 */export{Ji as default,pi as metadata}; | |
Xet Storage Details
- Size:
- 43.8 kB
- Xet hash:
- c216f84e931b1f865d2cba7cfceac752369c3022da2b4c2642766f0100127e22
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.