Buckets:

hf-doc-build/doc / diffusers /main /en /_app /pages /api /pipelines /blip_diffusion.mdx-hf-doc-builder.js
rtrm's picture
download
raw
44.1 kB
/*
 * NOTE(review): this looks like minified compiler output rather than hand-written
 * source (single-letter import aliases, comma-chained expressions); confirm, and if
 * so prefer changing the generator input over editing this file by hand.
 *
 * Every single-letter helper used below (o, r, i, l, a, d, h, t, n, ...) is an alias
 * from the import of the vendor chunk on this line. Their behaviour is defined in
 * that chunk, not here; the descriptions in the comments below are presumed from how
 * they are called and should be verified against the vendor bundle.
 */
import{S as nn,i as sn,s as on,e as o,k as m,w as j,t as r,M as an,c as i,d as n,m as g,a,x as J,h as l,b as d,G as t,g as h,y as T,q as B,o as U,B as C,v as rn,L as en}from"../../../chunks/vendor-hf-doc-builder.js";import{T as ln}from"../../../chunks/Tip-hf-doc-builder.js";import{D as Re}from"../../../chunks/Docstring-hf-doc-builder.js";import{C as tn}from"../../../chunks/CodeBlock-hf-doc-builder.js";import{I as xt}from"../../../chunks/IconCopyLink-hf-doc-builder.js";import{E as Kt}from"../../../chunks/ExampleCodeBlock-hf-doc-builder.js";/**
 * Fragment factory for the "Make sure to check out the Schedulers guide ..." paragraph.
 * Later in this file it is passed as the `default` slot of the `ln` component.
 *
 * @param E Received from the caller; not read anywhere inside this function.
 * @returns An object with the methods `c`, `l`, `h`, `m` and `d`, which all operate
 *          on the same closed-over nodes (one <p>, four text nodes, two <a>).
 */function pn(E){let p,b,u,f,y,s,_,N;return{/* c: builds the <p>, its text nodes and the two <a> elements from scratch via o()/r(), then calls this.h(). */c(){p=o("p"),b=r("Make sure to check out the Schedulers "),u=o("a"),f=r("guide"),y=r(" to learn how to explore the tradeoff between scheduler speed and quality, and see the "),s=o("a"),_=r("reuse components across pipelines"),N=r(" section to learn how to efficiently load the same components into multiple pipelines."),this.h()},/* l: obtains the same nodes from the node list `k` via i()/l()/a() instead of creating them (presumably hydration of pre-rendered markup; the call order mirrors the document order and should not be rearranged). n() is then applied to every entry of each child list through forEach, and this.h() is called last. */l(k){p=i(k,"P",{});var w=a(p);b=l(w,"Make sure to check out the Schedulers "),u=i(w,"A",{href:!0});var x=a(u);f=l(x,"guide"),x.forEach(n),y=l(w," to learn how to explore the tradeoff between scheduler speed and quality, and see the "),s=i(w,"A",{href:!0});var ae=a(s);_=l(ae,"reuse components across pipelines"),ae.forEach(n),N=l(w," section to learn how to efficiently load the same components into multiple pipelines."),w.forEach(n),this.h()},/* h: sets the href attribute on both anchors via d(); shared by c() and l(). */h(){d(u,"href","/using-diffusers/schedulers"),d(s,"href","/using-diffusers/loading#reuse-components-across-pipelines")},/* m: calls h(k,p,w) for the paragraph (presumably: insert p into k before anchor w), then t(parent,child) for each child in document order. */m(k,w){h(k,p,w),t(p,b),t(p,u),t(u,f),t(p,y),t(p,s),t(s,_),t(p,N)},/* d: calls n(p) only when k is truthy; nothing else is released here. */d(k){k&&n(p)}}}/* Opening of `cn`: the statement below is unterminated on purpose, because the `new` expression continues with `tn({props:{code:...,highlighted:...}})` on the following lines. Keep the trailing tokens exactly as they are. */function cn(E){let p,b,u,f,y;return f=new 
tn({props:{code:"ZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMlMjBpbXBvcnQlMjBCbGlwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwbG9hZF9pbWFnZSUwQWltcG9ydCUyMHRvcmNoJTBBJTBBYmxpcF9kaWZmdXNpb25fcGlwZSUyMCUzRCUyMEJsaXBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyU2FsZXNmb3JjZSUyRmJsaXBkaWZmdXNpb24lMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTBBY29uZF9zdWJqZWN0JTIwJTNEJTIwJTIyZG9nJTIyJTBBdGd0X3N1YmplY3QlMjAlM0QlMjAlMjJkb2clMjIlMEF0ZXh0X3Byb21wdF9pbnB1dCUyMCUzRCUyMCUyMnN3aW1taW5nJTIwdW5kZXJ3YXRlciUyMiUwQSUwQWNvbmRfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmF5dXNodHVlcyUyRmJsaXBkaWZmdXNpb25faW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkb2cuanBnJTIyJTBBKSUwQWd1aWRhbmNlX3NjYWxlJTIwJTNEJTIwNy41JTBBbnVtX2luZmVyZW5jZV9zdGVwcyUyMCUzRCUyMDI1JTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyb3Zlci1leHBvc3VyZSUyQyUyMHVuZGVyLWV4cG9zdXJlJTJDJTIwc2F0dXJhdGVkJTJDJTIwZHVwbGljYXRlJTJDJTIwb3V0JTIwb2YlMjBmcmFtZSUyQyUyMGxvd3JlcyUyQyUyMGNyb3BwZWQlMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjBsb3clMjBxdWFsaXR5JTJDJTIwanBlZyUyMGFydGlmYWN0cyUyQyUyMG1vcmJpZCUyQyUyMG11dGlsYXRlZCUyQyUyMG91dCUyMG9mJTIwZnJhbWUlMkMlMjB1Z2x5JTJDJTIwYmFkJTIwYW5hdG9teSUyQyUyMGJhZCUyMHByb3BvcnRpb25zJTJDJTIwZGVmb3JtZWQlMkMlMjBibHVycnklMkMlMjBkdXBsaWNhdGUlMjIlMEElMEElMEFvdXRwdXQlMjAlM0QlMjBibGlwX2RpZmZ1c2lvbl9waXBlKCUwQSUyMCUyMCUyMCUyMHRleHRfcHJvbXB0X2lucHV0JTJDJTBBJTIwJTIwJTIwJTIwY29uZF9pbWFnZSUyQyUwQSUyMCUyMCUyMCUyMGNvbmRfc3ViamVjdCUyQyUwQSUyMCUyMCUyMCUyMHRndF9zdWJqZWN0JTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0RndWlkYW5jZV9zY2FsZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0RudW1faW5mZXJlbmNlX3N0ZXBzJTJDJTBBJTIwJTIwJTIwJTIwbmVnX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNENTEyJTJDJTBBKS5pbWFnZXMlMEFvdXRwdXQlNUIwJTVELnNhdmUoJTIyaW1hZ2UucG5nJTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span 
class="hljs-keyword">from</span> diffusers.pipelines <span class="hljs-keyword">import</span> BlipDiffusionPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span>blip_diffusion_pipe = BlipDiffusionPipeline.from_pretrained(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Salesforce/blipdiffusion&quot;</span>, torch_dtype=torch.float16
<span class="hljs-meta">... </span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>cond_subject = <span class="hljs-string">&quot;dog&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>tgt_subject = <span class="hljs-string">&quot;dog&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>text_prompt_input = <span class="hljs-string">&quot;swimming underwater&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>cond_image = load_image(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/dog.jpg&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>guidance_scale = <span class="hljs-number">7.5</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>num_inference_steps = <span class="hljs-number">25</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>negative_prompt = <span class="hljs-string">&quot;over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>output = blip_diffusion_pipe(
<span class="hljs-meta">... </span> text_prompt_input,
<span class="hljs-meta">... </span> cond_image,
<span class="hljs-meta">... </span> cond_subject,
<span class="hljs-meta">... </span> tgt_subject,
<span class="hljs-meta">... </span> guidance_scale=guidance_scale,
<span class="hljs-meta">... </span> num_inference_steps=num_inference_steps,
<span class="hljs-meta">... </span> neg_prompt=negative_prompt,
<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
<span class="hljs-meta">... </span> width=<span class="hljs-number">512</span>,
<span class="hljs-meta">... </span>).images
<span class="hljs-meta">&gt;&gt;&gt; </span>output[<span class="hljs-number">0</span>].save(<span class="hljs-string">&quot;image.png&quot;</span>)`}}),{c(){p=o("p"),b=r("Examples:"),u=m(),j(f.$$.fragment)},l(s){p=i(s,"P",{});var _=a(p);b=l(_,"Examples:"),_.forEach(n),u=g(s),J(f.$$.fragment,s)},m(s,_){h(s,p,_),t(p,b),h(s,u,_),T(f,s,_),y=!0},p:en,i(s){y||(B(f.$$.fragment,s),y=!0)},o(s){U(f.$$.fragment,s),y=!1},d(s){s&&n(p),s&&n(u),C(f,s)}}}function dn(E){let p,b,u,f,y;return f=new tn({props:{code:"ZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMlMjBpbXBvcnQlMjBCbGlwRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMEFmcm9tJTIwY29udHJvbG5ldF9hdXglMjBpbXBvcnQlMjBDYW5ueURldGVjdG9yJTBBaW1wb3J0JTIwdG9yY2glMEElMEFibGlwX2RpZmZ1c2lvbl9waXBlJTIwJTNEJTIwQmxpcERpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyU2FsZXNmb3JjZSUyRmJsaXBkaWZmdXNpb24tY29udHJvbG5ldCUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJTIyY3VkYSUyMiklMEElMEFzdHlsZV9zdWJqZWN0JTIwJTNEJTIwJTIyZmxvd2VyJTIyJTBBdGd0X3N1YmplY3QlMjAlM0QlMjAlMjJ0ZWFwb3QlMjIlMEF0ZXh0X3Byb21wdCUyMCUzRCUyMCUyMm9uJTIwYSUyMG1hcmJsZSUyMHRhYmxlJTIyJTBBJTBBY2xkbV9jb25kX2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZheXVzaHR1ZXMlMkZibGlwZGlmZnVzaW9uX2ltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGa2V0dGxlLmpwZyUyMiUwQSkucmVzaXplKCg1MTIlMkMlMjA1MTIpKSUwQWNhbm55JTIwJTNEJTIwQ2FubnlEZXRlY3RvcigpJTBBY2xkbV9jb25kX2ltYWdlJTIwJTNEJTIwY2FubnkoY2xkbV9jb25kX2ltYWdlJTJDJTIwMzAlMkMlMjA3MCUyQyUyMG91dHB1dF90eXBlJTNEJTIycGlsJTIyKSUwQXN0eWxlX2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZheXVzaHR1ZXMlMkZibGlwZGlmZnVzaW9uX2ltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZmxvd2VyLmpwZyUyMiUwQSklMEFndWlkYW5jZV9zY2FsZSUyMCUzRCUyMDcuNSUwQW51bV9pbmZlcmVuY2Vfc3RlcHMlMjAlM0QlMjA1MCUwQW5lZ2F0aXZlX3Byb21wdCUyMCUzRCUyMCUyMm92ZXItZXhwb3N1cmUlMkMlMjB1bmRlci1leHBvc3VyZSUyQyUyMHNhdHVyYXRlZC
UyQyUyMGR1cGxpY2F0ZSUyQyUyMG91dCUyMG9mJTIwZnJhbWUlMkMlMjBsb3dyZXMlMkMlMjBjcm9wcGVkJTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwbG93JTIwcXVhbGl0eSUyQyUyMGpwZWclMjBhcnRpZmFjdHMlMkMlMjBtb3JiaWQlMkMlMjBtdXRpbGF0ZWQlMkMlMjBvdXQlMjBvZiUyMGZyYW1lJTJDJTIwdWdseSUyQyUyMGJhZCUyMGFuYXRvbXklMkMlMjBiYWQlMjBwcm9wb3J0aW9ucyUyQyUyMGRlZm9ybWVkJTJDJTIwYmx1cnJ5JTJDJTIwZHVwbGljYXRlJTIyJTBBJTBBJTBBb3V0cHV0JTIwJTNEJTIwYmxpcF9kaWZmdXNpb25fcGlwZSglMEElMjAlMjAlMjAlMjB0ZXh0X3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMHN0eWxlX2ltYWdlJTJDJTBBJTIwJTIwJTIwJTIwY2xkbV9jb25kX2ltYWdlJTJDJTBBJTIwJTIwJTIwJTIwc3R5bGVfc3ViamVjdCUyQyUwQSUyMCUyMCUyMCUyMHRndF9zdWJqZWN0JTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0RndWlkYW5jZV9zY2FsZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0RudW1faW5mZXJlbmNlX3N0ZXBzJTJDJTBBJTIwJTIwJTIwJTIwbmVnX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNENTEyJTJDJTBBKS5pbWFnZXMlMEFvdXRwdXQlNUIwJTVELnNhdmUoJTIyaW1hZ2UucG5nJTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.pipelines <span class="hljs-keyword">import</span> BlipDiffusionControlNetPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> controlnet_aux <span class="hljs-keyword">import</span> CannyDetector
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span>blip_diffusion_pipe = BlipDiffusionControlNetPipeline.from_pretrained(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Salesforce/blipdiffusion-controlnet&quot;</span>, torch_dtype=torch.float16
<span class="hljs-meta">... </span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>style_subject = <span class="hljs-string">&quot;flower&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>tgt_subject = <span class="hljs-string">&quot;teapot&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>text_prompt = <span class="hljs-string">&quot;on a marble table&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>cldm_cond_image = load_image(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/kettle.jpg&quot;</span>
<span class="hljs-meta">... </span>).resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))
<span class="hljs-meta">&gt;&gt;&gt; </span>canny = CannyDetector()
<span class="hljs-meta">&gt;&gt;&gt; </span>cldm_cond_image = canny(cldm_cond_image, <span class="hljs-number">30</span>, <span class="hljs-number">70</span>, output_type=<span class="hljs-string">&quot;pil&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>style_image = load_image(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/flower.jpg&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>guidance_scale = <span class="hljs-number">7.5</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>num_inference_steps = <span class="hljs-number">50</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>negative_prompt = <span class="hljs-string">&quot;over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>output = blip_diffusion_pipe(
<span class="hljs-meta">... </span> text_prompt,
<span class="hljs-meta">... </span> style_image,
<span class="hljs-meta">... </span> cldm_cond_image,
<span class="hljs-meta">... </span> style_subject,
<span class="hljs-meta">... </span> tgt_subject,
<span class="hljs-meta">... </span> guidance_scale=guidance_scale,
<span class="hljs-meta">... </span> num_inference_steps=num_inference_steps,
<span class="hljs-meta">... </span> neg_prompt=negative_prompt,
<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
<span class="hljs-meta">... </span> width=<span class="hljs-number">512</span>,
<span class="hljs-meta">... </span>).images
<span class="hljs-meta">&gt;&gt;&gt; </span>output[<span class="hljs-number">0</span>].save(<span class="hljs-string">&quot;image.png&quot;</span>)`}}),{c(){p=o("p"),b=r("Examples:"),u=m(),j(f.$$.fragment)},l(s){p=i(s,"P",{});var _=a(p);b=l(_,"Examples:"),_.forEach(n),u=g(s),J(f.$$.fragment,s)},m(s,_){h(s,p,_),t(p,b),h(s,u,_),T(f,s,_),y=!0},p:en,i(s){y||(B(f.$$.fragment,s),y=!0)},o(s){U(f.$$.fragment,s),y=!1},d(s){s&&n(p),s&&n(u),C(f,s)}}}function fn(E){let p,b,u,f,y,s,_,N,k,w,x,ae,F,Ve,qe,Ce,re,Fe,xe,le,me,Ye,Ie,Z,Le,Y,ze,Ae,L,He,Oe,Ze,I,ge,Ke,et,he,tt,nt,z,_e,st,ot,De,X,Pe,G,W,ye,A,it,be,at,Ne,M,H,rt,we,lt,pt,O,ct,pe,dt,ft,ut,D,K,mt,Me,gt,ht,S,ke,$,Q,ve,ee,_t,je,yt,Ge,v,te,bt,Je,wt,Mt,ne,vt,ce,jt,Jt,Tt,P,se,Bt,Te,Ut,Ct,R,$e;return s=new xt({}),X=new ln({props:{$$slots:{default:[pn]},$$scope:{ctx:E}}}),A=new xt({}),H=new Re({props:{name:"class diffusers.BlipDiffusionPipeline",anchor:"diffusers.BlipDiffusionPipeline",parameters:[{name:"tokenizer",val:": CLIPTokenizer"},{name:"text_encoder",val:": ContextCLIPTextModel"},{name:"vae",val:": AutoencoderKL"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": PNDMScheduler"},{name:"qformer",val:": Blip2QFormerModel"},{name:"image_processor",val:": BlipImageProcessor"},{name:"ctx_begin_pos",val:": int = 2"},{name:"mean",val:": typing.List[float] = None"},{name:"std",val:": typing.List[float] = None"}],parametersDescription:[{anchor:"diffusers.BlipDiffusionPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) &#x2014;
Tokenizer for the text encoder`,name:"tokenizer"},{anchor:"diffusers.BlipDiffusionPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>ContextCLIPTextModel</code>) &#x2014;
Text encoder to encode the text prompt`,name:"text_encoder"},{anchor:"diffusers.BlipDiffusionPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/main/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) &#x2014;
VAE model to map the latents to the image`,name:"vae"},{anchor:"diffusers.BlipDiffusionPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/main/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) &#x2014;
Conditional U-Net architecture to denoise the image embedding.`,name:"unet"},{anchor:"diffusers.BlipDiffusionPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/main/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>) &#x2014;
A scheduler to be used in combination with <code>unet</code> to generate image latents.`,name:"scheduler"},{anchor:"diffusers.BlipDiffusionPipeline.qformer",description:`<strong>qformer</strong> (<code>Blip2QFormerModel</code>) &#x2014;
QFormer model to get multi-modal embeddings from the text and image.`,name:"qformer"},{anchor:"diffusers.BlipDiffusionPipeline.image_processor",description:`<strong>image_processor</strong> (<code>BlipImageProcessor</code>) &#x2014;
Image Processor to preprocess and postprocess the image.`,name:"image_processor"},{anchor:"diffusers.BlipDiffusionPipeline.ctx_begin_pos",description:`<strong>ctx_begin_pos</strong> (int, <code>optional</code>, defaults to 2) &#x2014;
Position of the context token in the text encoder.`,name:"ctx_begin_pos"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py#L75"}}),K=new Re({props:{name:"__call__",anchor:"diffusers.BlipDiffusionPipeline.__call__",parameters:[{name:"prompt",val:": typing.List[str]"},{name:"reference_image",val:": Image"},{name:"source_subject_category",val:": typing.List[str]"},{name:"target_subject_category",val:": typing.List[str]"},{name:"latents",val:": typing.Optional[torch.FloatTensor] = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"height",val:": int = 512"},{name:"width",val:": int = 512"},{name:"num_inference_steps",val:": int = 50"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"neg_prompt",val:": typing.Optional[str] = ''"},{name:"prompt_strength",val:": float = 1.0"},{name:"prompt_reps",val:": int = 20"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.BlipDiffusionPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>List[str]</code>) &#x2014;
The prompt or prompts to guide the image generation.`,name:"prompt"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.reference_image",description:`<strong>reference_image</strong> (<code>PIL.Image.Image</code>) &#x2014;
The reference image to condition the generation on.`,name:"reference_image"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.source_subject_category",description:`<strong>source_subject_category</strong> (<code>List[str]</code>) &#x2014;
The source subject category.`,name:"source_subject_category"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.target_subject_category",description:`<strong>target_subject_category</strong> (<code>List[str]</code>) &#x2014;
The target subject category.`,name:"target_subject_category"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by random sampling.`,name:"latents"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) &#x2014;
Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>.
<code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen
Paper</a>. Guidance scale is enabled by setting <code>guidance_scale &gt; 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>,
usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to 512) &#x2014;
The height of the generated image.`,name:"height"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to 512) &#x2014;
The width of the generated image.`,name:"width"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) &#x2014;
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) &#x2014;
One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
to make generation deterministic.`,name:"generator"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.neg_prompt",description:`<strong>neg_prompt</strong> (<code>str</code>, <em>optional</em>, defaults to &quot;&quot;) &#x2014;
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
if <code>guidance_scale</code> is less than <code>1</code>).`,name:"neg_prompt"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.prompt_strength",description:`<strong>prompt_strength</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) &#x2014;
The strength of the prompt. Specifies the number of times the prompt is repeated along with prompt_reps
to amplify the prompt.`,name:"prompt_strength"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.prompt_reps",description:`<strong>prompt_reps</strong> (<code>int</code>, <em>optional</em>, defaults to 20) &#x2014;
The number of times the prompt is repeated along with prompt_strength to amplify the prompt.`,name:"prompt_reps"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;pil&quot;</code>) &#x2014;
The output format of the generate image. Choose between: <code>&quot;pil&quot;</code> (<code>PIL.Image.Image</code>), <code>&quot;np&quot;</code>
(<code>np.array</code>) or <code>&quot;pt&quot;</code> (<code>torch.Tensor</code>).`,name:"output_type"},{anchor:"diffusers.BlipDiffusionPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether or not to return a <a href="/docs/diffusers/main/en/api/pipelines/vq_diffusion#diffusers.ImagePipelineOutput">ImagePipelineOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py#L182",returnType:`
<p><a
href="/docs/diffusers/main/en/api/pipelines/vq_diffusion#diffusers.ImagePipelineOutput"
>ImagePipelineOutput</a> or <code>tuple</code></p>
`}}),S=new Kt({props:{anchor:"diffusers.BlipDiffusionPipeline.__call__.example",$$slots:{default:[cn]},$$scope:{ctx:E}}}),ee=new xt({}),te=new Re({props:{name:"class diffusers.BlipDiffusionControlNetPipeline",anchor:"diffusers.BlipDiffusionControlNetPipeline",parameters:[{name:"tokenizer",val:": CLIPTokenizer"},{name:"text_encoder",val:": ContextCLIPTextModel"},{name:"vae",val:": AutoencoderKL"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": PNDMScheduler"},{name:"qformer",val:": Blip2QFormerModel"},{name:"controlnet",val:": ControlNetModel"},{name:"image_processor",val:": BlipImageProcessor"},{name:"ctx_begin_pos",val:": int = 2"},{name:"mean",val:": typing.List[float] = None"},{name:"std",val:": typing.List[float] = None"}],parametersDescription:[{anchor:"diffusers.BlipDiffusionControlNetPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) &#x2014;
Tokenizer for the text encoder`,name:"tokenizer"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>ContextCLIPTextModel</code>) &#x2014;
Text encoder to encode the text prompt`,name:"text_encoder"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/main/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) &#x2014;
VAE model to map the latents to the image`,name:"vae"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/main/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) &#x2014;
Conditional U-Net architecture to denoise the image embedding.`,name:"unet"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/main/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>) &#x2014;
A scheduler to be used in combination with <code>unet</code> to generate image latents.`,name:"scheduler"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.qformer",description:`<strong>qformer</strong> (<code>Blip2QFormerModel</code>) &#x2014;
QFormer model to get multi-modal embeddings from the text and image.`,name:"qformer"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.controlnet",description:`<strong>controlnet</strong> (<a href="/docs/diffusers/main/en/api/models/controlnet#diffusers.ControlNetModel">ControlNetModel</a>) &#x2014;
ControlNet model to get the conditioning image embedding.`,name:"controlnet"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.image_processor",description:`<strong>image_processor</strong> (<code>BlipImageProcessor</code>) &#x2014;
Image Processor to preprocess and postprocess the image.`,name:"image_processor"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.ctx_begin_pos",description:`<strong>ctx_begin_pos</strong> (int, <code>optional</code>, defaults to 2) &#x2014;
Position of the context token in the text encoder.`,name:"ctx_begin_pos"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py#L82"}}),se=new Re({props:{name:"__call__",anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__",parameters:[{name:"prompt",val:": typing.List[str]"},{name:"reference_image",val:": Image"},{name:"condtioning_image",val:": Image"},{name:"source_subject_category",val:": typing.List[str]"},{name:"target_subject_category",val:": typing.List[str]"},{name:"latents",val:": typing.Optional[torch.FloatTensor] = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"height",val:": int = 512"},{name:"width",val:": int = 512"},{name:"num_inference_steps",val:": int = 50"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"neg_prompt",val:": typing.Optional[str] = ''"},{name:"prompt_strength",val:": float = 1.0"},{name:"prompt_reps",val:": int = 20"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>List[str]</code>) &#x2014;
The prompt or prompts to guide the image generation.`,name:"prompt"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.reference_image",description:`<strong>reference_image</strong> (<code>PIL.Image.Image</code>) &#x2014;
The reference image to condition the generation on.`,name:"reference_image"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.condtioning_image",description:`<strong>condtioning_image</strong> (<code>PIL.Image.Image</code>) &#x2014;
The conditioning canny edge image to condition the generation on.`,name:"condtioning_image"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.source_subject_category",description:`<strong>source_subject_category</strong> (<code>List[str]</code>) &#x2014;
The source subject category.`,name:"source_subject_category"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.target_subject_category",description:`<strong>target_subject_category</strong> (<code>List[str]</code>) &#x2014;
The target subject category.`,name:"target_subject_category"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by random sampling.`,name:"latents"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) &#x2014;
Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>.
<code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen
Paper</a>. Guidance scale is enabled by setting <code>guidance_scale &gt; 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>,
usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to 512) &#x2014;
The height of the generated image.`,name:"height"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to 512) &#x2014;
The width of the generated image.`,name:"width"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.seed",description:`<strong>seed</strong> (<code>int</code>, <em>optional</em>, defaults to 42) &#x2014;
The seed to use for random generation.`,name:"seed"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) &#x2014;
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) &#x2014;
One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
to make generation deterministic.`,name:"generator"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.neg_prompt",description:`<strong>neg_prompt</strong> (<code>str</code>, <em>optional</em>, defaults to &quot;&quot;) &#x2014;
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
if <code>guidance_scale</code> is less than <code>1</code>).`,name:"neg_prompt"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.prompt_strength",description:`<strong>prompt_strength</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) &#x2014;
The strength of the prompt. Specifies the number of times the prompt is repeated along with prompt_reps
to amplify the prompt.`,name:"prompt_strength"},{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.prompt_reps",description:`<strong>prompt_reps</strong> (<code>int</code>, <em>optional</em>, defaults to 20) &#x2014;
The number of times the prompt is repeated along with prompt_strength to amplify the prompt.`,name:"prompt_reps"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py#L230",returnType:`
<p><a
href="/docs/diffusers/main/en/api/pipelines/vq_diffusion#diffusers.ImagePipelineOutput"
>ImagePipelineOutput</a> or <code>tuple</code></p>
`}}),R=new Kt({props:{anchor:"diffusers.BlipDiffusionControlNetPipeline.__call__.example",$$slots:{default:[dn]},$$scope:{ctx:E}}}),{c(){p=o("meta"),b=m(),u=o("h1"),f=o("a"),y=o("span"),j(s.$$.fragment),_=m(),N=o("span"),k=r("Blip Diffusion"),w=m(),x=o("p"),ae=r("Blip Diffusion was proposed in "),F=o("a"),Ve=r("BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing"),qe=r(". It enables zero-shot subject-driven generation and control-guided zero-shot generation."),Ce=m(),re=o("p"),Fe=r("The abstract from the paper is:"),xe=m(),le=o("p"),me=o("em"),Ye=r("Subject-driven text-to-image generation models create novel renditions of an input subject based on text prompts. Existing models suffer from lengthy fine-tuning and difficulties preserving the subject fidelity. To overcome these limitations, we introduce BLIP-Diffusion, a new subject-driven image generation model that supports multimodal control which consumes inputs of subject images and text prompts. Unlike other subject-driven generation models, BLIP-Diffusion introduces a new multimodal encoder which is pre-trained to provide subject representation. We first pre-train the multimodal encoder following BLIP-2 to produce visual representation aligned with the text. Then we design a subject representation learning task which enables a diffusion model to leverage such visual representation and generates new subject renditions. Compared with previous methods such as DreamBooth, our model enables zero-shot subject-driven generation, and efficient fine-tuning for customized subject with up to 20x speedup. We also demonstrate that BLIP-Diffusion can be flexibly combined with existing techniques such as ControlNet and prompt-to-prompt to enable novel subject-driven generation and editing applications."),Ie=m(),Z=o("p"),Le=r("The original codebase can be found at "),Y=o("a"),ze=r("salesforce/LAVIS"),Ae=r(". 
You can find the official BLIP Diffusion checkpoints under the "),L=o("a"),He=r("hf.co/SalesForce"),Oe=r(" organization."),Ze=m(),I=o("p"),ge=o("code"),Ke=r("BlipDiffusionPipeline"),et=r(" and "),he=o("code"),tt=r("BlipDiffusionControlNetPipeline"),nt=r(" were contributed by "),z=o("a"),_e=o("code"),st=r("ayushtues"),ot=r("."),De=m(),j(X.$$.fragment),Pe=m(),G=o("h2"),W=o("a"),ye=o("span"),j(A.$$.fragment),it=m(),be=o("span"),at=r("BlipDiffusionPipeline"),Ne=m(),M=o("div"),j(H.$$.fragment),rt=m(),we=o("p"),lt=r("Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion."),pt=m(),O=o("p"),ct=r("This model inherits from "),pe=o("a"),dt=r("DiffusionPipeline"),ft=r(`. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`),ut=m(),D=o("div"),j(K.$$.fragment),mt=m(),Me=o("p"),gt=r("Function invoked when calling the pipeline for generation."),ht=m(),j(S.$$.fragment),ke=m(),$=o("h2"),Q=o("a"),ve=o("span"),j(ee.$$.fragment),_t=m(),je=o("span"),yt=r("BlipDiffusionControlNetPipeline"),Ge=m(),v=o("div"),j(te.$$.fragment),bt=m(),Je=o("p"),wt=r("Pipeline for Canny Edge based Controlled subject-driven generation using Blip Diffusion."),Mt=m(),ne=o("p"),vt=r("This model inherits from "),ce=o("a"),jt=r("DiffusionPipeline"),Jt=r(`. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`),Tt=m(),P=o("div"),j(se.$$.fragment),Bt=m(),Te=o("p"),Ut=r("Function invoked when calling the pipeline for generation."),Ct=m(),j(R.$$.fragment),this.h()},l(e){const c=an('[data-svelte="svelte-1phssyn"]',document.head);p=i(c,"META",{name:!0,content:!0}),c.forEach(n),b=g(e),u=i(e,"H1",{class:!0});var oe=a(u);f=i(oe,"A",{id:!0,class:!0,href:!0});var Be=a(f);y=i(Be,"SPAN",{});var Ue=a(y);J(s.$$.fragment,Ue),Ue.forEach(n),Be.forEach(n),_=g(oe),N=i(oe,"SPAN",{});var It=a(N);k=l(It,"Blip Diffusion"),It.forEach(n),oe.forEach(n),w=g(e),x=i(e,"P",{});var Ee=a(x);ae=l(Ee,"Blip Diffusion was proposed in "),F=i(Ee,"A",{href:!0,rel:!0});var Zt=a(F);Ve=l(Zt,"BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing"),Zt.forEach(n),qe=l(Ee,". It enables zero-shot subject-driven generation and control-guided zero-shot generation."),Ee.forEach(n),Ce=g(e),re=i(e,"P",{});var Dt=a(re);Fe=l(Dt,"The abstract from the paper is:"),Dt.forEach(n),xe=g(e),le=i(e,"P",{});var Pt=a(le);me=i(Pt,"EM",{});var Nt=a(me);Ye=l(Nt,"Subject-driven text-to-image generation models create novel renditions of an input subject based on text prompts. Existing models suffer from lengthy fine-tuning and difficulties preserving the subject fidelity. To overcome these limitations, we introduce BLIP-Diffusion, a new subject-driven image generation model that supports multimodal control which consumes inputs of subject images and text prompts. Unlike other subject-driven generation models, BLIP-Diffusion introduces a new multimodal encoder which is pre-trained to provide subject representation. We first pre-train the multimodal encoder following BLIP-2 to produce visual representation aligned with the text. 
Then we design a subject representation learning task which enables a diffusion model to leverage such visual representation and generates new subject renditions. Compared with previous methods such as DreamBooth, our model enables zero-shot subject-driven generation, and efficient fine-tuning for customized subject with up to 20x speedup. We also demonstrate that BLIP-Diffusion can be flexibly combined with existing techniques such as ControlNet and prompt-to-prompt to enable novel subject-driven generation and editing applications."),Nt.forEach(n),Pt.forEach(n),Ie=g(e),Z=i(e,"P",{});var de=a(Z);Le=l(de,"The original codebase can be found at "),Y=i(de,"A",{href:!0,rel:!0});var kt=a(Y);ze=l(kt,"salesforce/LAVIS"),kt.forEach(n),Ae=l(de,". You can find the official BLIP Diffusion checkpoints under the "),L=i(de,"A",{href:!0,rel:!0});var Gt=a(L);He=l(Gt,"hf.co/SalesForce"),Gt.forEach(n),Oe=l(de," organization."),de.forEach(n),Ze=g(e),I=i(e,"P",{});var ie=a(I);ge=i(ie,"CODE",{});var $t=a(ge);Ke=l($t,"BlipDiffusionPipeline"),$t.forEach(n),et=l(ie," and "),he=i(ie,"CODE",{});var Et=a(he);tt=l(Et,"BlipDiffusionControlNetPipeline"),Et.forEach(n),nt=l(ie," were contributed by "),z=i(ie,"A",{href:!0,rel:!0});var Xt=a(z);_e=i(Xt,"CODE",{});var Wt=a(_e);st=l(Wt,"ayushtues"),Wt.forEach(n),Xt.forEach(n),ot=l(ie,"."),ie.forEach(n),De=g(e),J(X.$$.fragment,e),Pe=g(e),G=i(e,"H2",{class:!0});var Xe=a(G);W=i(Xe,"A",{id:!0,class:!0,href:!0});var St=a(W);ye=i(St,"SPAN",{});var Qt=a(ye);J(A.$$.fragment,Qt),Qt.forEach(n),St.forEach(n),it=g(Xe),be=i(Xe,"SPAN",{});var Rt=a(be);at=l(Rt,"BlipDiffusionPipeline"),Rt.forEach(n),Xe.forEach(n),Ne=g(e),M=i(e,"DIV",{class:!0});var V=a(M);J(H.$$.fragment,V),rt=g(V),we=i(V,"P",{});var Vt=a(we);lt=l(Vt,"Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion."),Vt.forEach(n),pt=g(V),O=i(V,"P",{});var We=a(O);ct=l(We,"This model inherits from "),pe=i(We,"A",{href:!0});var qt=a(pe);dt=l(qt,"DiffusionPipeline"),qt.forEach(n),ft=l(We,`. 
Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`),We.forEach(n),ut=g(V),D=i(V,"DIV",{class:!0});var fe=a(D);J(K.$$.fragment,fe),mt=g(fe),Me=i(fe,"P",{});var Ft=a(Me);gt=l(Ft,"Function invoked when calling the pipeline for generation."),Ft.forEach(n),ht=g(fe),J(S.$$.fragment,fe),fe.forEach(n),V.forEach(n),ke=g(e),$=i(e,"H2",{class:!0});var Se=a($);Q=i(Se,"A",{id:!0,class:!0,href:!0});var Yt=a(Q);ve=i(Yt,"SPAN",{});var Lt=a(ve);J(ee.$$.fragment,Lt),Lt.forEach(n),Yt.forEach(n),_t=g(Se),je=i(Se,"SPAN",{});var zt=a(je);yt=l(zt,"BlipDiffusionControlNetPipeline"),zt.forEach(n),Se.forEach(n),Ge=g(e),v=i(e,"DIV",{class:!0});var q=a(v);J(te.$$.fragment,q),bt=g(q),Je=i(q,"P",{});var At=a(Je);wt=l(At,"Pipeline for Canny Edge based Controlled subject-driven generation using Blip Diffusion."),At.forEach(n),Mt=g(q),ne=i(q,"P",{});var Qe=a(ne);vt=l(Qe,"This model inherits from "),ce=i(Qe,"A",{href:!0});var Ht=a(ce);jt=l(Ht,"DiffusionPipeline"),Ht.forEach(n),Jt=l(Qe,`. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)`),Qe.forEach(n),Tt=g(q),P=i(q,"DIV",{class:!0});var ue=a(P);J(se.$$.fragment,ue),Bt=g(ue),Te=i(ue,"P",{});var Ot=a(Te);Ut=l(Ot,"Function invoked when calling the pipeline for generation."),Ot.forEach(n),Ct=g(ue),J(R.$$.fragment,ue),ue.forEach(n),q.forEach(n),this.h()},h(){d(p,"name","hf:doc:metadata"),d(p,"content",JSON.stringify(un)),d(f,"id","blip-diffusion"),d(f,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),d(f,"href","#blip-diffusion"),d(u,"class","relative group"),d(F,"href","https://arxiv.org/abs/2305.14720"),d(F,"rel","nofollow"),d(Y,"href","https://github.com/salesforce/LAVIS/tree/main/projects/blip-diffusion"),d(Y,"rel","nofollow"),d(L,"href","https://hf.co/SalesForce"),d(L,"rel","nofollow"),d(z,"href","https://github.com/ayushtues/"),d(z,"rel","nofollow"),d(W,"id","diffusers.BlipDiffusionPipeline"),d(W,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),d(W,"href","#diffusers.BlipDiffusionPipeline"),d(G,"class","relative group"),d(pe,"href","/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline"),d(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),d(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),d(Q,"id","diffusers.BlipDiffusionControlNetPipeline"),d(Q,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),d(Q,"href","#diffusers.BlipDiffusionControlNetPipeline"),d($,"class","relative 
group"),d(ce,"href","/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline"),d(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),d(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,c){t(document.head,p),h(e,b,c),h(e,u,c),t(u,f),t(f,y),T(s,y,null),t(u,_),t(u,N),t(N,k),h(e,w,c),h(e,x,c),t(x,ae),t(x,F),t(F,Ve),t(x,qe),h(e,Ce,c),h(e,re,c),t(re,Fe),h(e,xe,c),h(e,le,c),t(le,me),t(me,Ye),h(e,Ie,c),h(e,Z,c),t(Z,Le),t(Z,Y),t(Y,ze),t(Z,Ae),t(Z,L),t(L,He),t(Z,Oe),h(e,Ze,c),h(e,I,c),t(I,ge),t(ge,Ke),t(I,et),t(I,he),t(he,tt),t(I,nt),t(I,z),t(z,_e),t(_e,st),t(I,ot),h(e,De,c),T(X,e,c),h(e,Pe,c),h(e,G,c),t(G,W),t(W,ye),T(A,ye,null),t(G,it),t(G,be),t(be,at),h(e,Ne,c),h(e,M,c),T(H,M,null),t(M,rt),t(M,we),t(we,lt),t(M,pt),t(M,O),t(O,ct),t(O,pe),t(pe,dt),t(O,ft),t(M,ut),t(M,D),T(K,D,null),t(D,mt),t(D,Me),t(Me,gt),t(D,ht),T(S,D,null),h(e,ke,c),h(e,$,c),t($,Q),t(Q,ve),T(ee,ve,null),t($,_t),t($,je),t(je,yt),h(e,Ge,c),h(e,v,c),T(te,v,null),t(v,bt),t(v,Je),t(Je,wt),t(v,Mt),t(v,ne),t(ne,vt),t(ne,ce),t(ce,jt),t(ne,Jt),t(v,Tt),t(v,P),T(se,P,null),t(P,Bt),t(P,Te),t(Te,Ut),t(P,Ct),T(R,P,null),$e=!0},p(e,[c]){const oe={};c&2&&(oe.$$scope={dirty:c,ctx:e}),X.$set(oe);const Be={};c&2&&(Be.$$scope={dirty:c,ctx:e}),S.$set(Be);const 
Ue={};c&2&&(Ue.$$scope={dirty:c,ctx:e}),R.$set(Ue)},i(e){$e||(B(s.$$.fragment,e),B(X.$$.fragment,e),B(A.$$.fragment,e),B(H.$$.fragment,e),B(K.$$.fragment,e),B(S.$$.fragment,e),B(ee.$$.fragment,e),B(te.$$.fragment,e),B(se.$$.fragment,e),B(R.$$.fragment,e),$e=!0)},o(e){U(s.$$.fragment,e),U(X.$$.fragment,e),U(A.$$.fragment,e),U(H.$$.fragment,e),U(K.$$.fragment,e),U(S.$$.fragment,e),U(ee.$$.fragment,e),U(te.$$.fragment,e),U(se.$$.fragment,e),U(R.$$.fragment,e),$e=!1},d(e){n(p),e&&n(b),e&&n(u),C(s),e&&n(w),e&&n(x),e&&n(Ce),e&&n(re),e&&n(xe),e&&n(le),e&&n(Ie),e&&n(Z),e&&n(Ze),e&&n(I),e&&n(De),C(X,e),e&&n(Pe),e&&n(G),C(A),e&&n(Ne),e&&n(M),C(H),C(K),C(S),e&&n(ke),e&&n($),C(ee),e&&n(Ge),e&&n(v),C(te),C(se),C(R)}}}
/**
 * Page metadata for the "Blip Diffusion" documentation page.
 *
 * `local` is the page anchor id, `title` the page heading, and `sections`
 * lists one `{local, title}` entry per documented pipeline class
 * (BlipDiffusionPipeline and BlipDiffusionControlNetPipeline).
 * Exported below as `metadata`; it is also passed to JSON.stringify where the
 * fragment configures the element named "hf:doc:metadata".
 */
const un={local:"blip-diffusion",sections:[{local:"diffusers.BlipDiffusionPipeline",title:"BlipDiffusionPipeline"},{local:"diffusers.BlipDiffusionControlNetPipeline",title:"BlipDiffusionControlNetPipeline"}],title:"Blip Diffusion"};
/**
 * Instance function handed to `sn` by the component constructor below.
 *
 * Registers a callback through `rn` that reads the "fw" query-string
 * parameter from `window.location.search`; the value it reads is discarded.
 * The parameter `E` is not used, and the function always returns an empty
 * array (the comma expression evaluates to `[]`).
 *
 * NOTE(review): `rn` is imported from the vendor chunk as `v`; presumably
 * Svelte's `onMount` - confirm against vendor-hf-doc-builder.js.
 */
function mn(E){return rn(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Default-exported component class for this documentation page.
 *
 * The constructor calls `super()` and then `sn(this, p, mn, fn, on, {})`,
 * forwarding the constructor argument `p` unchanged.
 *
 * NOTE(review): `nn`, `sn` and `on` come from the vendor chunk (imported as
 * `S`, `i` and `s`); presumably Svelte's SvelteComponent base class, `init`
 * and `safe_not_equal` - confirm. `fn` is defined elsewhere in this file,
 * presumably the fragment factory whose tail precedes this section.
 */
class Mn extends nn{constructor(p){super();sn(this,p,mn,fn,on,{})}}
// Module interface: the component class is the default export, the page metadata a named export.
export{Mn as default,un as metadata};

Xet Storage Details

Size:
44.1 kB
·
Xet hash:
a55f63ead57cc41cf70159b1f49617d7987a07645548621422b3ca794ec6ff00

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.