Buckets:

hf-doc-build/doc / diffusers /main /en /_app /pages /api /pipelines /cycle_diffusion.mdx-hf-doc-builder.js
rtrm's picture
download
raw
36.8 kB
import{S as hn,i as gn,s as yn,e as o,k as d,w as J,t as m,M as _n,c as s,d as t,m as f,a as i,x as C,h as u,b as c,G as n,g,y as I,q as D,o as x,B as P,v as bn,L as wn}from"../../../chunks/vendor-hf-doc-builder.js";import{T as Mn}from"../../../chunks/Tip-hf-doc-builder.js";import{D as Ce}from"../../../chunks/Docstring-hf-doc-builder.js";import{C as vn}from"../../../chunks/CodeBlock-hf-doc-builder.js";import{I as qe}from"../../../chunks/IconCopyLink-hf-doc-builder.js";import{E as Tn}from"../../../chunks/ExampleCodeBlock-hf-doc-builder.js";function Un(q){let r,M,p,h,_,a,b,k;return{c(){r=o("p"),M=m("Make sure to check out the Schedulers "),p=o("a"),h=m("guide"),_=m(" to learn how to explore the tradeoff between scheduler speed and quality, and see the "),a=o("a"),b=m("reuse components across pipelines"),k=m(" section to learn how to efficiently load the same components into multiple pipelines."),this.h()},l(j){r=s(j,"P",{});var w=i(r);M=u(w,"Make sure to check out the Schedulers "),p=s(w,"A",{href:!0});var v=i(p);h=u(v,"guide"),v.forEach(t),_=u(w," to learn how to explore the tradeoff between scheduler speed and quality, and see the "),a=s(w,"A",{href:!0});var H=i(a);b=u(H,"reuse components across pipelines"),H.forEach(t),k=u(w," section to learn how to efficiently load the same components into multiple pipelines."),w.forEach(t),this.h()},h(){c(p,"href","/using-diffusers/schedulers"),c(a,"href","/using-diffusers/loading#reuse-components-across-pipelines")},m(j,w){g(j,r,w),n(r,M),n(r,p),n(p,h),n(r,_),n(r,a),n(a,b),n(r,k)},d(j){j&&t(r)}}}function Jn(q){let r,M,p,h,_;return h=new 
vn({props:{code:"aW1wb3J0JTIwcmVxdWVzdHMlMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBQSUwlMjBpbXBvcnQlMjBJbWFnZSUwQWZyb20lMjBpbyUyMGltcG9ydCUyMEJ5dGVzSU8lMEElMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQ3ljbGVEaWZmdXNpb25QaXBlbGluZSUyQyUyMERESU1TY2hlZHVsZXIlMEElMEElMjMlMjBsb2FkJTIwdGhlJTIwcGlwZWxpbmUlMEElMjMlMjBtYWtlJTIwc3VyZSUyMHlvdSdyZSUyMGxvZ2dlZCUyMGluJTIwd2l0aCUyMCU2MGh1Z2dpbmdmYWNlLWNsaSUyMGxvZ2luJTYwJTBBbW9kZWxfaWRfb3JfcGF0aCUyMCUzRCUyMCUyMkNvbXBWaXMlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTQlMjIlMEFzY2hlZHVsZXIlMjAlM0QlMjBERElNU2NoZWR1bGVyLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZF9vcl9wYXRoJTJDJTIwc3ViZm9sZGVyJTNEJTIyc2NoZWR1bGVyJTIyKSUwQXBpcGUlMjAlM0QlMjBDeWNsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZF9vcl9wYXRoJTJDJTIwc2NoZWR1bGVyJTNEc2NoZWR1bGVyKS50byglMjJjdWRhJTIyKSUwQSUwQSUyMyUyMGxldCdzJTIwZG93bmxvYWQlMjBhbiUyMGluaXRpYWwlMjBpbWFnZSUwQXVybCUyMCUzRCUyMCUyMmh0dHBzJTNBJTJGJTJGcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSUyRkNoZW5XdTk4JTJGY3ljbGUtZGlmZnVzaW9uJTJGbWFpbiUyRmRhdGElMkZkYWxsZTIlMkZBbiUyNTIwYXN0cm9uYXV0JTI1MjByaWRpbmclMjUyMGElMjUyMGhvcnNlLnBuZyUyMiUwQXJlc3BvbnNlJTIwJTNEJTIwcmVxdWVzdHMuZ2V0KHVybCklMEFpbml0X2ltYWdlJTIwJTNEJTIwSW1hZ2Uub3BlbihCeXRlc0lPKHJlc3BvbnNlLmNvbnRlbnQpKS5jb252ZXJ0KCUyMlJHQiUyMiklMEFpbml0X2ltYWdlJTIwJTNEJTIwaW5pdF9pbWFnZS5yZXNpemUoKDUxMiUyQyUyMDUxMikpJTBBaW5pdF9pbWFnZS5zYXZlKCUyMmhvcnNlLnBuZyUyMiklMEElMEElMjMlMjBsZXQncyUyMHNwZWNpZnklMjBhJTIwcHJvbXB0JTBBc291cmNlX3Byb21wdCUyMCUzRCUyMCUyMkFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIyJTBBcHJvbXB0JTIwJTNEJTIwJTIyQW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhbiUyMGVsZXBoYW50JTIyJTBBJTBBJTIzJTIwY2FsbCUyMHRoZSUyMHBpcGVsaW5lJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMHNvdXJjZV9wcm9tcHQlM0Rzb3VyY2VfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0Rpbml0X2ltYWdlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMGV0YSUzRDAuMSUyQyUwQSUyMCUyMCUyMCUyMHN0cmVuZ3RoJTNEMC44JTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0QyJTJDJTBBJTIwJTIwJTIwJTIwc291cmNlX2d1aWR
hbmNlX3NjYWxlJTNEMSUyQyUwQSkuaW1hZ2VzJTVCMCU1RCUwQSUwQWltYWdlLnNhdmUoJTIyaG9yc2VfdG9fZWxlcGhhbnQucG5nJTIyKSUwQSUwQSUyMyUyMGxldCdzJTIwdHJ5JTIwYW5vdGhlciUyMGV4YW1wbGUlMEElMjMlMjBTZWUlMjBtb3JlJTIwc2FtcGxlcyUyMGF0JTIwdGhlJTIwb3JpZ2luYWwlMjByZXBvJTNBJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGQ2hlbld1OTglMkZjeWNsZS1kaWZmdXNpb24lMEF1cmwlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZyYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tJTJGQ2hlbld1OTglMkZjeWNsZS1kaWZmdXNpb24lMkZtYWluJTJGZGF0YSUyRmRhbGxlMiUyRkElMjUyMGJsYWNrJTI1MjBjb2xvcmVkJTI1MjBjYXIucG5nJTIyJTBBKSUwQXJlc3BvbnNlJTIwJTNEJTIwcmVxdWVzdHMuZ2V0KHVybCklMEFpbml0X2ltYWdlJTIwJTNEJTIwSW1hZ2Uub3BlbihCeXRlc0lPKHJlc3BvbnNlLmNvbnRlbnQpKS5jb252ZXJ0KCUyMlJHQiUyMiklMEFpbml0X2ltYWdlJTIwJTNEJTIwaW5pdF9pbWFnZS5yZXNpemUoKDUxMiUyQyUyMDUxMikpJTBBaW5pdF9pbWFnZS5zYXZlKCUyMmJsYWNrLnBuZyUyMiklMEElMEFzb3VyY2VfcHJvbXB0JTIwJTNEJTIwJTIyQSUyMGJsYWNrJTIwY29sb3JlZCUyMGNhciUyMiUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBibHVlJTIwY29sb3JlZCUyMGNhciUyMiUwQSUwQSUyMyUyMGNhbGwlMjB0aGUlMjBwaXBlbGluZSUwQXRvcmNoLm1hbnVhbF9zZWVkKDApJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMHNvdXJjZV9wcm9tcHQlM0Rzb3VyY2VfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0Rpbml0X2ltYWdlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMGV0YSUzRDAuMSUyQyUwQSUyMCUyMCUyMCUyMHN0cmVuZ3RoJTNEMC44NSUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNEMyUyQyUwQSUyMCUyMCUyMCUyMHNvdXJjZV9ndWlkYW5jZV9zY2FsZSUzRDElMkMlMEEpLmltYWdlcyU1QjAlNUQlMEElMEFpbWFnZS5zYXZlKCUyMmJsYWNrX3RvX2JsdWUucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> requests
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">from</span> io <span class="hljs-keyword">import</span> BytesIO
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> CycleDiffusionPipeline, DDIMScheduler
<span class="hljs-comment"># load the pipeline</span>
<span class="hljs-comment"># make sure you&#x27;re logged in with \`huggingface-cli login\`</span>
model_id_or_path = <span class="hljs-string">&quot;CompVis/stable-diffusion-v1-4&quot;</span>
scheduler = DDIMScheduler.from_pretrained(model_id_or_path, subfolder=<span class="hljs-string">&quot;scheduler&quot;</span>)
pipe = CycleDiffusionPipeline.from_pretrained(model_id_or_path, scheduler=scheduler).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># let&#x27;s download an initial image</span>
url = <span class="hljs-string">&quot;https://raw.githubusercontent.com/ChenWu98/cycle-diffusion/main/data/dalle2/An%20astronaut%20riding%20a%20horse.png&quot;</span>
response = requests.get(url)
init_image = Image.<span class="hljs-built_in">open</span>(BytesIO(response.content)).convert(<span class="hljs-string">&quot;RGB&quot;</span>)
init_image = init_image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))
init_image.save(<span class="hljs-string">&quot;horse.png&quot;</span>)
<span class="hljs-comment"># let&#x27;s specify a prompt</span>
source_prompt = <span class="hljs-string">&quot;An astronaut riding a horse&quot;</span>
prompt = <span class="hljs-string">&quot;An astronaut riding an elephant&quot;</span>
<span class="hljs-comment"># call the pipeline</span>
image = pipe(
prompt=prompt,
source_prompt=source_prompt,
image=init_image,
num_inference_steps=<span class="hljs-number">100</span>,
eta=<span class="hljs-number">0.1</span>,
strength=<span class="hljs-number">0.8</span>,
guidance_scale=<span class="hljs-number">2</span>,
source_guidance_scale=<span class="hljs-number">1</span>,
).images[<span class="hljs-number">0</span>]
image.save(<span class="hljs-string">&quot;horse_to_elephant.png&quot;</span>)
<span class="hljs-comment"># let&#x27;s try another example</span>
<span class="hljs-comment"># See more samples at the original repo: https://github.com/ChenWu98/cycle-diffusion</span>
url = (
<span class="hljs-string">&quot;https://raw.githubusercontent.com/ChenWu98/cycle-diffusion/main/data/dalle2/A%20black%20colored%20car.png&quot;</span>
)
response = requests.get(url)
init_image = Image.<span class="hljs-built_in">open</span>(BytesIO(response.content)).convert(<span class="hljs-string">&quot;RGB&quot;</span>)
init_image = init_image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))
init_image.save(<span class="hljs-string">&quot;black.png&quot;</span>)
source_prompt = <span class="hljs-string">&quot;A black colored car&quot;</span>
prompt = <span class="hljs-string">&quot;A blue colored car&quot;</span>
<span class="hljs-comment"># call the pipeline</span>
torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt,
source_prompt=source_prompt,
image=init_image,
num_inference_steps=<span class="hljs-number">100</span>,
eta=<span class="hljs-number">0.1</span>,
strength=<span class="hljs-number">0.85</span>,
guidance_scale=<span class="hljs-number">3</span>,
source_guidance_scale=<span class="hljs-number">1</span>,
).images[<span class="hljs-number">0</span>]
image.save(<span class="hljs-string">&quot;black_to_blue.png&quot;</span>)`}}),{c(){r=o("p"),M=m("Example:"),p=d(),J(h.$$.fragment)},l(a){r=s(a,"P",{});var b=i(r);M=u(b,"Example:"),b.forEach(t),p=f(a),C(h.$$.fragment,a)},m(a,b){g(a,r,b),n(r,M),g(a,p,b),I(h,a,b),_=!0},p:wn,i(a){_||(D(h.$$.fragment,a),_=!0)},o(a){x(h.$$.fragment,a),_=!1},d(a){a&&t(r),a&&t(p),P(h,a)}}}function Cn(q){let r,M,p,h,_,a,b,k,j,w,v,H,L,Ie,De,fe,O,xe,me,K,te,Pe,ue,B,he,Z,S,oe,V,ke,se,je,ge,y,Q,Ze,ie,Ee,Ne,X,Be,ee,Se,We,$e,T,A,Ge,ae,Le,Ve,W,Qe,$,R,Xe,re,Ae,ye,E,G,le,Y,Re,ce,Ye,_e,N,F,Fe,pe,ze,be;return a=new qe({}),B=new Mn({props:{$$slots:{default:[Un]},$$scope:{ctx:q}}}),V=new qe({}),Q=new Ce({props:{name:"class diffusers.CycleDiffusionPipeline",anchor:"diffusers.CycleDiffusionPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": DDIMScheduler"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.CycleDiffusionPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/main/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) &#x2014;
Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.CycleDiffusionPipeline.text_encoder",description:`<strong>text_encoder</strong> (<a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIPTextModel</a>) &#x2014;
Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.CycleDiffusionPipeline.tokenizer",description:`<strong>tokenizer</strong> (<a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>) &#x2014;
A <code>CLIPTokenizer</code> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.CycleDiffusionPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/main/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) &#x2014;
A <code>UNet2DConditionModel</code> to denoise the encoded image latents.`,name:"unet"},{anchor:"diffusers.CycleDiffusionPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/main/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) &#x2014;
A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can only be an
instance of <a href="/docs/diffusers/main/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>.`,name:"scheduler"},{anchor:"diffusers.CycleDiffusionPipeline.safety_checker",description:`<strong>safety_checker</strong> (<code>StableDiffusionSafetyChecker</code>) &#x2014;
Classification module that estimates whether generated images could be considered offensive or harmful.
Please refer to the <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow">model card</a> for more details
about a model&#x2019;s potential harms.`,name:"safety_checker"},{anchor:"diffusers.CycleDiffusionPipeline.feature_extractor",description:`<strong>feature_extractor</strong> (<a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPImageProcessor" rel="nofollow">CLIPImageProcessor</a>) &#x2014;
A <code>CLIPImageProcessor</code> to extract features from generated images; used as inputs to the <code>safety_checker</code>.`,name:"feature_extractor"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py#L125"}}),A=new Ce({props:{name:"__call__",anchor:"diffusers.CycleDiffusionPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"source_prompt",val:": typing.Union[str, typing.List[str]]"},{name:"image",val:": typing.Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, typing.List[PIL.Image.Image], typing.List[numpy.ndarray], typing.List[torch.FloatTensor]] = None"},{name:"strength",val:": float = 0.8"},{name:"num_inference_steps",val:": typing.Optional[int] = 50"},{name:"guidance_scale",val:": typing.Optional[float] = 7.5"},{name:"source_guidance_scale",val:": typing.Optional[float] = 1"},{name:"num_images_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": typing.Optional[float] = 0.1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.FloatTensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Union[typing.Callable[[int, int, torch.FloatTensor], NoneType], NoneType] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Union[typing.Dict[str, typing.Any], NoneType] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.CycleDiffusionPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>) &#x2014;
The prompt or prompts to guide the image generation.`,name:"prompt"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.image",description:`<strong>image</strong> (<code>torch.FloatTensor</code>, <code>np.ndarray</code>, <code>PIL.Image.Image</code>, <code>List[torch.FloatTensor]</code>, <code>List[PIL.Image.Image]</code>, or <code>List[np.ndarray]</code>) &#x2014;
<code>Image</code> or tensor representing an image batch to be used as the starting point. Can also accept image
latents as <code>image</code>, but if passing latents directly it is not encoded again.`,name:"image"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.strength",description:`<strong>strength</strong> (<code>float</code>, <em>optional</em>, defaults to 0.8) &#x2014;
Indicates the extent to which the reference <code>image</code> is transformed. Must be between 0 and 1. <code>image</code> is used as a
starting point and more noise is added the higher the <code>strength</code>. The number of denoising steps depends
on the amount of noise initially added. When <code>strength</code> is 1, added noise is maximum and the denoising
process runs for the full number of iterations specified in <code>num_inference_steps</code>. A value of 1
essentially ignores <code>image</code>.`,name:"strength"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) &#x2014;
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference. This parameter is modulated by <code>strength</code>.`,name:"num_inference_steps"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) &#x2014;
A higher guidance scale value encourages the model to generate images closely linked to the text
<code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale &gt; 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.source_guidance_scale",description:`<strong>source_guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 1) &#x2014;
Guidance scale for the source prompt. This is useful to control the amount of influence the source
prompt has for encoding.`,name:"source_guidance_scale"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) &#x2014;
Corresponds to parameter eta (&#x3B7;) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies
to the <a href="/docs/diffusers/main/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) &#x2014;
A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
generation deterministic.`,name:"generator"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;pil&quot;</code>) &#x2014;
The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether or not to return a <a href="/docs/diffusers/main/en/api/pipelines/stable_diffusion/image_variation#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput">StableDiffusionPipelineOutput</a> instead of a
plain tuple.`,name:"return_dict"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) &#x2014;
A function that is called every <code>callback_steps</code> steps during inference. The function is called with the
following arguments: <code>callback(step: int, timestep: int, latents: torch.FloatTensor)</code>.`,name:"callback"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at
every step.`,name:"callback_steps"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) &#x2014;
A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in
<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.CycleDiffusionPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py#L588",returnDescription:`
<p>If <code>return_dict</code> is <code>True</code>, <a
href="/docs/diffusers/main/en/api/pipelines/stable_diffusion/image_variation#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput"
>StableDiffusionPipelineOutput</a> is returned,
otherwise a <code>tuple</code> is returned where the first element is a list with the generated images and the
second element is a list of <code>bool</code>s indicating whether the corresponding generated image contains
\u201Cnot-safe-for-work\u201D (nsfw) content.</p>
`,returnType:`
<p><a
href="/docs/diffusers/main/en/api/pipelines/stable_diffusion/image_variation#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput"
>StableDiffusionPipelineOutput</a> or <code>tuple</code></p>
`}}),W=new Tn({props:{anchor:"diffusers.CycleDiffusionPipeline.__call__.example",$$slots:{default:[Jn]},$$scope:{ctx:q}}}),R=new Ce({props:{name:"encode_prompt",anchor:"diffusers.CycleDiffusionPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.FloatTensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.FloatTensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
prompt to be encoded
<strong>device</strong> (<code>torch.device</code>) &#x2014;
torch device`,name:"prompt"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) &#x2014;
number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) &#x2014;
whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts not to guide the image generation. If not defined, one has to pass
<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) &#x2014;
A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.CycleDiffusionPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py#L264"}}),Y=new qe({}),F=new Ce({props:{name:"class diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput",anchor:"diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput",parameters:[{name:"images",val:": typing.Union[typing.List[PIL.Image.Image], numpy.ndarray]"},{name:"nsfw_content_detected",val:": typing.Optional[typing.List[bool]]"}],parametersDescription:[{anchor:"diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput.images",description:`<strong>images</strong> (<code>List[PIL.Image.Image]</code> or <code>np.ndarray</code>) &#x2014;
List of denoised PIL images of length <code>batch_size</code> or NumPy array of shape <code>(batch_size, height, width, num_channels)</code>.`,name:"images"},{anchor:"diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput.nsfw_content_detected",description:`<strong>nsfw_content_detected</strong> (<code>List[bool]</code>) &#x2014;
List indicating whether the corresponding generated image contains &#x201C;not-safe-for-work&#x201D; (nsfw) content or
<code>None</code> if safety checking could not be performed.`,name:"nsfw_content_detected"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_output.py#L15"}}),{c(){r=o("meta"),M=d(),p=o("h1"),h=o("a"),_=o("span"),J(a.$$.fragment),b=d(),k=o("span"),j=m("Cycle Diffusion"),w=d(),v=o("p"),H=m("Cycle Diffusion is a text guided image-to-image generation model proposed in "),L=o("a"),Ie=m("Unifying Diffusion Models\u2019 Latent Space, with Applications to CycleDiffusion and Guidance"),De=m(" by Chen Henry Wu, Fernando De la Torre."),fe=d(),O=o("p"),xe=m("The abstract from the paper is:"),me=d(),K=o("p"),te=o("em"),Pe=m("Diffusion models have achieved unprecedented performance in generative modeling. The commonly-adopted formulation of the latent code of diffusion models is a sequence of gradually denoised samples, as opposed to the simpler (e.g., Gaussian) latent space of GANs, VAEs, and normalizing flows. This paper provides an alternative, Gaussian formulation of the latent space of various diffusion models, as well as an invertible DPM-Encoder that maps images into the latent space. While our formulation is purely based on the definition of diffusion models, we demonstrate several intriguing consequences. (1) Empirically, we observe that a common latent space emerges from two diffusion models trained independently on related domains. In light of this finding, we propose CycleDiffusion, which uses DPM-Encoder for unpaired image-to-image translation. Furthermore, applying CycleDiffusion to text-to-image diffusion models, we show that large-scale text-to-image diffusion models can be used as zero-shot image-to-image editors. (2) One can guide pre-trained diffusion models and GANs by controlling the latent codes in a unified, plug-and-play formulation based on energy-based models. 
Using the CLIP model and a face recognition model as guidance, we demonstrate that diffusion models have better coverage of low-density sub-populations and individuals than GANs."),ue=d(),J(B.$$.fragment),he=d(),Z=o("h2"),S=o("a"),oe=o("span"),J(V.$$.fragment),ke=d(),se=o("span"),je=m("CycleDiffusionPipeline"),ge=d(),y=o("div"),J(Q.$$.fragment),Ze=d(),ie=o("p"),Ee=m("Pipeline for text-guided image to image generation using Stable Diffusion."),Ne=d(),X=o("p"),Be=m("This model inherits from "),ee=o("a"),Se=m("DiffusionPipeline"),We=m(`. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).`),$e=d(),T=o("div"),J(A.$$.fragment),Ge=d(),ae=o("p"),Le=m("The call function to the pipeline for generation."),Ve=d(),J(W.$$.fragment),Qe=d(),$=o("div"),J(R.$$.fragment),Xe=d(),re=o("p"),Ae=m("Encodes the prompt into text encoder hidden states."),ye=d(),E=o("h2"),G=o("a"),le=o("span"),J(Y.$$.fragment),Re=d(),ce=o("span"),Ye=m("StableDiffusionPiplineOutput"),_e=d(),N=o("div"),J(F.$$.fragment),Fe=d(),pe=o("p"),ze=m("Output class for Stable Diffusion pipelines."),this.h()},l(e){const l=_n('[data-svelte="svelte-1phssyn"]',document.head);r=s(l,"META",{name:!0,content:!0}),l.forEach(t),M=f(e),p=s(e,"H1",{class:!0});var z=i(p);h=s(z,"A",{id:!0,class:!0,href:!0});var de=i(h);_=s(de,"SPAN",{});var He=i(_);C(a.$$.fragment,He),He.forEach(t),de.forEach(t),b=f(z),k=s(z,"SPAN",{});var Oe=i(k);j=u(Oe,"Cycle Diffusion"),Oe.forEach(t),z.forEach(t),w=f(e),v=s(e,"P",{});var we=i(v);H=u(we,"Cycle Diffusion is a text guided image-to-image generation model proposed in "),L=s(we,"A",{href:!0,rel:!0});var Ke=i(L);Ie=u(Ke,"Unifying Diffusion Models\u2019 Latent Space, with Applications to CycleDiffusion and Guidance"),Ke.forEach(t),De=u(we," by Chen Henry Wu, Fernando De la Torre."),we.forEach(t),fe=f(e),O=s(e,"P",{});var en=i(O);xe=u(en,"The abstract from the paper is:"),en.forEach(t),me=f(e),K=s(e,"P",{});var nn=i(K);te=s(nn,"EM",{});var tn=i(te);Pe=u(tn,"Diffusion models have achieved unprecedented performance in generative modeling. The commonly-adopted formulation of the latent code of diffusion models is a sequence of gradually denoised samples, as opposed to the simpler (e.g., Gaussian) latent space of GANs, VAEs, and normalizing flows. This paper provides an alternative, Gaussian formulation of the latent space of various diffusion models, as well as an invertible DPM-Encoder that maps images into the latent space. 
While our formulation is purely based on the definition of diffusion models, we demonstrate several intriguing consequences. (1) Empirically, we observe that a common latent space emerges from two diffusion models trained independently on related domains. In light of this finding, we propose CycleDiffusion, which uses DPM-Encoder for unpaired image-to-image translation. Furthermore, applying CycleDiffusion to text-to-image diffusion models, we show that large-scale text-to-image diffusion models can be used as zero-shot image-to-image editors. (2) One can guide pre-trained diffusion models and GANs by controlling the latent codes in a unified, plug-and-play formulation based on energy-based models. Using the CLIP model and a face recognition model as guidance, we demonstrate that diffusion models have better coverage of low-density sub-populations and individuals than GANs."),tn.forEach(t),nn.forEach(t),ue=f(e),C(B.$$.fragment,e),he=f(e),Z=s(e,"H2",{class:!0});var Me=i(Z);S=s(Me,"A",{id:!0,class:!0,href:!0});var on=i(S);oe=s(on,"SPAN",{});var sn=i(oe);C(V.$$.fragment,sn),sn.forEach(t),on.forEach(t),ke=f(Me),se=s(Me,"SPAN",{});var an=i(se);je=u(an,"CycleDiffusionPipeline"),an.forEach(t),Me.forEach(t),ge=f(e),y=s(e,"DIV",{class:!0});var U=i(y);C(Q.$$.fragment,U),Ze=f(U),ie=s(U,"P",{});var rn=i(ie);Ee=u(rn,"Pipeline for text-guided image to image generation using Stable Diffusion."),rn.forEach(t),Ne=f(U),X=s(U,"P",{});var ve=i(X);Be=u(ve,"This model inherits from "),ee=s(ve,"A",{href:!0});var ln=i(ee);Se=u(ln,"DiffusionPipeline"),ln.forEach(t),We=u(ve,`. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).`),ve.forEach(t),$e=f(U),T=s(U,"DIV",{class:!0});var ne=i(T);C(A.$$.fragment,ne),Ge=f(ne),ae=s(ne,"P",{});var cn=i(ae);Le=u(cn,"The call function to the pipeline for generation."),cn.forEach(t),Ve=f(ne),C(W.$$.fragment,ne),ne.forEach(t),Qe=f(U),$=s(U,"DIV",{class:!0});var Te=i($);C(R.$$.fragment,Te),Xe=f(Te),re=s(Te,"P",{});var pn=i(re);Ae=u(pn,"Encodes the prompt into text encoder hidden states."),pn.forEach(t),Te.forEach(t),U.forEach(t),ye=f(e),E=s(e,"H2",{class:!0});var Ue=i(E);G=s(Ue,"A",{id:!0,class:!0,href:!0});var dn=i(G);le=s(dn,"SPAN",{});var fn=i(le);C(Y.$$.fragment,fn),fn.forEach(t),dn.forEach(t),Re=f(Ue),ce=s(Ue,"SPAN",{});var mn=i(ce);Ye=u(mn,"StableDiffusionPiplineOutput"),mn.forEach(t),Ue.forEach(t),_e=f(e),N=s(e,"DIV",{class:!0});var Je=i(N);C(F.$$.fragment,Je),Fe=f(Je),pe=s(Je,"P",{});var un=i(pe);ze=u(un,"Output class for Stable Diffusion pipelines."),un.forEach(t),Je.forEach(t),this.h()},h(){c(r,"name","hf:doc:metadata"),c(r,"content",JSON.stringify(In)),c(h,"id","cycle-diffusion"),c(h,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(h,"href","#cycle-diffusion"),c(p,"class","relative group"),c(L,"href","https://huggingface.co/papers/2210.05559"),c(L,"rel","nofollow"),c(S,"id","diffusers.CycleDiffusionPipeline"),c(S,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(S,"href","#diffusers.CycleDiffusionPipeline"),c(Z,"class","relative group"),c(ee,"href","/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline"),c(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),c($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 
rounded-tl-xl mb-6 mt-8"),c(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),c(G,"id","diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput"),c(G,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(G,"href","#diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput"),c(E,"class","relative group"),c(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,l){n(document.head,r),g(e,M,l),g(e,p,l),n(p,h),n(h,_),I(a,_,null),n(p,b),n(p,k),n(k,j),g(e,w,l),g(e,v,l),n(v,H),n(v,L),n(L,Ie),n(v,De),g(e,fe,l),g(e,O,l),n(O,xe),g(e,me,l),g(e,K,l),n(K,te),n(te,Pe),g(e,ue,l),I(B,e,l),g(e,he,l),g(e,Z,l),n(Z,S),n(S,oe),I(V,oe,null),n(Z,ke),n(Z,se),n(se,je),g(e,ge,l),g(e,y,l),I(Q,y,null),n(y,Ze),n(y,ie),n(ie,Ee),n(y,Ne),n(y,X),n(X,Be),n(X,ee),n(ee,Se),n(X,We),n(y,$e),n(y,T),I(A,T,null),n(T,Ge),n(T,ae),n(ae,Le),n(T,Ve),I(W,T,null),n(y,Qe),n(y,$),I(R,$,null),n($,Xe),n($,re),n(re,Ae),g(e,ye,l),g(e,E,l),n(E,G),n(G,le),I(Y,le,null),n(E,Re),n(E,ce),n(ce,Ye),g(e,_e,l),g(e,N,l),I(F,N,null),n(N,Fe),n(N,pe),n(pe,ze),be=!0},p(e,[l]){const z={};l&2&&(z.$$scope={dirty:l,ctx:e}),B.$set(z);const de={};l&2&&(de.$$scope={dirty:l,ctx:e}),W.$set(de)},i(e){be||(D(a.$$.fragment,e),D(B.$$.fragment,e),D(V.$$.fragment,e),D(Q.$$.fragment,e),D(A.$$.fragment,e),D(W.$$.fragment,e),D(R.$$.fragment,e),D(Y.$$.fragment,e),D(F.$$.fragment,e),be=!0)},o(e){x(a.$$.fragment,e),x(B.$$.fragment,e),x(V.$$.fragment,e),x(Q.$$.fragment,e),x(A.$$.fragment,e),x(W.$$.fragment,e),x(R.$$.fragment,e),x(Y.$$.fragment,e),x(F.$$.fragment,e),be=!1},d(e){t(r),e&&t(M),e&&t(p),P(a),e&&t(w),e&&t(v),e&&t(fe),e&&t(O),e&&t(me),e&&t(K),e&&t(ue),P(B,e),e&&t(he),e&&t(Z),P(V),e&&t(ge),e&&t(y),P(Q),P(A),P(W),P(R),e&&t(ye),e&&t(E),P(Y),e&&t(_e),e&&t(N),P(F)}}}const 
In={local:"cycle-diffusion",sections:[{local:"diffusers.CycleDiffusionPipeline",title:"CycleDiffusionPipeline"},{local:"diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput",title:"StableDiffusionPiplineOutput"}],title:"Cycle Diffusion"};function Dn(q){return bn(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Nn extends hn{constructor(r){super();gn(this,r,Dn,Cn,yn,{})}}export{Nn as default,In as metadata};

Xet Storage Details

Size:
36.8 kB
·
Xet hash:
8be27702c55323e487be5a446df3436a6853787931915aa82ab343f20652eb57

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.