Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / diffusers /pr_11660 /en /_app /immutable /nodes /165.4b2a549a.js

rtrm's picture

about 1 month ago

105 kB

	import{s as Ms,o as Zs,n as Js}from"../chunks/scheduler.8c3d61f6.js";import{S as ks,i as Us,g as a,s,r as d,m as xs,A as Vs,h as r,f as t,c as i,j as g,u as c,x as p,n as vs,k as y,y as n,a as l,v as m,d as f,t as h,w as u}from"../chunks/index.da70eac4.js";import{T as js}from"../chunks/Tip.1d9b8c37.js";import{D as J}from"../chunks/Docstring.9419aa1d.js";import{C as v}from"../chunks/CodeBlock.a9c4becf.js";import{H as M,E as Is}from"../chunks/getInferenceSnippets.39110341.js";function Cs(Gt){let b,R='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){b=a("p"),b.innerHTML=R},l(Z){b=r(Z,"P",{"data-svelte-h":!0}),p(b)!=="svelte-1qn15hi"&&(b.innerHTML=R)},m(Z,Ke){l(Z,b,Ke)},p:Js,d(Z){Z&&t(b)}}}function Xs(Gt){let b,R,Z,Ke,Q,Pt,X,In='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',St,E,Cn='<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">Text2Video-Zero: Text-to-Image Diffusion Models are Zero-Shot Video Generators</a> is by Levon Khachatryan, Andranik Movsisyan, Vahram Tadevosyan, Roberto Henschel, <a href="https://www.ece.utexas.edu/people/faculty/atlas-wang" rel="nofollow">Zhangyang Wang</a>, Shant Navasardyan, <a href="https://www.humphreyshi.com" rel="nofollow">Humphrey Shi</a>.',Nt,Y,Xn="Text2Video-Zero enables zero-shot video generation using either:",$t,F,Wn="<li>A textual prompt</li> <li>A prompt combined with guidance from poses or edges</li> <li>Video Instruct-Pix2Pix (instruction-guided video editing)</li>",Lt,H,Gn="Results are temporally consistent and closely follow the guidance and textual prompts.",Bt,q,Pn='<img 
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/t2v_zero_teaser.png" alt="teaser-img"/>',zt,A,Sn="The abstract from the paper is:",Dt,O,Nn=`<em>Recent text-to-video generation approaches rely on computationally heavy training and require large-scale video datasets. In this paper, we introduce a new task of zero-shot text-to-video generation and propose a low-cost approach (without any training or optimization) by leveraging the power of existing text-to-image synthesis methods (e.g., Stable Diffusion), making them suitable for the video domain.
	Our key modifications include (i) enriching the latent codes of the generated frames with motion dynamics to keep the global scene and the background time consistent; and (ii) reprogramming frame-level self-attention using a new cross-frame attention of each frame on the first frame, to preserve the context, appearance, and identity of the foreground object.
	Experiments show that this leads to low overhead, yet high-quality and remarkably consistent video generation. Moreover, our approach is not limited to text-to-video synthesis but is also applicable to other tasks such as conditional and content-specialized video generation, and Video Instruct-Pix2Pix, i.e., instruction-guided video editing.
	As experiments show, our method performs comparably or sometimes better than recent approaches, despite not being trained on additional video data.</em>`,Rt,K,$n='You can find additional information about Text2Video-Zero on the <a href="https://text2video-zero.github.io/" rel="nofollow">project page</a>, <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, and <a href="https://github.com/Picsart-AI-Research/Text2Video-Zero" rel="nofollow">original codebase</a>.',Qt,ee,Et,te,Yt,oe,Ln="To generate a video from prompt, run the following Python code:",Ft,ne,Ht,se,Bn="You can change these parameters in the pipeline call:",qt,ie,zn='<li>Motion field strength (see the <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1):<ul><li><code>motion_field_strength_x</code> and <code>motion_field_strength_y</code>. Default: <code>motion_field_strength_x=12</code>, <code>motion_field_strength_y=12</code></li></ul></li> <li><code>T</code> and <code>T'</code> (see the <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1)<ul><li><code>t0</code> and <code>t1</code> in the range <code>{0, ..., num_inference_steps}</code>. Default: <code>t0=45</code>, <code>t1=48</code></li></ul></li> <li>Video length:<ul><li><code>video_length</code>, the number of frames video_length to be generated. 
Default: <code>video_length=8</code></li></ul></li>',At,ae,Dn="We can also generate longer videos by doing the processing in a chunk-by-chunk manner:",Ot,re,Kt,et,W,le,Ro,tt,Rn="TextToVideoZeroSDXLPipeline",Qo,eo,pe,to,de,oo,ce,Qn="To generate a video from prompt with additional pose control",no,ot,me,nt,En="Download a demo video",Eo,fe,so,V,j,st,Yn="Read video containing extracted pose images",Yo,he,Fo,it,Fn='To extract pose from actual video, read <a href="controlnet">ControlNet documentation</a>.',Ho,ue,at,Hn="Run <code>StableDiffusionControlNetPipeline</code> with our custom attention processor",qo,ge,io,rt,I,_e,Ao,lt,qn="Since our attention processor also works with SDXL, it can be utilized to generate a video from prompt using ControlNet models powered by SDXL:",Oo,Te,ao,be,ro,ye,An='To generate a video from prompt with additional Canny edge control, follow the same steps described above for pose-guided generation using <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a>.',lo,we,po,xe,On='To perform text-guided video editing (with <a href="pix2pix">InstructPix2Pix</a>):',co,k,ve,pt,Kn="Download a demo video",Ko,Me,en,Ze,dt,es="Read video from path",tn,Je,on,ke,ct,ts="Run <code>StableDiffusionInstructPix2PixPipeline</code> with our custom attention processor",nn,Ue,mo,Ve,fo,je,os=`Methods <strong>Text-To-Video</strong>, <strong>Text-To-Video with Pose Control</strong> and <strong>Text-To-Video with Edge Control</strong>
	can run with custom <a href="../../training/dreambooth">DreamBooth</a> models, as shown below for
	<a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a> and
	<a href="https://huggingface.co/PAIR/text2video-zero-controlnet-canny-avatar" rel="nofollow">Avatar style DreamBooth</a> model:`,ho,U,Ie,mt,ns="Download a demo video",sn,Ce,an,Xe,ft,ss="Read video from path",rn,We,ln,Ge,ht,is="Run <code>StableDiffusionControlNetPipeline</code> with custom trained DreamBooth model",pn,Pe,uo,Se,as='You can filter out some available DreamBooth-trained models with <a href="https://huggingface.co/models?search=dreambooth" rel="nofollow">this link</a>.',go,G,_o,Ne,To,_,$e,dn,ut,rs="Pipeline for zero-shot text-to-video generation using Stable Diffusion.",cn,gt,ls=`This model inherits from <a href="/docs/diffusers/pr_11660/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods
	implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,mn,P,Le,fn,_t,ps="The call function to the pipeline for generation.",hn,S,Be,un,Tt,ds="Perform backward process given list of time steps.",gn,N,ze,_n,bt,cs="Encodes the prompt into text encoder hidden states.",Tn,$,De,bn,yt,ms="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",bo,Re,yo,T,Qe,yn,wt,fs="Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.",wn,xt,hs=`This model inherits from <a href="/docs/diffusers/pr_11660/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods
	implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,xn,L,Ee,vn,vt,us="Function invoked when calling the pipeline for generation.",Mn,B,Ye,Zn,Mt,gs="Perform backward process given list of time steps",Jn,z,Fe,kn,Zt,_s="Encodes the prompt into text encoder hidden states.",Un,D,He,Vn,Jt,Ts="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",wo,qe,xo,C,Ae,jn,kt,bs="Output class for zero-shot text-to-video pipeline.",vo,Oe,Mo,Ct,Zo;return Q=new M({props:{title:"Text2Video-Zero",local:"text2video-zero",headingTag:"h1"}}),ee=new M({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),te=new M({props:{title:"Text-To-Video",local:"text-to-video",headingTag:"h3"}}),ne=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBpbWFnZWlvJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBwYW5kYSUyMGlzJTIwcGxheWluZyUyMGd1aXRhciUyMG9uJTIwdGltZXMlMjBzcXVhcmUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCkuaW1hZ2VzJTBBcmVzdWx0JTIwJTNEJTIwJTVCKHIlMjAqJTIwMjU1KS5hc3R5cGUoJTIydWludDglMjIpJTIwZm9yJTIwciUyMGluJTIwcmVzdWx0JTVEJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline
	<span class="hljs-keyword">import</span> imageio

	model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>
	pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>)

	prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span>
	result = pipe(prompt=prompt).images
	result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result]
	imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),re=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQXBpcGUlMjAlM0QlMjBUZXh0VG9WaWRlb1plcm9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBc2VlZCUyMCUzRCUyMDAlMEF2aWRlb19sZW5ndGglMjAlM0QlMjAyNCUyMCUyMCUyMzI0JTIwJUMzJUI3JTIwNGZwcyUyMCUzRCUyMDYlMjBzZWNvbmRzJTBBY2h1bmtfc2l6ZSUyMCUzRCUyMDglMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwcGFuZGElMjBpcyUyMHBsYXlpbmclMjBndWl0YXIlMjBvbiUyMHRpbWVzJTIwc3F1YXJlJTIyJTBBJTBBJTIzJTIwR2VuZXJhdGUlMjB0aGUlMjB2aWRlbyUyMGNodW5rLWJ5LWNodW5rJTBBcmVzdWx0JTIwJTNEJTIwJTVCJTVEJTBBY2h1bmtfaWRzJTIwJTNEJTIwbnAuYXJhbmdlKDAlMkMlMjB2aWRlb19sZW5ndGglMkMlMjBjaHVua19zaXplJTIwLSUyMDEpJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSUzRCUyMmN1ZGElMjIpJTBBZm9yJTIwaSUyMGluJTIwcmFuZ2UobGVuKGNodW5rX2lkcykpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlByb2Nlc3NpbmclMjBjaHVuayUyMCU3QmklMjAlMkIlMjAxJTdEJTIwJTJGJTIwJTdCbGVuKGNodW5rX2lkcyklN0QlMjIpJTBBJTIwJTIwJTIwJTIwY2hfc3RhcnQlMjAlM0QlMjBjaHVua19pZHMlNUJpJTVEJTBBJTIwJTIwJTIwJTIwY2hfZW5kJTIwJTNEJTIwdmlkZW9fbGVuZ3RoJTIwaWYlMjBpJTIwJTNEJTNEJTIwbGVuKGNodW5rX2lkcyklMjAtJTIwMSUyMGVsc2UlMjBjaHVua19pZHMlNUJpJTIwJTJCJTIwMSU1RCUwQSUyMCUyMCUyMCUyMCUyMyUyMEF0dGFjaCUyMHRoZSUyMGZpcnN0JTIwZnJhbWUlMjBmb3IlMjBDcm9zcyUyMEZyYW1lJTIwQXR0ZW50aW9uJTBBJTIwJTIwJTIwJTIwZnJhbWVfaWRzJTIwJTNEJTIwJTVCMCU1RCUyMCUyQiUyMGxpc3QocmFuZ2UoY2hfc3RhcnQlMkMlMjBjaF9lbmQpKSUwQSUyMCUyMCUyMCUyMCUyMyUyMEZpeCUyMHRoZSUyMHNlZWQlMjBmb3IlMjB0aGUlMjB0ZW1wb3JhbCUyMGNvbnNpc3RlbmN5JTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yLm1hbnVhbF9zZWVkKHNlZWQpJTBBJTIwJTIwJTIwJTIwb3V0cHV0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjB2aWRlb19sZW5ndGglM0RsZW4oZnJhbWVfaWRzKSUyQyUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUyMGZyYW
1lX2lkcyUzRGZyYW1lX2lkcyklMEElMjAlMjAlMjAlMjByZXN1bHQuYXBwZW5kKG91dHB1dC5pbWFnZXMlNUIxJTNBJTVEKSUwQSUwQSUyMyUyMENvbmNhdGVuYXRlJTIwY2h1bmtzJTIwYW5kJTIwc2F2ZSUwQXJlc3VsdCUyMCUzRCUyMG5wLmNvbmNhdGVuYXRlKHJlc3VsdCklMEFyZXN1bHQlMjAlM0QlMjAlNUIociUyMColMjAyNTUpLmFzdHlwZSglMjJ1aW50OCUyMiklMjBmb3IlMjByJTIwaW4lMjByZXN1bHQlNUQlMEFpbWFnZWlvLm1pbXNhdmUoJTIydmlkZW8ubXA0JTIyJTJDJTIwcmVzdWx0JTJDJTIwZnBzJTNENCk=",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline
	<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np

	model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>
	pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>)
	seed = <span class="hljs-number">0</span>
	video_length = <span class="hljs-number">24</span> <span class="hljs-comment">#24 ÷ 4fps = 6 seconds</span>
	chunk_size = <span class="hljs-number">8</span>
	prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span>

	<span class="hljs-comment"># Generate the video chunk-by-chunk</span>
	result = []
	chunk_ids = np.arange(<span class="hljs-number">0</span>, video_length, chunk_size - <span class="hljs-number">1</span>)
	generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>)
	<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(chunk_ids)):
	<span class="hljs-built_in">print</span>(<span class="hljs-string">f"Processing chunk <span class="hljs-subst">{i + <span class="hljs-number">1</span>}</span> / <span class="hljs-subst">{<span class="hljs-built_in">len</span>(chunk_ids)}</span>"</span>)
	ch_start = chunk_ids[i]
	ch_end = video_length <span class="hljs-keyword">if</span> i == <span class="hljs-built_in">len</span>(chunk_ids) - <span class="hljs-number">1</span> <span class="hljs-keyword">else</span> chunk_ids[i + <span class="hljs-number">1</span>]
	<span class="hljs-comment"># Attach the first frame for Cross Frame Attention</span>
	frame_ids = [<span class="hljs-number">0</span>] + <span class="hljs-built_in">list</span>(<span class="hljs-built_in">range</span>(ch_start, ch_end))
	<span class="hljs-comment"># Fix the seed for the temporal consistency</span>
	generator.manual_seed(seed)
	output = pipe(prompt=prompt, video_length=<span class="hljs-built_in">len</span>(frame_ids), generator=generator, frame_ids=frame_ids)
	result.append(output.images[<span class="hljs-number">1</span>:])

	<span class="hljs-comment"># Concatenate chunks and save</span>
	result = np.concatenate(result)
	result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result]
	imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),le=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),pe=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFiaWxpdHlhaSUyRnN0YWJsZS1kaWZmdXNpb24teGwtYmFzZS0xLjAlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHZhcmlhbnQlM0QlMjJmcDE2JTIyJTJDJTIwdXNlX3NhZmV0ZW5zb3JzJTNEVHJ1ZSUwQSkudG8oJTIyY3VkYSUyMik=",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroSDXLPipeline

	model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>
	pipe = TextToVideoZeroSDXLPipeline.from_pretrained(
	model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>, use_safetensors=<span class="hljs-literal">True</span>
	).to(<span class="hljs-string">"cuda"</span>)`,wrap:!1}}),de=new M({props:{title:"Text-To-Video with Pose Control",local:"text-to-video-with-pose-control",headingTag:"h3"}}),fe=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBvc2VzX3NrZWxldG9uX2dpZnMlMkZkYW5jZTFfY29yci5tcDQlMjIlMEFyZXBvX2lkJTIwJTNEJTIwJTIyUEFJUiUyRlRleHQyVmlkZW8tWmVybyUyMiUwQXZpZGVvX3BhdGglMjAlM0QlMjBoZl9odWJfZG93bmxvYWQocmVwb190eXBlJTNEJTIyc3BhY2UlMjIlMkMlMjByZXBvX2lkJTNEcmVwb19pZCUyQyUyMGZpbGVuYW1lJTNEZmlsZW5hbWUp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download

	filename = <span class="hljs-string">"__assets__/poses_skeleton_gifs/dance1_corr.mp4"</span>
	repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span>
	video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),he=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXBvc2VfaW1hZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
	<span class="hljs-keyword">import</span> imageio

	reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>)
	frame_count = <span class="hljs-number">8</span>
	pose_images = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),ge=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LW9wZW5wb3NlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel
	<span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor

	model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>
	controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-openpose"</span>, torch_dtype=torch.float16)
	pipe = StableDiffusionControlNetPipeline.from_pretrained(
	model_id, controlnet=controlnet, torch_dtype=torch.float16
	).to(<span class="hljs-string">"cuda"</span>)

	<span class="hljs-comment"># Set the attention processor</span>
	pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>))
	pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>))

	<span class="hljs-comment"># fix latents for all frames</span>
	latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>)

	prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span>
	result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images
	imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),_e=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),Te=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uWExDb250cm9sTmV0UGlwZWxpbmUlMkMlMjBDb250cm9sTmV0TW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQWNvbnRyb2xuZXRfbW9kZWxfaWQlMjAlM0QlMjAndGhpYmF1ZCUyRmNvbnRyb2xuZXQtb3BlbnBvc2Utc2R4bC0xLjAnJTBBbW9kZWxfaWQlMjAlM0QlMjAnc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLXhsLWJhc2UtMS4wJyUwQSUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGNvbnRyb2xuZXRfbW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTA5bW9kZWxfaWQlMkMlMjBjb250cm9sbmV0JTNEY29udHJvbG5ldCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJ2N1ZGEnKSUwQSUwQSUyMyUyMFNldCUyMHRoZSUyMGF0dGVudGlvbiUyMHByb2Nlc3NvciUwQXBpcGUudW5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQXBpcGUuY29udHJvbG5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQSUwQSUyMyUyMGZpeCUyMGxhdGVudHMlMjBmb3IlMjBhbGwlMjBmcmFtZXMlMEFsYXRlbnRzJTIwJTNEJTIwdG9yY2gucmFuZG4oKDElMkMlMjA0JTJDJTIwMTI4JTJDJTIwMTI4KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLControlNetPipeline, ControlNetModel
	<span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor

	controlnet_model_id = <span class="hljs-string">'thibaud/controlnet-openpose-sdxl-1.0'</span>
	model_id = <span class="hljs-string">'stabilityai/stable-diffusion-xl-base-1.0'</span>

	controlnet = ControlNetModel.from_pretrained(controlnet_model_id, torch_dtype=torch.float16)
	pipe = StableDiffusionControlNetPipeline.from_pretrained(
	model_id, controlnet=controlnet, torch_dtype=torch.float16
	).to(<span class="hljs-string">'cuda'</span>)

	<span class="hljs-comment"># Set the attention processor</span>
	pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>))
	pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>))

	<span class="hljs-comment"># fix latents for all frames</span>
	latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>)

	prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span>
	result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images
	imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),be=new M({props:{title:"Text-To-Video with Edge Control",local:"text-to-video-with-edge-control",headingTag:"h3"}}),we=new M({props:{title:"Video Instruct-Pix2Pix",local:"video-instruct-pix2pix",headingTag:"h3"}}),Me=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBpeDJwaXglMjB2aWRlbyUyRmNhbWVsLm1wNCUyMiUwQXJlcG9faWQlMjAlM0QlMjAlMjJQQUlSJTJGVGV4dDJWaWRlby1aZXJvJTIyJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGhmX2h1Yl9kb3dubG9hZChyZXBvX3R5cGUlM0QlMjJzcGFjZSUyMiUyQyUyMHJlcG9faWQlM0RyZXBvX2lkJTJDJTIwZmlsZW5hbWUlM0RmaWxlbmFtZSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download

	filename = <span class="hljs-string">"__assets__/pix2pix video/camel.mp4"</span>
	repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span>
	video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Je=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXZpZGVvJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
	<span class="hljs-keyword">import</span> imageio

	reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>)
	frame_count = <span class="hljs-number">8</span>
	video = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Ue=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uSW5zdHJ1Y3RQaXgyUGl4UGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIydGltYnJvb2tzJTJGaW5zdHJ1Y3QtcGl4MnBpeCUyMiUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25JbnN0cnVjdFBpeDJQaXhQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMykpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIybWFrZSUyMGl0JTIwVmFuJTIwR29naCUyMFN0YXJyeSUyME5pZ2h0JTIwc3R5bGUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRCU1QnByb21wdCU1RCUyMColMjBsZW4odmlkZW8pJTJDJTIwaW1hZ2UlM0R2aWRlbykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMmVkaXRlZF92aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionInstructPix2PixPipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor

	model_id = <span class="hljs-string">"timbrooks/instruct-pix2pix"</span>
	pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>)
	pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">3</span>))

	prompt = <span class="hljs-string">"make it Van Gogh Starry Night style"</span>
	result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(video), image=video).images
	imageio.mimsave(<span class="hljs-string">"edited_video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),Ve=new M({props:{title:"DreamBooth specialization",local:"dreambooth-specialization",headingTag:"h3"}}),Ce=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRmNhbm55X3ZpZGVvc19tcDQlMkZnaXJsX3R1cm5pbmcubXA0JTIyJTBBcmVwb19pZCUyMCUzRCUyMCUyMlBBSVIlMkZUZXh0MlZpZGVvLVplcm8lMjIlMEF2aWRlb19wYXRoJTIwJTNEJTIwaGZfaHViX2Rvd25sb2FkKHJlcG9fdHlwZSUzRCUyMnNwYWNlJTIyJTJDJTIwcmVwb19pZCUzRHJlcG9faWQlMkMlMjBmaWxlbmFtZSUzRGZpbGVuYW1lKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download

	filename = <span class="hljs-string">"__assets__/canny_videos_mp4/girl_turning.mp4"</span>
	repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span>
	video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),We=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQWNhbm55X2VkZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
	<span class="hljs-keyword">import</span> imageio

	reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>)
	frame_count = <span class="hljs-number">8</span>
	canny_edges = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Pe=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEElMjMlMjBzZXQlMjBtb2RlbCUyMGlkJTIwdG8lMjBjdXN0b20lMjBtb2RlbCUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyUEFJUiUyRnRleHQydmlkZW8temVyby1jb250cm9sbmV0LWNhbm55LWF2YXRhciUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LWNhbm55JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4oY2FubnlfZWRnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMm9pbCUyMHBhaW50aW5nJTIwb2YlMjBhJTIwYmVhdXRpZnVsJTIwZ2lybCUyMGF2YXRhciUyMHN0eWxlJTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKGNhbm55X2VkZ2VzKSUyQyUyMGltYWdlJTNEY2FubnlfZWRnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span 
class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel
	<span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor

	<span class="hljs-comment"># set model id to custom model</span>
	model_id = <span class="hljs-string">"PAIR/text2video-zero-controlnet-canny-avatar"</span>
	controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-canny"</span>, torch_dtype=torch.float16)
	pipe = StableDiffusionControlNetPipeline.from_pretrained(
	model_id, controlnet=controlnet, torch_dtype=torch.float16
	).to(<span class="hljs-string">"cuda"</span>)

	<span class="hljs-comment"># Set the attention processor</span>
	pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>))
	pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>))

	<span class="hljs-comment"># fix latents for all frames</span>
	latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(canny_edges), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>)

	prompt = <span class="hljs-string">"oil painting of a beautiful girl avatar style"</span>
	result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(canny_edges), image=canny_edges, latents=latents).images
	imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),G=new js({props:{$$slots:{default:[Cs]},$$scope:{ctx:Gt}}}),Ne=new M({props:{title:"TextToVideoZeroPipeline",local:"diffusers.TextToVideoZeroPipeline",headingTag:"h2"}}),$e=new J({props:{name:"class diffusers.TextToVideoZeroPipeline",anchor:"diffusers.TextToVideoZeroPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) —
	Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) —
	Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) —
	A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) —
	A <a href="/docs/diffusers/pr_11660/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.TextToVideoZeroPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11660/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) —
	A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of
	<a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_11660/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_11660/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"},{anchor:"diffusers.TextToVideoZeroPipeline.safety_checker",description:`<strong>safety_checker</strong> (<code>StableDiffusionSafetyChecker</code>) —
	Classification module that estimates whether generated images could be considered offensive or harmful.
	Please refer to the <a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5" rel="nofollow">model card</a> for
	more details about a model’s potential harms.`,name:"safety_checker"},{anchor:"diffusers.TextToVideoZeroPipeline.feature_extractor",description:`<strong>feature_extractor</strong> (<code>CLIPImageProcessor</code>) —
	A <code>CLIPImageProcessor</code> to extract features from generated images; used as inputs to the <code>safety_checker</code>.`,name:"feature_extractor"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L298"}}),Le=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": typing.Optional[int] = 1"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) —
	The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) —
	The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) —
	The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) —
	A higher guidance scale value encourages the model to generate images closely linked to the text
	<code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide what to not include in video generation. If not defined, you need to
	pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	Corresponds to parameter eta (η) from the <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">DDIM</a> paper. Only
	applies to the <a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
	generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) —
	The output format of the generated video. Choose between <code>"latent"</code> and <code>"np"</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a
	<a href="/docs/diffusers/pr_11660/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput">TextToVideoPipelineOutput</a> instead of
	a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls every <code>callback_steps</code> steps during inference. The function is called with the
	following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at
	every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) —
	Strength of motion in generated video along x-axis. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) —
	Strength of motion in generated video along y-axis. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) —
	Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) —
	Timestep t1. Should be in the range [t0 + 1, num_inference_steps - 1]. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) —
	Indexes of the frames that are being generated. This is used when generating longer videos
	chunk-by-chunk.`,name:"frame_ids"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L543",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>The output contains a <code>ndarray</code> of the generated video, when <code>output_type</code> != <code>"latent"</code>, otherwise a
	latent code of generated videos and a list of <code>bool</code>s indicating whether the corresponding generated
	video contains “not-safe-for-work” (nsfw) content.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/diffusers/pr_11660/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput"
	>TextToVideoPipelineOutput</a></p>
	`}}),Be=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"cross_attention_kwargs",val:" = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.latents",description:`<strong>latents</strong> —
	Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> —
	Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> —
	Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> —
	A higher guidance scale value encourages the model to generate images closely linked to the text
	<code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls every <code>callback_steps</code> steps during inference. The function is called with the
	following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at
	every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> —
	Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> —
	number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L388",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>Latents of backward process output at time timesteps[-1].</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>latents</p>
	`}}),ze=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>):
	torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) —
	number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) —
	whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
	argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) —
	A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) —
	Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
	the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L815"}}),De=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> —
	Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t0",description:`<strong>t0</strong> —
	Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t1",description:`<strong>t1</strong> —
	Timestep at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
	generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L364",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>Forward process applied to x_t0 from time t0 to t1.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>x_t1</p>
	`}}),Re=new M({props:{title:"TextToVideoZeroSDXLPipeline",local:"diffusers.TextToVideoZeroSDXLPipeline",headingTag:"h2"}}),Qe=new J({props:{name:"class diffusers.TextToVideoZeroSDXLPipeline",anchor:"diffusers.TextToVideoZeroSDXLPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"text_encoder_2",val:": CLIPTextModelWithProjection"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"tokenizer_2",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"image_encoder",val:": CLIPVisionModelWithProjection = None"},{name:"feature_extractor",val:": CLIPImageProcessor = None"},{name:"force_zeros_for_empty_prompt",val:": bool = True"},{name:"add_watermarker",val:": typing.Optional[bool] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) —
	Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) —
	Frozen text-encoder. Stable Diffusion XL uses the text portion of
	<a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIP</a>, specifically
	the <a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a> variant.`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code> CLIPTextModelWithProjection</code>) —
	Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
	<a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection" rel="nofollow">CLIP</a>,
	specifically the
	<a href="https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" rel="nofollow">laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</a>
	variant.`,name:"text_encoder_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) —
	Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>CLIPTokenizer</code>) —
	Second Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.unet",description:'<strong>unet</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — Conditional U-Net architecture to denoise the encoded image latents.',name:"unet"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11660/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) —
	A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of
	<a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_11660/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_11660/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L348"}}),Ee=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"denoising_end",val:": typing.Optional[float] = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"negative_prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": 
typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"guidance_rescale",val:": float = 0.0"},{name:"original_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"crops_coords_top_left",val:": typing.Tuple[int, int] = (0, 0)"},{name:"target_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>
	instead.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is
	used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) —
	The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) —
	The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) —
	The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.denoising_end",description:`<strong>denoising_end</strong> (<code>float</code>, <em>optional</em>) —
	When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
	completed before it is intentionally prematurely terminated. As a result, the returned sample will
	still retain a substantial amount of noise as determined by the discrete timesteps selected by the
	scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
	“Mixture of Denoisers” multi-pipeline setup, as elaborated in <a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output" rel="nofollow"><strong>Refining the Image
	Output</strong></a>`,name:"denoising_end"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) —
	Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion
	Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2
	of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting
	<code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to
	the text <code>prompt</code>, usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and
	<code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	Corresponds to parameter eta (η) in the DDIM paper: <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">https://huggingface.co/papers/2010.02502</a>. Only
	applies to <a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) —
	Indexes of the frames that are being generated. This is used when generating longer videos
	chunk-by-chunk.`,name:"frame_ids"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
	argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting.
	If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code>
	input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) —
	Strength of motion in generated video along x-axis. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) —
	Strength of motion in generated video along y-axis. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generated image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput</code> instead
	of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that will be called every <code>callback_steps</code> steps during inference. The function will be
	called with the following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The frequency at which the <code>callback</code> function will be called. If not specified, the callback will be
	called at every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
	<code>self.processor</code> in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.7) —
	Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a>. <code>guidance_rescale</code> is defined as <code>φ</code> in equation 16. of
	<a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a>. Guidance rescale factor should fix overexposure when
	using zero terminal SNR.`,name:"guidance_rescale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.original_size",description:`<strong>original_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) —
	If <code>original_size</code> is not the same as <code>target_size</code> the image will appear to be down- or upsampled.
	<code>original_size</code> defaults to <code>(width, height)</code> if not specified. Part of SDXL’s micro-conditioning as
	explained in section 2.2 of
	<a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"original_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.crops_coords_top_left",description:`<strong>crops_coords_top_left</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (0, 0)) —
	<code>crops_coords_top_left</code> can be used to generate an image that appears to be “cropped” from the position
	<code>crops_coords_top_left</code> downwards. Favorable, well-centered images are usually achieved by setting
	<code>crops_coords_top_left</code> to (0, 0). Part of SDXL’s micro-conditioning as explained in section 2.2 of
	<a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"crops_coords_top_left"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.target_size",description:`<strong>target_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) —
	For most cases, <code>target_size</code> should be set to the desired height and width of the generated image. If
	not specified it will default to <code>(width, height)</code>. Part of SDXL’s micro-conditioning as explained in
	section 2.2 of <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"target_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) —
	Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) —
	Timestep t1. Should be in the range [t0 + 1, num_inference_steps - 1]. See the
	<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L949",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> or
	<code>tuple</code>: <code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code>
	if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the
	generated images.</p>
	`}}),Ye=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"add_text_embeds",val:""},{name:"add_time_ids",val:""},{name:"cross_attention_kwargs",val:" = None"},{name:"guidance_rescale",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.latents",description:`<strong>latents</strong> —
	Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> —
	Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> —
	Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> —
	A higher guidance scale value encourages the model to generate images closely linked to the text
	<code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that is called every <code>callback_steps</code> steps during inference. The function is called with the
	following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at
	every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> —
	Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> —
	number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L860",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>latents of backward process output at time timesteps[-1]</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>latents</p>
	`}}),Fe=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt",parameters:[{name:"prompt",val:": str"},{name:"prompt_2",val:": typing.Optional[str] = None"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Optional[str] = None"},{name:"negative_prompt_2",val:": typing.Optional[str] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is
	used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>):
	torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) —
	number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) —
	whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and
	<code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
	argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting.
	If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code>
	input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) —
	A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) —
	Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
	the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L597"}}),He=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> —
	Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t0",description:`<strong>t0</strong> —
	Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t1",description:`<strong>t1</strong> —
	Timestep at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
	generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L836",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>Forward process applied to x_t0 from time t0 to t1.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>x_t1</p>
	`}}),qe=new M({props:{title:"TextToVideoPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",headingTag:"h2"}}),Ae=new J({props:{name:"class diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",parameters:[{name:"images",val:": typing.Union[typing.List[PIL.Image.Image], numpy.ndarray]"},{name:"nsfw_content_detected",val:": typing.Optional[typing.List[bool]]"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.images",description:`<strong>images</strong> (<code>[List[PIL.Image.Image]</code>, <code>np.ndarray</code>]) —
	List of denoised PIL images of length <code>batch_size</code> or NumPy array of shape <code>(batch_size, height, width, num_channels)</code>.`,name:"images"},{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.nsfw_content_detected",description:`<strong>nsfw_content_detected</strong> (<code>[List[bool]]</code>) —
	List indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content or
	<code>None</code> if safety checking could not be performed.`,name:"nsfw_content_detected"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L196"}}),Oe=new Is({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video_zero.md"}}),{c(){b=a("meta"),R=s(),Z=a("p"),Ke=s(),d(Q.$$.fragment),Pt=s(),X=a("div"),X.innerHTML=In,St=s(),E=a("p"),E.innerHTML=Cn,Nt=s(),Y=a("p"),Y.textContent=Xn,$t=s(),F=a("ol"),F.innerHTML=Wn,Lt=s(),H=a("p"),H.textContent=Gn,Bt=s(),q=a("p"),q.innerHTML=Pn,zt=s(),A=a("p"),A.textContent=Sn,Dt=s(),O=a("p"),O.innerHTML=Nn,Rt=s(),K=a("p"),K.innerHTML=$n,Qt=s(),d(ee.$$.fragment),Et=s(),d(te.$$.fragment),Yt=s(),oe=a("p"),oe.textContent=Ln,Ft=s(),d(ne.$$.fragment),Ht=s(),se=a("p"),se.textContent=Bn,qt=s(),ie=a("ul"),ie.innerHTML=zn,At=s(),ae=a("p"),ae.textContent=Dn,Ot=s(),d(re.$$.fragment),Kt=s(),et=a("ul"),W=a("li"),d(le.$$.fragment),Ro=xs("In order to use the SDXL model when generating a video from prompt, use the "),tt=a("code"),tt.textContent=Rn,Qo=xs(" 
pipeline:"),eo=s(),d(pe.$$.fragment),to=s(),d(de.$$.fragment),oo=s(),ce=a("p"),ce.textContent=Qn,no=s(),ot=a("ol"),me=a("li"),nt=a("p"),nt.textContent=En,Eo=s(),d(fe.$$.fragment),so=s(),V=a("ol"),j=a("li"),st=a("p"),st.textContent=Yn,Yo=s(),d(he.$$.fragment),Fo=s(),it=a("p"),it.innerHTML=Fn,Ho=s(),ue=a("li"),at=a("p"),at.innerHTML=Hn,qo=s(),d(ge.$$.fragment),io=s(),rt=a("ul"),I=a("li"),d(_e.$$.fragment),Ao=s(),lt=a("p"),lt.textContent=qn,Oo=s(),d(Te.$$.fragment),ao=s(),d(be.$$.fragment),ro=s(),ye=a("p"),ye.innerHTML=An,lo=s(),d(we.$$.fragment),po=s(),xe=a("p"),xe.innerHTML=On,co=s(),k=a("ol"),ve=a("li"),pt=a("p"),pt.textContent=Kn,Ko=s(),d(Me.$$.fragment),en=s(),Ze=a("li"),dt=a("p"),dt.textContent=es,tn=s(),d(Je.$$.fragment),on=s(),ke=a("li"),ct=a("p"),ct.innerHTML=ts,nn=s(),d(Ue.$$.fragment),mo=s(),d(Ve.$$.fragment),fo=s(),je=a("p"),je.innerHTML=os,ho=s(),U=a("ol"),Ie=a("li"),mt=a("p"),mt.textContent=ns,sn=s(),d(Ce.$$.fragment),an=s(),Xe=a("li"),ft=a("p"),ft.textContent=ss,rn=s(),d(We.$$.fragment),ln=s(),Ge=a("li"),ht=a("p"),ht.innerHTML=is,pn=s(),d(Pe.$$.fragment),uo=s(),Se=a("p"),Se.innerHTML=as,go=s(),d(G.$$.fragment),_o=s(),d(Ne.$$.fragment),To=s(),_=a("div"),d($e.$$.fragment),dn=s(),ut=a("p"),ut.textContent=rs,cn=s(),gt=a("p"),gt.innerHTML=ls,mn=s(),P=a("div"),d(Le.$$.fragment),fn=s(),_t=a("p"),_t.textContent=ps,hn=s(),S=a("div"),d(Be.$$.fragment),un=s(),Tt=a("p"),Tt.textContent=ds,gn=s(),N=a("div"),d(ze.$$.fragment),_n=s(),bt=a("p"),bt.textContent=cs,Tn=s(),$=a("div"),d(De.$$.fragment),bn=s(),yt=a("p"),yt.textContent=ms,bo=s(),d(Re.$$.fragment),yo=s(),T=a("div"),d(Qe.$$.fragment),yn=s(),wt=a("p"),wt.textContent=fs,wn=s(),xt=a("p"),xt.innerHTML=hs,xn=s(),L=a("div"),d(Ee.$$.fragment),vn=s(),vt=a("p"),vt.textContent=us,Mn=s(),B=a("div"),d(Ye.$$.fragment),Zn=s(),Mt=a("p"),Mt.textContent=gs,Jn=s(),z=a("div"),d(Fe.$$.fragment),kn=s(),Zt=a("p"),Zt.textContent=_s,Un=s(),D=a("div"),d(He.$$.fragment),Vn=s(),Jt=a("p"),Jt.textContent=Ts,wo=s(),d(qe.$$.fragment),xo=s(),C=
a("div"),d(Ae.$$.fragment),jn=s(),kt=a("p"),kt.textContent=bs,vo=s(),d(Oe.$$.fragment),Mo=s(),Ct=a("p"),this.h()},l(e){const o=Vs("svelte-u9bgzb",document.head);b=r(o,"META",{name:!0,content:!0}),o.forEach(t),R=i(e),Z=r(e,"P",{}),g(Z).forEach(t),Ke=i(e),c(Q.$$.fragment,e),Pt=i(e),X=r(e,"DIV",{class:!0,"data-svelte-h":!0}),p(X)!=="svelte-si9ct8"&&(X.innerHTML=In),St=i(e),E=r(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-16xpzkf"&&(E.innerHTML=Cn),Nt=i(e),Y=r(e,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-1jaz0ks"&&(Y.textContent=Xn),$t=i(e),F=r(e,"OL",{"data-svelte-h":!0}),p(F)!=="svelte-19ca1wn"&&(F.innerHTML=Wn),Lt=i(e),H=r(e,"P",{"data-svelte-h":!0}),p(H)!=="svelte-yhxhyq"&&(H.textContent=Gn),Bt=i(e),q=r(e,"P",{"data-svelte-h":!0}),p(q)!=="svelte-uupw0l"&&(q.innerHTML=Pn),zt=i(e),A=r(e,"P",{"data-svelte-h":!0}),p(A)!=="svelte-1cwsb16"&&(A.textContent=Sn),Dt=i(e),O=r(e,"P",{"data-svelte-h":!0}),p(O)!=="svelte-1rtg145"&&(O.innerHTML=Nn),Rt=i(e),K=r(e,"P",{"data-svelte-h":!0}),p(K)!=="svelte-18vykkr"&&(K.innerHTML=$n),Qt=i(e),c(ee.$$.fragment,e),Et=i(e),c(te.$$.fragment,e),Yt=i(e),oe=r(e,"P",{"data-svelte-h":!0}),p(oe)!=="svelte-1dpixty"&&(oe.textContent=Ln),Ft=i(e),c(ne.$$.fragment,e),Ht=i(e),se=r(e,"P",{"data-svelte-h":!0}),p(se)!=="svelte-rs2kss"&&(se.textContent=Bn),qt=i(e),ie=r(e,"UL",{"data-svelte-h":!0}),p(ie)!=="svelte-1w3gvi6"&&(ie.innerHTML=zn),At=i(e),ae=r(e,"P",{"data-svelte-h":!0}),p(ae)!=="svelte-1xdujjh"&&(ae.textContent=Dn),Ot=i(e),c(re.$$.fragment,e),Kt=i(e),et=r(e,"UL",{});var Xt=g(et);W=r(Xt,"LI",{});var Wt=g(W);c(le.$$.fragment,Wt),Ro=vs(Wt,"In order to use the SDXL model when generating a video from prompt, use the "),tt=r(Wt,"CODE",{"data-svelte-h":!0}),p(tt)!=="svelte-7o0i0w"&&(tt.textContent=Rn),Qo=vs(Wt," pipeline:"),Wt.forEach(t),Xt.forEach(t),eo=i(e),c(pe.$$.fragment,e),to=i(e),c(de.$$.fragment,e),oo=i(e),ce=r(e,"P",{"data-svelte-h":!0}),p(ce)!=="svelte-13nlg5v"&&(ce.textContent=Qn),no=i(e),ot=r(e,"OL",{});var ys=g(ot);me=r(ys,"LI",{});var 
Jo=g(me);nt=r(Jo,"P",{"data-svelte-h":!0}),p(nt)!=="svelte-1xo2nq1"&&(nt.textContent=En),Eo=i(Jo),c(fe.$$.fragment,Jo),Jo.forEach(t),ys.forEach(t),so=i(e),V=r(e,"OL",{start:!0});var ko=g(V);j=r(ko,"LI",{});var Ut=g(j);st=r(Ut,"P",{"data-svelte-h":!0}),p(st)!=="svelte-7pm43o"&&(st.textContent=Yn),Yo=i(Ut),c(he.$$.fragment,Ut),Fo=i(Ut),it=r(Ut,"P",{"data-svelte-h":!0}),p(it)!=="svelte-mij11h"&&(it.innerHTML=Fn),Ut.forEach(t),Ho=i(ko),ue=r(ko,"LI",{});var Uo=g(ue);at=r(Uo,"P",{"data-svelte-h":!0}),p(at)!=="svelte-1wsv5bt"&&(at.innerHTML=Hn),qo=i(Uo),c(ge.$$.fragment,Uo),Uo.forEach(t),ko.forEach(t),io=i(e),rt=r(e,"UL",{});var ws=g(rt);I=r(ws,"LI",{});var Vt=g(I);c(_e.$$.fragment,Vt),Ao=i(Vt),lt=r(Vt,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-bcy3mo"&&(lt.textContent=qn),Oo=i(Vt),c(Te.$$.fragment,Vt),Vt.forEach(t),ws.forEach(t),ao=i(e),c(be.$$.fragment,e),ro=i(e),ye=r(e,"P",{"data-svelte-h":!0}),p(ye)!=="svelte-1fgx3w6"&&(ye.innerHTML=An),lo=i(e),c(we.$$.fragment,e),po=i(e),xe=r(e,"P",{"data-svelte-h":!0}),p(xe)!=="svelte-o0vmiz"&&(xe.innerHTML=On),co=i(e),k=r(e,"OL",{});var jt=g(k);ve=r(jt,"LI",{});var Vo=g(ve);pt=r(Vo,"P",{"data-svelte-h":!0}),p(pt)!=="svelte-1xo2nq1"&&(pt.textContent=Kn),Ko=i(Vo),c(Me.$$.fragment,Vo),Vo.forEach(t),en=i(jt),Ze=r(jt,"LI",{});var jo=g(Ze);dt=r(jo,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-1wln6n0"&&(dt.textContent=es),tn=i(jo),c(Je.$$.fragment,jo),jo.forEach(t),on=i(jt),ke=r(jt,"LI",{});var Io=g(ke);ct=r(Io,"P",{"data-svelte-h":!0}),p(ct)!=="svelte-14jy04z"&&(ct.innerHTML=ts),nn=i(Io),c(Ue.$$.fragment,Io),Io.forEach(t),jt.forEach(t),mo=i(e),c(Ve.$$.fragment,e),fo=i(e),je=r(e,"P",{"data-svelte-h":!0}),p(je)!=="svelte-x1pkth"&&(je.innerHTML=os),ho=i(e),U=r(e,"OL",{});var It=g(U);Ie=r(It,"LI",{});var Co=g(Ie);mt=r(Co,"P",{"data-svelte-h":!0}),p(mt)!=="svelte-1xo2nq1"&&(mt.textContent=ns),sn=i(Co),c(Ce.$$.fragment,Co),Co.forEach(t),an=i(It),Xe=r(It,"LI",{});var 
Xo=g(Xe);ft=r(Xo,"P",{"data-svelte-h":!0}),p(ft)!=="svelte-1wln6n0"&&(ft.textContent=ss),rn=i(Xo),c(We.$$.fragment,Xo),Xo.forEach(t),ln=i(It),Ge=r(It,"LI",{});var Wo=g(Ge);ht=r(Wo,"P",{"data-svelte-h":!0}),p(ht)!=="svelte-okpmrc"&&(ht.innerHTML=is),pn=i(Wo),c(Pe.$$.fragment,Wo),Wo.forEach(t),It.forEach(t),uo=i(e),Se=r(e,"P",{"data-svelte-h":!0}),p(Se)!=="svelte-18tnclt"&&(Se.innerHTML=as),go=i(e),c(G.$$.fragment,e),_o=i(e),c(Ne.$$.fragment,e),To=i(e),_=r(e,"DIV",{class:!0});var w=g(_);c($e.$$.fragment,w),dn=i(w),ut=r(w,"P",{"data-svelte-h":!0}),p(ut)!=="svelte-1q57293"&&(ut.textContent=rs),cn=i(w),gt=r(w,"P",{"data-svelte-h":!0}),p(gt)!=="svelte-1ffteuy"&&(gt.innerHTML=ls),mn=i(w),P=r(w,"DIV",{class:!0});var Go=g(P);c(Le.$$.fragment,Go),fn=i(Go),_t=r(Go,"P",{"data-svelte-h":!0}),p(_t)!=="svelte-50j04k"&&(_t.textContent=ps),Go.forEach(t),hn=i(w),S=r(w,"DIV",{class:!0});var Po=g(S);c(Be.$$.fragment,Po),un=i(Po),Tt=r(Po,"P",{"data-svelte-h":!0}),p(Tt)!=="svelte-1cxzr1t"&&(Tt.textContent=ds),Po.forEach(t),gn=i(w),N=r(w,"DIV",{class:!0});var So=g(N);c(ze.$$.fragment,So),_n=i(So),bt=r(So,"P",{"data-svelte-h":!0}),p(bt)!=="svelte-16q0ax1"&&(bt.textContent=cs),So.forEach(t),Tn=i(w),$=r(w,"DIV",{class:!0});var No=g($);c(De.$$.fragment,No),bn=i(No),yt=r(No,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-1d8vbe1"&&(yt.textContent=ms),No.forEach(t),w.forEach(t),bo=i(e),c(Re.$$.fragment,e),yo=i(e),T=r(e,"DIV",{class:!0});var x=g(T);c(Qe.$$.fragment,x),yn=i(x),wt=r(x,"P",{"data-svelte-h":!0}),p(wt)!=="svelte-pyonrv"&&(wt.textContent=fs),wn=i(x),xt=r(x,"P",{"data-svelte-h":!0}),p(xt)!=="svelte-1ffteuy"&&(xt.innerHTML=hs),xn=i(x),L=r(x,"DIV",{class:!0});var $o=g(L);c(Ee.$$.fragment,$o),vn=i($o),vt=r($o,"P",{"data-svelte-h":!0}),p(vt)!=="svelte-v78lg8"&&(vt.textContent=us),$o.forEach(t),Mn=i(x),B=r(x,"DIV",{class:!0});var Lo=g(B);c(Ye.$$.fragment,Lo),Zn=i(Lo),Mt=r(Lo,"P",{"data-svelte-h":!0}),p(Mt)!=="svelte-jp6j47"&&(Mt.textContent=gs),Lo.forEach(t),Jn=i(x),z=r(x,"DIV",{class:!0});var 
Bo=g(z);c(Fe.$$.fragment,Bo),kn=i(Bo),Zt=r(Bo,"P",{"data-svelte-h":!0}),p(Zt)!=="svelte-16q0ax1"&&(Zt.textContent=_s),Bo.forEach(t),Un=i(x),D=r(x,"DIV",{class:!0});var zo=g(D);c(He.$$.fragment,zo),Vn=i(zo),Jt=r(zo,"P",{"data-svelte-h":!0}),p(Jt)!=="svelte-1d8vbe1"&&(Jt.textContent=Ts),zo.forEach(t),x.forEach(t),wo=i(e),c(qe.$$.fragment,e),xo=i(e),C=r(e,"DIV",{class:!0});var Do=g(C);c(Ae.$$.fragment,Do),jn=i(Do),kt=r(Do,"P",{"data-svelte-h":!0}),p(kt)!=="svelte-1dgz4ei"&&(kt.textContent=bs),Do.forEach(t),vo=i(e),c(Oe.$$.fragment,e),Mo=i(e),Ct=r(e,"P",{}),g(Ct).forEach(t),this.h()},h(){y(b,"name","hf:doc:metadata"),y(b,"content",Ws),y(X,"class","flex flex-wrap space-x-1"),y(V,"start","2"),y(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(_,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,o){n(document.head,b),l(e,R,o),l(e,Z,o),l(e,Ke,o),m(Q,e,o),l(e,Pt,o),l(e,X,o),l(e,St,o),l(e,E,o),l(e,Nt,o),l(e,Y,o),l(e,$t,o),l(e,F,o),l(e,Lt,o),l(e,H,o),l(e,Bt,o),l(e,q,o),l(e,zt,o),l(e,A,o),l(e,Dt,o),l(e,O,o),l(e,Rt,o),l(e,K,o),l(e,Qt,o),m(ee,e,o),l(e,Et,o),m(te,e,o),l(e,Yt,o),l(e,oe,o),l(e,Ft,o),m(ne,e,o),l(e,Ht,o),l(e,se,o),l(e,qt,o),l(e,ie,o),l(e,At,o),l(e,ae,o),l(e,Ot,o),m(re,e,o),l(e,Kt,o),l(e,et,o),n(et,W),m(le,W,null),n(W,Ro),n(W,tt),n(W,Qo),l(e,eo,o),m(pe,e,o),l(e,to,o),m(de,e,o),l(e,oo,o),l(e,ce,o),l(e,no,o),l(e,ot,o),n(ot,me),n(me,nt),n(me,Eo),m(fe,me,null),l(e,so,o),l(e,V,o),n(V,j),n(j,st),n(j,Yo),m(he,j,null),n(j,Fo),n(j,it),n(V,Ho),n(V,ue),n(ue,at),n(ue,qo),m(ge,ue,null),l(e,io,o),l(e,rt,o),n(rt,I),m(_e,I,null),n(I,Ao),n(I,lt),n(I,Oo),m(Te,I,null),l(e,ao,o),m(be,e,o),l(e,ro,o),l(e,ye,o),l(e,lo,o),m(we,e,o),l(e,po,o),l(e,xe,o),l(e,co,o),l(e,k,o),n(k,ve),n(ve,pt),n(ve,Ko),m(Me,ve,null),n(k,en),n(k,Ze),n(Ze,dt),n(Ze,tn),m(Je,Ze,null),n(k,on),n(k,ke),n(ke,ct),n(ke,nn),m(Ue,ke,null),l(e,mo,o),m(Ve,e,o),l(e,fo,o),l(e,je,o),l(e,ho,o),l(e,U,o),n(U,Ie),n(Ie,mt),n(Ie,sn),m(Ce,Ie,null),n(U,an),n(U,Xe),n(Xe,ft),n(Xe,rn),m(We,Xe,null),n(U,ln),n(U,Ge),n(Ge,ht),n(Ge,pn),m(Pe,Ge,null),l(e,uo,o),l(e,Se,o),l(e,go,o),m(G,e,o),l(e,_o,o),m(Ne,e,o),l(e,To,o),l(e,_,o),m($e,_,null),n(_,dn),n(_,ut),n(_,cn),n(_,gt),n(_,mn),n(_,P),m(Le,P,null),n(P,fn),n(P,_t),n(_,hn),n(_,S),m(Be,S,null),n(S,un),n(S,Tt),n(_,gn),n(_,N),m(ze,N,null),n(N,_n),n(N,bt),n(_,Tn),n(_,$),m(De,$,null),n($,bn),n($,yt),l(e,bo,o),m(Re,e,o),l(e,yo,o),l(e,T,o),m(Qe,T,null),n(T,yn),n(T,wt),n(T,wn),n(T,xt),n(T,xn),n(T,L),m(Ee,L,null),n(L,vn),n(L,vt),n(T,Mn),n(T,B),m(Ye,B,null),n(B,Zn),n(B,Mt),n(T,Jn),n(T,z),m(Fe,z,null),n(z,kn),n(z,Zt),n(T,Un),n(T,D),m(He,D,null),n(D,Vn),n(D,Jt),l(e,wo,o),m(qe,e,o),l(e,xo,o),l(e,C,o),m(Ae,C,null),n(C,jn),n(C,kt),l(e,vo,o),m(Oe,e,o),l(e,Mo,o),l(e,Ct,o),Zo=!0},p(e,[o]){const 
Xt={};o&2&&(Xt.$$scope={dirty:o,ctx:e}),G.$set(Xt)},i(e){Zo\|\|(f(Q.$$.fragment,e),f(ee.$$.fragment,e),f(te.$$.fragment,e),f(ne.$$.fragment,e),f(re.$$.fragment,e),f(le.$$.fragment,e),f(pe.$$.fragment,e),f(de.$$.fragment,e),f(fe.$$.fragment,e),f(he.$$.fragment,e),f(ge.$$.fragment,e),f(_e.$$.fragment,e),f(Te.$$.fragment,e),f(be.$$.fragment,e),f(we.$$.fragment,e),f(Me.$$.fragment,e),f(Je.$$.fragment,e),f(Ue.$$.fragment,e),f(Ve.$$.fragment,e),f(Ce.$$.fragment,e),f(We.$$.fragment,e),f(Pe.$$.fragment,e),f(G.$$.fragment,e),f(Ne.$$.fragment,e),f($e.$$.fragment,e),f(Le.$$.fragment,e),f(Be.$$.fragment,e),f(ze.$$.fragment,e),f(De.$$.fragment,e),f(Re.$$.fragment,e),f(Qe.$$.fragment,e),f(Ee.$$.fragment,e),f(Ye.$$.fragment,e),f(Fe.$$.fragment,e),f(He.$$.fragment,e),f(qe.$$.fragment,e),f(Ae.$$.fragment,e),f(Oe.$$.fragment,e),Zo=!0)},o(e){h(Q.$$.fragment,e),h(ee.$$.fragment,e),h(te.$$.fragment,e),h(ne.$$.fragment,e),h(re.$$.fragment,e),h(le.$$.fragment,e),h(pe.$$.fragment,e),h(de.$$.fragment,e),h(fe.$$.fragment,e),h(he.$$.fragment,e),h(ge.$$.fragment,e),h(_e.$$.fragment,e),h(Te.$$.fragment,e),h(be.$$.fragment,e),h(we.$$.fragment,e),h(Me.$$.fragment,e),h(Je.$$.fragment,e),h(Ue.$$.fragment,e),h(Ve.$$.fragment,e),h(Ce.$$.fragment,e),h(We.$$.fragment,e),h(Pe.$$.fragment,e),h(G.$$.fragment,e),h(Ne.$$.fragment,e),h($e.$$.fragment,e),h(Le.$$.fragment,e),h(Be.$$.fragment,e),h(ze.$$.fragment,e),h(De.$$.fragment,e),h(Re.$$.fragment,e),h(Qe.$$.fragment,e),h(Ee.$$.fragment,e),h(Ye.$$.fragment,e),h(Fe.$$.fragment,e),h(He.$$.fragment,e),h(qe.$$.fragment,e),h(Ae.$$.fragment,e),h(Oe.$$.fragment,e),Zo=!1},d(e){e&&(t(R),t(Z),t(Ke),t(Pt),t(X),t(St),t(E),t(Nt),t(Y),t($t),t(F),t(Lt),t(H),t(Bt),t(q),t(zt),t(A),t(Dt),t(O),t(Rt),t(K),t(Qt),t(Et),t(Yt),t(oe),t(Ft),t(Ht),t(se),t(qt),t(ie),t(At),t(ae),t(Ot),t(Kt),t(et),t(eo),t(to),t(oo),t(ce),t(no),t(ot),t(so),t(V),t(io),t(rt),t(ao),t(ro),t(ye),t(lo),t(po),t(xe),t(co),t(k),t(mo),t(fo),t(je),t(ho),t(U),t(uo),t(Se),t(go),t(_o),t(To),t(_),t(bo),t(yo),t(T),t(wo)
,t(xo),t(C),t(vo),t(Mo),t(Ct)),t(b),u(Q,e),u(ee,e),u(te,e),u(ne,e),u(re,e),u(le),u(pe,e),u(de,e),u(fe),u(he),u(ge),u(_e),u(Te),u(be,e),u(we,e),u(Me),u(Je),u(Ue),u(Ve,e),u(Ce),u(We),u(Pe),u(G,e),u(Ne,e),u($e),u(Le),u(Be),u(ze),u(De),u(Re,e),u(Qe),u(Ee),u(Ye),u(Fe),u(He),u(qe,e),u(Ae),u(Oe,e)}}}
/**
 * Page metadata for the "Text2Video-Zero" documentation page, stored as a
 * JSON string: a nested outline of sections, each with `title`, `local`,
 * `sections` and `depth` fields. The page's `h()` hook assigns this string to
 * the `content` attribute of the element whose `name` is "hf:doc:metadata".
 *
 * NOTE(review): two sections share the same `local` value "sdxl-support"
 * (one under "Text-To-Video", one under "Text-To-Video with Pose Control").
 * If `local` is used as an anchor id the two would collide - confirm and fix
 * in the documentation source rather than in this generated file.
 */
const Ws='{"title":"Text2Video-Zero","local":"text2video-zero","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"Text-To-Video","local":"text-to-video","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Pose Control","local":"text-to-video-with-pose-control","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Edge Control","local":"text-to-video-with-edge-control","sections":[],"depth":3},{"title":"Video Instruct-Pix2Pix","local":"video-instruct-pix2pix","sections":[],"depth":3},{"title":"DreamBooth specialization","local":"dreambooth-specialization","sections":[],"depth":3}],"depth":2},{"title":"TextToVideoZeroPipeline","local":"diffusers.TextToVideoZeroPipeline","sections":[],"depth":2},{"title":"TextToVideoZeroSDXLPipeline","local":"diffusers.TextToVideoZeroSDXLPipeline","sections":[],"depth":2},{"title":"TextToVideoPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function handed to `Us` by the `zs` constructor below.
 *
 * Registers a callback through `Zs` (imported from the scheduler chunk;
 * presumably Svelte's `onMount` - confirm) and returns an empty array.
 * The callback looks up the `fw` query parameter of the current URL but
 * discards the value, so it has no visible effect here.
 *
 * @param {*} Gt - Unused in this function body.
 * @returns {Array} Always a new empty array.
 */
function Gs(Gt){return Zs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component for this route; exported under the name `component`.
 *
 * Extends `ks` (imported from the index chunk; presumably `SvelteComponent`
 * - confirm). The constructor calls `super()` and then `Us` with this
 * instance, the constructor argument `b`, the instance function `Gs`, the
 * function `Xs` defined earlier in this file, `Ms` (imported from the
 * scheduler chunk) and an empty object.
 */
class zs extends ks{constructor(b){super(),Us(this,b,Gs,Xs,Ms,{})}}
export{zs as component};

Xet Storage Details

Size: 105 kB
Xet hash: 023c3a50e2cd93d617ed00631c3ce50b8b2817f393a6c3f9a9cee121b12d4ece

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.