Buckets:
| import{s as ws,o as xs,n as vs}from"../chunks/scheduler.8c3d61f6.js";import{S as Ms,i as Zs,g as a,s,r as d,m as Ts,A as Js,h as r,f as t,c as i,j as g,u as c,x as p,n as ys,k as x,y as n,a as l,v as m,d as f,t as h,w as u}from"../chunks/index.da70eac4.js";import{T as Us}from"../chunks/Tip.1d9b8c37.js";import{D as J}from"../chunks/Docstring.6b390b9a.js";import{C as v}from"../chunks/CodeBlock.00a903b3.js";import{H as M,E as ks}from"../chunks/EditOnGithub.1e64e623.js";function Vs(Wt){let T,D='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-components-across-pipelines">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){T=a("p"),T.innerHTML=D},l(Z){T=r(Z,"P",{"data-svelte-h":!0}),p(T)!=="svelte-1wmc0l4"&&(T.innerHTML=D)},m(Z,Oe){l(Z,T,Oe)},p:vs,d(Z){Z&&t(T)}}}function js(Wt){let T,D,Z,Oe,R,Gt,Q,Vn='<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">Text2Video-Zero: Text-to-Image Diffusion Models are Zero-Shot Video Generators</a> is by Levon Khachatryan, Andranik Movsisyan, Vahram Tadevosyan, Roberto Henschel, <a href="https://www.ece.utexas.edu/people/faculty/atlas-wang" rel="nofollow">Zhangyang Wang</a>, Shant Navasardyan, <a href="https://www.humphreyshi.com" rel="nofollow">Humphrey Shi</a>.',Pt,E,jn="Text2Video-Zero enables zero-shot video generation using either:",St,Y,In="<li>A textual prompt</li> <li>A prompt combined with guidance from poses or edges</li> <li>Video Instruct-Pix2Pix (instruction-guided video editing)</li>",Nt,F,Cn="Results are temporally consistent and closely follow the guidance and textual prompts.",$t,H,Xn='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/t2v_zero_teaser.png" alt="teaser-img"/>',Lt,q,Wn="The abstract from the paper is:",Bt,A,Gn=`<em>Recent text-to-video generation approaches rely on computationally heavy training and require large-scale video datasets. In this paper, we introduce a new task of zero-shot text-to-video generation and propose a low-cost approach (without any training or optimization) by leveraging the power of existing text-to-image synthesis methods (e.g., Stable Diffusion), making them suitable for the video domain. | |
| Our key modifications include (i) enriching the latent codes of the generated frames with motion dynamics to keep the global scene and the background time consistent; and (ii) reprogramming frame-level self-attention using a new cross-frame attention of each frame on the first frame, to preserve the context, appearance, and identity of the foreground object. | |
| Experiments show that this leads to low overhead, yet high-quality and remarkably consistent video generation. Moreover, our approach is not limited to text-to-video synthesis but is also applicable to other tasks such as conditional and content-specialized video generation, and Video Instruct-Pix2Pix, i.e., instruction-guided video editing. | |
| As experiments show, our method performs comparably or sometimes better than recent approaches, despite not being trained on additional video data.</em>`,zt,O,Pn='You can find additional information about Text2Video-Zero on the <a href="https://text2video-zero.github.io/" rel="nofollow">project page</a>, <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, and <a href="https://github.com/Picsart-AI-Research/Text2Video-Zero" rel="nofollow">original codebase</a>.',Dt,K,Rt,ee,Qt,te,Sn="To generate a video from prompt, run the following Python code:",Et,oe,Yt,ne,Nn="You can change these parameters in the pipeline call:",Ft,se,$n='<li>Motion field strength (see the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1):<ul><li><code>motion_field_strength_x</code> and <code>motion_field_strength_y</code>. Default: <code>motion_field_strength_x=12</code>, <code>motion_field_strength_y=12</code></li></ul></li> <li><code>T</code> and <code>T'</code> (see the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1)<ul><li><code>t0</code> and <code>t1</code> in the range <code>{0, ..., num_inference_steps}</code>. Default: <code>t0=45</code>, <code>t1=48</code></li></ul></li> <li>Video length:<ul><li><code>video_length</code>, the number of frames video_length to be generated. Default: <code>video_length=8</code></li></ul></li>',Ht,ie,Ln="We can also generate longer videos by doing the processing in a chunk-by-chunk manner:",qt,ae,At,Ke,X,re,zo,et,Bn="TextToVideoZeroSDXLPipeline",Do,Ot,le,Kt,pe,eo,de,zn="To generate a video from prompt with additional pose control",to,tt,ce,ot,Dn="Download a demo video",Ro,me,oo,V,j,nt,Rn="Read video containing extracted pose images",Qo,fe,Eo,st,Qn='To extract pose from actual video, read <a href="controlnet">ControlNet documentation</a>.',Yo,he,it,En="Run <code>StableDiffusionControlNetPipeline</code> with our custom attention processor",Fo,ue,no,at,I,ge,Ho,rt,Yn="Since our attention processor also works with SDXL, it can be utilized to generate a video from prompt using ControlNet models powered by SDXL:",qo,_e,so,be,io,Te,Fn='To generate a video from prompt with additional Canny edge control, follow the same steps described above for pose-guided generation using <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a>.',ao,ye,ro,we,Hn='To perform text-guided video editing (with <a href="pix2pix">InstructPix2Pix</a>):',lo,U,xe,lt,qn="Download a demo video",Ao,ve,Oo,Me,pt,An="Read video from path",Ko,Ze,en,Je,dt,On="Run <code>StableDiffusionInstructPix2PixPipeline</code> with our custom attention processor",tn,Ue,po,ke,co,Ve,Kn=`Methods <strong>Text-To-Video</strong>, <strong>Text-To-Video with Pose Control</strong> and <strong>Text-To-Video with Edge Control</strong> | |
| can run with custom <a href="../../training/dreambooth">DreamBooth</a> models, as shown below for | |
| <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a> and | |
| <a href="https://huggingface.co/PAIR/text2video-zero-controlnet-canny-avatar" rel="nofollow">Avatar style DreamBooth</a> model:`,mo,k,je,ct,es="Download a demo video",on,Ie,nn,Ce,mt,ts="Read video from path",sn,Xe,an,We,ft,os="Run <code>StableDiffusionControlNetPipeline</code> with custom trained DreamBooth model",rn,Ge,fo,Pe,ns='You can filter out some available DreamBooth-trained models with <a href="https://huggingface.co/models?search=dreambooth" rel="nofollow">this link</a>.',ho,W,uo,Se,go,_,Ne,ln,ht,ss="Pipeline for zero-shot text-to-video generation using Stable Diffusion.",pn,ut,is=`This model inherits from <a href="/docs/diffusers/pr_10083/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,dn,G,$e,cn,gt,as="The call function to the pipeline for generation.",mn,P,Le,fn,_t,rs="Perform backward process given list of time steps.",hn,S,Be,un,bt,ls="Encodes the prompt into text encoder hidden states.",gn,N,ze,_n,Tt,ps="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",_o,De,bo,b,Re,bn,yt,ds="Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.",Tn,wt,cs=`This model inherits from <a href="/docs/diffusers/pr_10083/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,yn,$,Qe,wn,xt,ms="Function invoked when calling the pipeline for generation.",xn,L,Ee,vn,vt,fs="Perform backward process given list of time steps",Mn,B,Ye,Zn,Mt,hs="Encodes the prompt into text encoder hidden states.",Jn,z,Fe,Un,Zt,us="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",To,He,yo,C,qe,kn,Jt,gs="Output class for zero-shot text-to-video pipeline.",wo,Ae,xo,It,vo;return R=new M({props:{title:"Text2Video-Zero",local:"text2video-zero",headingTag:"h1"}}),K=new M({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),ee=new M({props:{title:"Text-To-Video",local:"text-to-video",headingTag:"h3"}}),oe=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBpbWFnZWlvJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBwYW5kYSUyMGlzJTIwcGxheWluZyUyMGd1aXRhciUyMG9uJTIwdGltZXMlMjBzcXVhcmUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCkuaW1hZ2VzJTBBcmVzdWx0JTIwJTNEJTIwJTVCKHIlMjAqJTIwMjU1KS5hc3R5cGUoJTIydWludDglMjIpJTIwZm9yJTIwciUyMGluJTIwcmVzdWx0JTVEJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> imageio | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| result = pipe(prompt=prompt).images | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),ae=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQXBpcGUlMjAlM0QlMjBUZXh0VG9WaWRlb1plcm9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBc2VlZCUyMCUzRCUyMDAlMEF2aWRlb19sZW5ndGglMjAlM0QlMjAyNCUyMCUyMCUyMzI0JTIwJUMzJUI3JTIwNGZwcyUyMCUzRCUyMDYlMjBzZWNvbmRzJTBBY2h1bmtfc2l6ZSUyMCUzRCUyMDglMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwcGFuZGElMjBpcyUyMHBsYXlpbmclMjBndWl0YXIlMjBvbiUyMHRpbWVzJTIwc3F1YXJlJTIyJTBBJTBBJTIzJTIwR2VuZXJhdGUlMjB0aGUlMjB2aWRlbyUyMGNodW5rLWJ5LWNodW5rJTBBcmVzdWx0JTIwJTNEJTIwJTVCJTVEJTBBY2h1bmtfaWRzJTIwJTNEJTIwbnAuYXJhbmdlKDAlMkMlMjB2aWRlb19sZW5ndGglMkMlMjBjaHVua19zaXplJTIwLSUyMDEpJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSUzRCUyMmN1ZGElMjIpJTBBZm9yJTIwaSUyMGluJTIwcmFuZ2UobGVuKGNodW5rX2lkcykpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlByb2Nlc3NpbmclMjBjaHVuayUyMCU3QmklMjAlMkIlMjAxJTdEJTIwJTJGJTIwJTdCbGVuKGNodW5rX2lkcyklN0QlMjIpJTBBJTIwJTIwJTIwJTIwY2hfc3RhcnQlMjAlM0QlMjBjaHVua19pZHMlNUJpJTVEJTBBJTIwJTIwJTIwJTIwY2hfZW5kJTIwJTNEJTIwdmlkZW9fbGVuZ3RoJTIwaWYlMjBpJTIwJTNEJTNEJTIwbGVuKGNodW5rX2lkcyklMjAtJTIwMSUyMGVsc2UlMjBjaHVua19pZHMlNUJpJTIwJTJCJTIwMSU1RCUwQSUyMCUyMCUyMCUyMCUyMyUyMEF0dGFjaCUyMHRoZSUyMGZpcnN0JTIwZnJhbWUlMjBmb3IlMjBDcm9zcyUyMEZyYW1lJTIwQXR0ZW50aW9uJTBBJTIwJTIwJTIwJTIwZnJhbWVfaWRzJTIwJTNEJTIwJTVCMCU1RCUyMCUyQiUyMGxpc3QocmFuZ2UoY2hfc3RhcnQlMkMlMjBjaF9lbmQpKSUwQSUyMCUyMCUyMCUyMCUyMyUyMEZpeCUyMHRoZSUyMHNlZWQlMjBmb3IlMjB0aGUlMjB0ZW1wb3JhbCUyMGNvbnNpc3RlbmN5JTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yLm1hbnVhbF9zZWVkKHNlZWQpJTBBJTIwJTIwJTIwJTIwb3V0cHV0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjB2aWRlb19sZW5ndGglM0RsZW4oZnJhbWVfaWRzKSUyQyUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUyMGZyYW1lX2lkcyUzRGZyYW1lX2lkcyklMEElMjAlMjAlMjAlMjByZXN1bHQuYXBwZW5kKG91dHB1dC5pbWFnZXMlNUIxJTNBJTVEKSUwQSUwQSUyMyUyMENvbmNhdGVuYXRlJTIwY2h1bmtzJTIwYW5kJTIwc2F2ZSUwQXJlc3VsdCUyMCUzRCUyMG5wLmNvbmNhdGVuYXRlKHJlc3VsdCklMEFyZXN1bHQlMjAlM0QlMjAlNUIociUyMColMjAyNTUpLmFzdHlwZSglMjJ1aW50OCUyMiklMjBmb3IlMjByJTIwaW4lMjByZXN1bHQlNUQlMEFpbWFnZWlvLm1pbXNhdmUoJTIydmlkZW8ubXA0JTIyJTJDJTIwcmVzdWx0JTJDJTIwZnBzJTNENCk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| seed = <span class="hljs-number">0</span> | |
| video_length = <span class="hljs-number">24</span> <span class="hljs-comment">#24 ÷ 4fps = 6 seconds</span> | |
| chunk_size = <span class="hljs-number">8</span> | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| <span class="hljs-comment"># Generate the video chunk-by-chunk</span> | |
| result = [] | |
| chunk_ids = np.arange(<span class="hljs-number">0</span>, video_length, chunk_size - <span class="hljs-number">1</span>) | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(chunk_ids)): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Processing chunk <span class="hljs-subst">{i + <span class="hljs-number">1</span>}</span> / <span class="hljs-subst">{<span class="hljs-built_in">len</span>(chunk_ids)}</span>"</span>) | |
| ch_start = chunk_ids[i] | |
| ch_end = video_length <span class="hljs-keyword">if</span> i == <span class="hljs-built_in">len</span>(chunk_ids) - <span class="hljs-number">1</span> <span class="hljs-keyword">else</span> chunk_ids[i + <span class="hljs-number">1</span>] | |
| <span class="hljs-comment"># Attach the first frame for Cross Frame Attention</span> | |
| frame_ids = [<span class="hljs-number">0</span>] + <span class="hljs-built_in">list</span>(<span class="hljs-built_in">range</span>(ch_start, ch_end)) | |
| <span class="hljs-comment"># Fix the seed for the temporal consistency</span> | |
| generator.manual_seed(seed) | |
| output = pipe(prompt=prompt, video_length=<span class="hljs-built_in">len</span>(frame_ids), generator=generator, frame_ids=frame_ids) | |
| result.append(output.images[<span class="hljs-number">1</span>:]) | |
| <span class="hljs-comment"># Concatenate chunks and save</span> | |
| result = np.concatenate(result) | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),re=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),le=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFiaWxpdHlhaSUyRnN0YWJsZS1kaWZmdXNpb24teGwtYmFzZS0xLjAlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHZhcmlhbnQlM0QlMjJmcDE2JTIyJTJDJTIwdXNlX3NhZmV0ZW5zb3JzJTNEVHJ1ZSUwQSkudG8oJTIyY3VkYSUyMik=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroSDXLPipeline | |
| model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span> | |
| pipe = TextToVideoZeroSDXLPipeline.from_pretrained( | |
| model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>)`,wrap:!1}}),pe=new M({props:{title:"Text-To-Video with Pose Control",local:"text-to-video-with-pose-control",headingTag:"h3"}}),me=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBvc2VzX3NrZWxldG9uX2dpZnMlMkZkYW5jZTFfY29yci5tcDQlMjIlMEFyZXBvX2lkJTIwJTNEJTIwJTIyUEFJUiUyRlRleHQyVmlkZW8tWmVybyUyMiUwQXZpZGVvX3BhdGglMjAlM0QlMjBoZl9odWJfZG93bmxvYWQocmVwb190eXBlJTNEJTIyc3BhY2UlMjIlMkMlMjByZXBvX2lkJTNEcmVwb19pZCUyQyUyMGZpbGVuYW1lJTNEZmlsZW5hbWUp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/poses_skeleton_gifs/dance1_corr.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),fe=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXBvc2VfaW1hZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| pose_images = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),ue=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LW9wZW5wb3NlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-openpose"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),ge=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),_e=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uWExDb250cm9sTmV0UGlwZWxpbmUlMkMlMjBDb250cm9sTmV0TW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQWNvbnRyb2xuZXRfbW9kZWxfaWQlMjAlM0QlMjAndGhpYmF1ZCUyRmNvbnRyb2xuZXQtb3BlbnBvc2Utc2R4bC0xLjAnJTBBbW9kZWxfaWQlMjAlM0QlMjAnc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLXhsLWJhc2UtMS4wJyUwQSUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGNvbnRyb2xuZXRfbW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTA5bW9kZWxfaWQlMkMlMjBjb250cm9sbmV0JTNEY29udHJvbG5ldCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJ2N1ZGEnKSUwQSUwQSUyMyUyMFNldCUyMHRoZSUyMGF0dGVudGlvbiUyMHByb2Nlc3NvciUwQXBpcGUudW5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQXBpcGUuY29udHJvbG5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQSUwQSUyMyUyMGZpeCUyMGxhdGVudHMlMjBmb3IlMjBhbGwlMjBmcmFtZXMlMEFsYXRlbnRzJTIwJTNEJTIwdG9yY2gucmFuZG4oKDElMkMlMjA0JTJDJTIwMTI4JTJDJTIwMTI4KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| controlnet_model_id = <span class="hljs-string">'thibaud/controlnet-openpose-sdxl-1.0'</span> | |
| model_id = <span class="hljs-string">'stabilityai/stable-diffusion-xl-base-1.0'</span> | |
| controlnet = ControlNetModel.from_pretrained(controlnet_model_id, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">'cuda'</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),be=new M({props:{title:"Text-To-Video with Edge Control",local:"text-to-video-with-edge-control",headingTag:"h3"}}),ye=new M({props:{title:"Video Instruct-Pix2Pix",local:"video-instruct-pix2pix",headingTag:"h3"}}),ve=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBpeDJwaXglMjB2aWRlbyUyRmNhbWVsLm1wNCUyMiUwQXJlcG9faWQlMjAlM0QlMjAlMjJQQUlSJTJGVGV4dDJWaWRlby1aZXJvJTIyJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGhmX2h1Yl9kb3dubG9hZChyZXBvX3R5cGUlM0QlMjJzcGFjZSUyMiUyQyUyMHJlcG9faWQlM0RyZXBvX2lkJTJDJTIwZmlsZW5hbWUlM0RmaWxlbmFtZSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/pix2pix video/camel.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Ze=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXZpZGVvJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| video = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Ue=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uSW5zdHJ1Y3RQaXgyUGl4UGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIydGltYnJvb2tzJTJGaW5zdHJ1Y3QtcGl4MnBpeCUyMiUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25JbnN0cnVjdFBpeDJQaXhQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMykpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIybWFrZSUyMGl0JTIwVmFuJTIwR29naCUyMFN0YXJyeSUyME5pZ2h0JTIwc3R5bGUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRCU1QnByb21wdCU1RCUyMColMjBsZW4odmlkZW8pJTJDJTIwaW1hZ2UlM0R2aWRlbykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMmVkaXRlZF92aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionInstructPix2PixPipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"timbrooks/instruct-pix2pix"</span> | |
| pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">3</span>)) | |
| prompt = <span class="hljs-string">"make it Van Gogh Starry Night style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(video), image=video).images | |
| imageio.mimsave(<span class="hljs-string">"edited_video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),ke=new M({props:{title:"DreamBooth specialization",local:"dreambooth-specialization",headingTag:"h3"}}),Ie=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRmNhbm55X3ZpZGVvc19tcDQlMkZnaXJsX3R1cm5pbmcubXA0JTIyJTBBcmVwb19pZCUyMCUzRCUyMCUyMlBBSVIlMkZUZXh0MlZpZGVvLVplcm8lMjIlMEF2aWRlb19wYXRoJTIwJTNEJTIwaGZfaHViX2Rvd25sb2FkKHJlcG9fdHlwZSUzRCUyMnNwYWNlJTIyJTJDJTIwcmVwb19pZCUzRHJlcG9faWQlMkMlMjBmaWxlbmFtZSUzRGZpbGVuYW1lKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/canny_videos_mp4/girl_turning.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Xe=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQWNhbm55X2VkZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| canny_edges = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Ge=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEElMjMlMjBzZXQlMjBtb2RlbCUyMGlkJTIwdG8lMjBjdXN0b20lMjBtb2RlbCUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyUEFJUiUyRnRleHQydmlkZW8temVyby1jb250cm9sbmV0LWNhbm55LWF2YXRhciUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LWNhbm55JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4oY2FubnlfZWRnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMm9pbCUyMHBhaW50aW5nJTIwb2YlMjBhJTIwYmVhdXRpZnVsJTIwZ2lybCUyMGF2YXRhciUyMHN0eWxlJTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKGNhbm55X2VkZ2VzKSUyQyUyMGltYWdlJTNEY2FubnlfZWRnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| <span class="hljs-comment"># set model id to custom model</span> | |
| model_id = <span class="hljs-string">"PAIR/text2video-zero-controlnet-canny-avatar"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-canny"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(canny_edges), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"oil painting of a beautiful girl avatar style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(canny_edges), image=canny_edges, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),W=new Us({props:{$$slots:{default:[Vs]},$$scope:{ctx:Wt}}}),Se=new M({props:{title:"TextToVideoZeroPipeline",local:"diffusers.TextToVideoZeroPipeline",headingTag:"h2"}}),Ne=new J({props:{name:"class diffusers.TextToVideoZeroPipeline",anchor:"diffusers.TextToVideoZeroPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_10083/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_10083/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — | |
| A <a href="/docs/diffusers/pr_10083/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.TextToVideoZeroPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_10083/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_10083/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_10083/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_10083/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"},{anchor:"diffusers.TextToVideoZeroPipeline.safety_checker",description:`<strong>safety_checker</strong> (<code>StableDiffusionSafetyChecker</code>) — | |
| Classification module that estimates whether generated images could be considered offensive or harmful. | |
| Please refer to the <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow">model card</a> for more details | |
| about a model’s potential harms.`,name:"safety_checker"},{anchor:"diffusers.TextToVideoZeroPipeline.feature_extractor",description:`<strong>feature_extractor</strong> (<code>CLIPImageProcessor</code>) — | |
| A <code>CLIPImageProcessor</code> to extract features from generated images; used as inputs to the <code>safety_checker</code>.`,name:"feature_extractor"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L284"}}),$e=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": typing.Optional[int] = 1"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in video generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies | |
| to the <a href="/docs/diffusers/pr_10083/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>"latent"</code> and <code>"np"</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a | |
| <a href="/docs/diffusers/pr_10083/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput">TextToVideoPipelineOutput</a> instead of | |
| a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L521",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The output contains a <code>ndarray</code> of the generated video, when <code>output_type</code> != <code>"latent"</code>, otherwise a | |
| latent code of generated videos and a list of <code>bool</code>s indicating whether the corresponding generated | |
| video contains “not-safe-for-work” (nsfw) content..</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_10083/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput" | |
| >TextToVideoPipelineOutput</a></p> | |
| `}}),Le=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"cross_attention_kwargs",val:" = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.latents",description:`<strong>latents</strong> — | |
| Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> — | |
| Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> — | |
| Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> — | |
| Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> — | |
| number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L370",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Latents of backward process output at time timesteps[-1].</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Be=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L793"}}),ze=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> — | |
| Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t0",description:`<strong>t0</strong> — | |
| Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t1",description:`<strong>t1</strong> — | |
| Timestamp at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L346",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),De=new M({props:{title:"TextToVideoZeroSDXLPipeline",local:"diffusers.TextToVideoZeroSDXLPipeline",headingTag:"h2"}}),Re=new J({props:{name:"class diffusers.TextToVideoZeroSDXLPipeline",anchor:"diffusers.TextToVideoZeroSDXLPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"text_encoder_2",val:": CLIPTextModelWithProjection"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"tokenizer_2",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"image_encoder",val:": CLIPVisionModelWithProjection = None"},{name:"feature_extractor",val:": CLIPImageProcessor = None"},{name:"force_zeros_for_empty_prompt",val:": bool = True"},{name:"add_watermarker",val:": typing.Optional[bool] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_10083/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder. Stable Diffusion XL uses the text portion of | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIP</a>, specifically | |
| the <a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a> variant.`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code> CLIPTextModelWithProjection</code>) — | |
| Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection" rel="nofollow">CLIP</a>, | |
| specifically the | |
| <a href="https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" rel="nofollow">laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</a> | |
| variant.`,name:"text_encoder_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>CLIPTokenizer</code>) — | |
| Second Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.unet",description:'<strong>unet</strong> (<a href="/docs/diffusers/pr_10083/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — Conditional U-Net architecture to denoise the encoded image latents.',name:"unet"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_10083/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_10083/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_10083/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_10083/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L338"}}),Qe=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"denoising_end",val:": typing.Optional[float] = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"negative_prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"guidance_rescale",val:": float = 0.0"},{name:"original_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"crops_coords_top_left",val:": typing.Tuple[int, int] = (0, 0)"},{name:"target_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.denoising_end",description:`<strong>denoising_end</strong> (<code>float</code>, <em>optional</em>) — | |
| When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be | |
| completed before it is intentionally prematurely terminated. As a result, the returned sample will | |
| still retain a substantial amount of noise as determined by the discrete timesteps selected by the | |
| scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a | |
| “Mixture of Denoisers” multi-pipeline setup, as elaborated in <a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output" rel="nofollow"><strong>Refining the Image | |
| Output</strong></a>`,name:"denoising_end"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">https://arxiv.org/abs/2010.02502</a>. Only applies to | |
| <a href="/docs/diffusers/pr_10083/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput</code> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that will be called every <code>callback_steps</code> steps during inference. The function will be | |
| called with the following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function will be called. If not specified, the callback will be | |
| called at every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.7) — | |
| Guidance rescale factor proposed by <a href="https://arxiv.org/pdf/2305.08891.pdf" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a> <code>guidance_scale</code> is defined as <code>φ</code> in equation 16. of | |
| <a href="https://arxiv.org/pdf/2305.08891.pdf" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are Flawed</a>. | |
| Guidance rescale factor should fix overexposure when using zero terminal SNR.`,name:"guidance_rescale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.original_size",description:`<strong>original_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| If <code>original_size</code> is not the same as <code>target_size</code> the image will appear to be down- or upsampled. | |
| <code>original_size</code> defaults to <code>(width, height)</code> if not specified. Part of SDXL’s micro-conditioning as | |
| explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"original_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.crops_coords_top_left",description:`<strong>crops_coords_top_left</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (0, 0)) — | |
| <code>crops_coords_top_left</code> can be used to generate an image that appears to be “cropped” from the position | |
| <code>crops_coords_top_left</code> downwards. Favorable, well-centered images are usually achieved by setting | |
| <code>crops_coords_top_left</code> to (0, 0). Part of SDXL’s micro-conditioning as explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"crops_coords_top_left"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.target_size",description:`<strong>target_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| For most cases, <code>target_size</code> should be set to the desired height and width of the generated image. If | |
| not specified it will default to <code>(width, height)</code>. Part of SDXL’s micro-conditioning as explained in | |
| section 2.2 of <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"target_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L927",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> or | |
| <code>tuple</code>: <code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> | |
| if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the | |
| generated images.</p> | |
| `}}),Ee=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"add_text_embeds",val:""},{name:"add_time_ids",val:""},{name:"cross_attention_kwargs",val:" = None"},{name:"guidance_rescale",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.latents",description:`<strong>latents</strong> — | |
| Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> — | |
| Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> — | |
| Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> — | |
| Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> — | |
| number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L842",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents of backward process output at time timesteps[-1]</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Ye=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt",parameters:[{name:"prompt",val:": str"},{name:"prompt_2",val:": typing.Optional[str] = None"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Optional[str] = None"},{name:"negative_prompt_2",val:": typing.Optional[str] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L583"}}),Fe=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> — | |
| Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t0",description:`<strong>t0</strong> — | |
| Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t1",description:`<strong>t1</strong> — | |
| Timestamp at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L818",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),He=new M({props:{title:"TextToVideoPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",headingTag:"h2"}}),qe=new J({props:{name:"class diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",parameters:[{name:"images",val:": typing.Union[typing.List[PIL.Image.Image], numpy.ndarray]"},{name:"nsfw_content_detected",val:": typing.Optional[typing.List[bool]]"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.images",description:`<strong>images</strong> (<code>[List[PIL.Image.Image]</code>, <code>np.ndarray</code>]) — | |
| List of denoised PIL images of length <code>batch_size</code> or NumPy array of shape <code>(batch_size, height, width, num_channels)</code>.`,name:"images"},{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.nsfw_content_detected",description:`<strong>nsfw_content_detected</strong> (<code>[List[bool]]</code>) — | |
| List indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content or | |
| <code>None</code> if safety checking could not be performed.`,name:"nsfw_content_detected"}],source:"https://github.com/huggingface/diffusers/blob/vr_10083/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L182"}}),Ae=new ks({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video_zero.md"}}),{c(){T=a("meta"),D=s(),Z=a("p"),Oe=s(),d(R.$$.fragment),Gt=s(),Q=a("p"),Q.innerHTML=Vn,Pt=s(),E=a("p"),E.textContent=jn,St=s(),Y=a("ol"),Y.innerHTML=In,Nt=s(),F=a("p"),F.textContent=Cn,$t=s(),H=a("p"),H.innerHTML=Xn,Lt=s(),q=a("p"),q.textContent=Wn,Bt=s(),A=a("p"),A.innerHTML=Gn,zt=s(),O=a("p"),O.innerHTML=Pn,Dt=s(),d(K.$$.fragment),Rt=s(),d(ee.$$.fragment),Qt=s(),te=a("p"),te.textContent=Sn,Et=s(),d(oe.$$.fragment),Yt=s(),ne=a("p"),ne.textContent=Nn,Ft=s(),se=a("ul"),se.innerHTML=$n,Ht=s(),ie=a("p"),ie.textContent=Ln,qt=s(),d(ae.$$.fragment),At=s(),Ke=a("ul"),X=a("li"),d(re.$$.fragment),zo=Ts("In order to use the SDXL model when generating a video from prompt, use the "),et=a("code"),et.textContent=Bn,Do=Ts(" pipeline:"),Ot=s(),d(le.$$.fragment),Kt=s(),d(pe.$$.fragment),eo=s(),de=a("p"),de.textContent=zn,to=s(),tt=a("ol"),ce=a("li"),ot=a("p"),ot.textContent=Dn,Ro=s(),d(me.$$.fragment),oo=s(),V=a("ol"),j=a("li"),nt=a("p"),nt.textContent=Rn,Qo=s(),d(fe.$$.fragment),Eo=s(),st=a("p"),st.innerHTML=Qn,Yo=s(),he=a("li"),it=a("p"),it.innerHTML=En,Fo=s(),d(ue.$$.fragment),no=s(),at=a("ul"),I=a("li"),d(ge.$$.fragment),Ho=s(),rt=a("p"),rt.textContent=Yn,qo=s(),d(_e.$$.fragment),so=s(),d(be.$$.fragment),io=s(),Te=a("p"),Te.innerHTML=Fn,ao=s(),d(ye.$$.fragment),ro=s(),we=a("p"),we.innerHTML=Hn,lo=s(),U=a("ol"),xe=a("li"),lt=a("p"),lt.textContent=qn,Ao=s(),d(ve.$$.fragment),Oo=s(),Me=a("li"),pt=a("p"),pt.textContent=An,Ko=s(),d(Ze.$$.fragment),en=s(),Je=a("li"),dt=a("p"),dt.innerHTML=On,tn=s(),d(Ue.$$.fragment),po=s(),d(ke.$$.fragment),co=s(),Ve=a("p"),Ve.innerHTML=Kn,mo=s(),k=a("ol"),je=a("li"),ct=a("p"),ct.textContent=es,on=s(),d(Ie.$$.fragment),nn=s(),Ce=a("li"),mt=a("p"),mt.textContent=ts,sn=s(),d(Xe.$$.fragment),an=s(),We=a("li"),ft=a("p"),ft.innerHTML=os,rn=s(),d(Ge.$$.fragment),fo=s(),Pe=a("p"),Pe.innerHTML=ns,ho=s(),d(W.$$.fragment),uo=s(),d(Se.$$.fragment),go=s(),_=a("div"),d(Ne.$$.fragment),ln=s(),ht=a("p"),ht.textContent=ss,pn=s(),ut=a("p"),ut.innerHTML=is,dn=s(),G=a("div"),d($e.$$.fragment),cn=s(),gt=a("p"),gt.textContent=as,mn=s(),P=a("div"),d(Le.$$.fragment),fn=s(),_t=a("p"),_t.textContent=rs,hn=s(),S=a("div"),d(Be.$$.fragment),un=s(),bt=a("p"),bt.textContent=ls,gn=s(),N=a("div"),d(ze.$$.fragment),_n=s(),Tt=a("p"),Tt.textContent=ps,_o=s(),d(De.$$.fragment),bo=s(),b=a("div"),d(Re.$$.fragment),bn=s(),yt=a("p"),yt.textContent=ds,Tn=s(),wt=a("p"),wt.innerHTML=cs,yn=s(),$=a("div"),d(Qe.$$.fragment),wn=s(),xt=a("p"),xt.textContent=ms,xn=s(),L=a("div"),d(Ee.$$.fragment),vn=s(),vt=a("p"),vt.textContent=fs,Mn=s(),B=a("div"),d(Ye.$$.fragment),Zn=s(),Mt=a("p"),Mt.textContent=hs,Jn=s(),z=a("div"),d(Fe.$$.fragment),Un=s(),Zt=a("p"),Zt.textContent=us,To=s(),d(He.$$.fragment),yo=s(),C=a("div"),d(qe.$$.fragment),kn=s(),Jt=a("p"),Jt.textContent=gs,wo=s(),d(Ae.$$.fragment),xo=s(),It=a("p"),this.h()},l(e){const o=Js("svelte-u9bgzb",document.head);T=r(o,"META",{name:!0,content:!0}),o.forEach(t),D=i(e),Z=r(e,"P",{}),g(Z).forEach(t),Oe=i(e),c(R.$$.fragment,e),Gt=i(e),Q=r(e,"P",{"data-svelte-h":!0}),p(Q)!=="svelte-16xpzkf"&&(Q.innerHTML=Vn),Pt=i(e),E=r(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-1jaz0ks"&&(E.textContent=jn),St=i(e),Y=r(e,"OL",{"data-svelte-h":!0}),p(Y)!=="svelte-19ca1wn"&&(Y.innerHTML=In),Nt=i(e),F=r(e,"P",{"data-svelte-h":!0}),p(F)!=="svelte-yhxhyq"&&(F.textContent=Cn),$t=i(e),H=r(e,"P",{"data-svelte-h":!0}),p(H)!=="svelte-uupw0l"&&(H.innerHTML=Xn),Lt=i(e),q=r(e,"P",{"data-svelte-h":!0}),p(q)!=="svelte-1cwsb16"&&(q.textContent=Wn),Bt=i(e),A=r(e,"P",{"data-svelte-h":!0}),p(A)!=="svelte-1rtg145"&&(A.innerHTML=Gn),zt=i(e),O=r(e,"P",{"data-svelte-h":!0}),p(O)!=="svelte-u78olw"&&(O.innerHTML=Pn),Dt=i(e),c(K.$$.fragment,e),Rt=i(e),c(ee.$$.fragment,e),Qt=i(e),te=r(e,"P",{"data-svelte-h":!0}),p(te)!=="svelte-1dpixty"&&(te.textContent=Sn),Et=i(e),c(oe.$$.fragment,e),Yt=i(e),ne=r(e,"P",{"data-svelte-h":!0}),p(ne)!=="svelte-rs2kss"&&(ne.textContent=Nn),Ft=i(e),se=r(e,"UL",{"data-svelte-h":!0}),p(se)!=="svelte-1na1mhs"&&(se.innerHTML=$n),Ht=i(e),ie=r(e,"P",{"data-svelte-h":!0}),p(ie)!=="svelte-1xdujjh"&&(ie.textContent=Ln),qt=i(e),c(ae.$$.fragment,e),At=i(e),Ke=r(e,"UL",{});var Ct=g(Ke);X=r(Ct,"LI",{});var Xt=g(X);c(re.$$.fragment,Xt),zo=ys(Xt,"In order to use the SDXL model when generating a video from prompt, use the "),et=r(Xt,"CODE",{"data-svelte-h":!0}),p(et)!=="svelte-7o0i0w"&&(et.textContent=Bn),Do=ys(Xt," pipeline:"),Xt.forEach(t),Ct.forEach(t),Ot=i(e),c(le.$$.fragment,e),Kt=i(e),c(pe.$$.fragment,e),eo=i(e),de=r(e,"P",{"data-svelte-h":!0}),p(de)!=="svelte-13nlg5v"&&(de.textContent=zn),to=i(e),tt=r(e,"OL",{});var _s=g(tt);ce=r(_s,"LI",{});var Mo=g(ce);ot=r(Mo,"P",{"data-svelte-h":!0}),p(ot)!=="svelte-1xo2nq1"&&(ot.textContent=Dn),Ro=i(Mo),c(me.$$.fragment,Mo),Mo.forEach(t),_s.forEach(t),oo=i(e),V=r(e,"OL",{start:!0});var Zo=g(V);j=r(Zo,"LI",{});var Ut=g(j);nt=r(Ut,"P",{"data-svelte-h":!0}),p(nt)!=="svelte-7pm43o"&&(nt.textContent=Rn),Qo=i(Ut),c(fe.$$.fragment,Ut),Eo=i(Ut),st=r(Ut,"P",{"data-svelte-h":!0}),p(st)!=="svelte-mij11h"&&(st.innerHTML=Qn),Ut.forEach(t),Yo=i(Zo),he=r(Zo,"LI",{});var Jo=g(he);it=r(Jo,"P",{"data-svelte-h":!0}),p(it)!=="svelte-1wsv5bt"&&(it.innerHTML=En),Fo=i(Jo),c(ue.$$.fragment,Jo),Jo.forEach(t),Zo.forEach(t),no=i(e),at=r(e,"UL",{});var bs=g(at);I=r(bs,"LI",{});var kt=g(I);c(ge.$$.fragment,kt),Ho=i(kt),rt=r(kt,"P",{"data-svelte-h":!0}),p(rt)!=="svelte-bcy3mo"&&(rt.textContent=Yn),qo=i(kt),c(_e.$$.fragment,kt),kt.forEach(t),bs.forEach(t),so=i(e),c(be.$$.fragment,e),io=i(e),Te=r(e,"P",{"data-svelte-h":!0}),p(Te)!=="svelte-1fgx3w6"&&(Te.innerHTML=Fn),ao=i(e),c(ye.$$.fragment,e),ro=i(e),we=r(e,"P",{"data-svelte-h":!0}),p(we)!=="svelte-o0vmiz"&&(we.innerHTML=Hn),lo=i(e),U=r(e,"OL",{});var Vt=g(U);xe=r(Vt,"LI",{});var Uo=g(xe);lt=r(Uo,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-1xo2nq1"&&(lt.textContent=qn),Ao=i(Uo),c(ve.$$.fragment,Uo),Uo.forEach(t),Oo=i(Vt),Me=r(Vt,"LI",{});var ko=g(Me);pt=r(ko,"P",{"data-svelte-h":!0}),p(pt)!=="svelte-1wln6n0"&&(pt.textContent=An),Ko=i(ko),c(Ze.$$.fragment,ko),ko.forEach(t),en=i(Vt),Je=r(Vt,"LI",{});var Vo=g(Je);dt=r(Vo,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-14jy04z"&&(dt.innerHTML=On),tn=i(Vo),c(Ue.$$.fragment,Vo),Vo.forEach(t),Vt.forEach(t),po=i(e),c(ke.$$.fragment,e),co=i(e),Ve=r(e,"P",{"data-svelte-h":!0}),p(Ve)!=="svelte-x1pkth"&&(Ve.innerHTML=Kn),mo=i(e),k=r(e,"OL",{});var jt=g(k);je=r(jt,"LI",{});var jo=g(je);ct=r(jo,"P",{"data-svelte-h":!0}),p(ct)!=="svelte-1xo2nq1"&&(ct.textContent=es),on=i(jo),c(Ie.$$.fragment,jo),jo.forEach(t),nn=i(jt),Ce=r(jt,"LI",{});var Io=g(Ce);mt=r(Io,"P",{"data-svelte-h":!0}),p(mt)!=="svelte-1wln6n0"&&(mt.textContent=ts),sn=i(Io),c(Xe.$$.fragment,Io),Io.forEach(t),an=i(jt),We=r(jt,"LI",{});var Co=g(We);ft=r(Co,"P",{"data-svelte-h":!0}),p(ft)!=="svelte-okpmrc"&&(ft.innerHTML=os),rn=i(Co),c(Ge.$$.fragment,Co),Co.forEach(t),jt.forEach(t),fo=i(e),Pe=r(e,"P",{"data-svelte-h":!0}),p(Pe)!=="svelte-18tnclt"&&(Pe.innerHTML=ns),ho=i(e),c(W.$$.fragment,e),uo=i(e),c(Se.$$.fragment,e),go=i(e),_=r(e,"DIV",{class:!0});var y=g(_);c(Ne.$$.fragment,y),ln=i(y),ht=r(y,"P",{"data-svelte-h":!0}),p(ht)!=="svelte-1q57293"&&(ht.textContent=ss),pn=i(y),ut=r(y,"P",{"data-svelte-h":!0}),p(ut)!=="svelte-1bg3yum"&&(ut.innerHTML=is),dn=i(y),G=r(y,"DIV",{class:!0});var Xo=g(G);c($e.$$.fragment,Xo),cn=i(Xo),gt=r(Xo,"P",{"data-svelte-h":!0}),p(gt)!=="svelte-50j04k"&&(gt.textContent=as),Xo.forEach(t),mn=i(y),P=r(y,"DIV",{class:!0});var Wo=g(P);c(Le.$$.fragment,Wo),fn=i(Wo),_t=r(Wo,"P",{"data-svelte-h":!0}),p(_t)!=="svelte-1cxzr1t"&&(_t.textContent=rs),Wo.forEach(t),hn=i(y),S=r(y,"DIV",{class:!0});var Go=g(S);c(Be.$$.fragment,Go),un=i(Go),bt=r(Go,"P",{"data-svelte-h":!0}),p(bt)!=="svelte-16q0ax1"&&(bt.textContent=ls),Go.forEach(t),gn=i(y),N=r(y,"DIV",{class:!0});var Po=g(N);c(ze.$$.fragment,Po),_n=i(Po),Tt=r(Po,"P",{"data-svelte-h":!0}),p(Tt)!=="svelte-1d8vbe1"&&(Tt.textContent=ps),Po.forEach(t),y.forEach(t),_o=i(e),c(De.$$.fragment,e),bo=i(e),b=r(e,"DIV",{class:!0});var w=g(b);c(Re.$$.fragment,w),bn=i(w),yt=r(w,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-pyonrv"&&(yt.textContent=ds),Tn=i(w),wt=r(w,"P",{"data-svelte-h":!0}),p(wt)!=="svelte-1bg3yum"&&(wt.innerHTML=cs),yn=i(w),$=r(w,"DIV",{class:!0});var So=g($);c(Qe.$$.fragment,So),wn=i(So),xt=r(So,"P",{"data-svelte-h":!0}),p(xt)!=="svelte-v78lg8"&&(xt.textContent=ms),So.forEach(t),xn=i(w),L=r(w,"DIV",{class:!0});var No=g(L);c(Ee.$$.fragment,No),vn=i(No),vt=r(No,"P",{"data-svelte-h":!0}),p(vt)!=="svelte-jp6j47"&&(vt.textContent=fs),No.forEach(t),Mn=i(w),B=r(w,"DIV",{class:!0});var $o=g(B);c(Ye.$$.fragment,$o),Zn=i($o),Mt=r($o,"P",{"data-svelte-h":!0}),p(Mt)!=="svelte-16q0ax1"&&(Mt.textContent=hs),$o.forEach(t),Jn=i(w),z=r(w,"DIV",{class:!0});var Lo=g(z);c(Fe.$$.fragment,Lo),Un=i(Lo),Zt=r(Lo,"P",{"data-svelte-h":!0}),p(Zt)!=="svelte-1d8vbe1"&&(Zt.textContent=us),Lo.forEach(t),w.forEach(t),To=i(e),c(He.$$.fragment,e),yo=i(e),C=r(e,"DIV",{class:!0});var Bo=g(C);c(qe.$$.fragment,Bo),kn=i(Bo),Jt=r(Bo,"P",{"data-svelte-h":!0}),p(Jt)!=="svelte-1dgz4ei"&&(Jt.textContent=gs),Bo.forEach(t),wo=i(e),c(Ae.$$.fragment,e),xo=i(e),It=r(e,"P",{}),g(It).forEach(t),this.h()},h(){x(T,"name","hf:doc:metadata"),x(T,"content",Is),x(V,"start","2"),x(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(_,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){n(document.head,T),l(e,D,o),l(e,Z,o),l(e,Oe,o),m(R,e,o),l(e,Gt,o),l(e,Q,o),l(e,Pt,o),l(e,E,o),l(e,St,o),l(e,Y,o),l(e,Nt,o),l(e,F,o),l(e,$t,o),l(e,H,o),l(e,Lt,o),l(e,q,o),l(e,Bt,o),l(e,A,o),l(e,zt,o),l(e,O,o),l(e,Dt,o),m(K,e,o),l(e,Rt,o),m(ee,e,o),l(e,Qt,o),l(e,te,o),l(e,Et,o),m(oe,e,o),l(e,Yt,o),l(e,ne,o),l(e,Ft,o),l(e,se,o),l(e,Ht,o),l(e,ie,o),l(e,qt,o),m(ae,e,o),l(e,At,o),l(e,Ke,o),n(Ke,X),m(re,X,null),n(X,zo),n(X,et),n(X,Do),l(e,Ot,o),m(le,e,o),l(e,Kt,o),m(pe,e,o),l(e,eo,o),l(e,de,o),l(e,to,o),l(e,tt,o),n(tt,ce),n(ce,ot),n(ce,Ro),m(me,ce,null),l(e,oo,o),l(e,V,o),n(V,j),n(j,nt),n(j,Qo),m(fe,j,null),n(j,Eo),n(j,st),n(V,Yo),n(V,he),n(he,it),n(he,Fo),m(ue,he,null),l(e,no,o),l(e,at,o),n(at,I),m(ge,I,null),n(I,Ho),n(I,rt),n(I,qo),m(_e,I,null),l(e,so,o),m(be,e,o),l(e,io,o),l(e,Te,o),l(e,ao,o),m(ye,e,o),l(e,ro,o),l(e,we,o),l(e,lo,o),l(e,U,o),n(U,xe),n(xe,lt),n(xe,Ao),m(ve,xe,null),n(U,Oo),n(U,Me),n(Me,pt),n(Me,Ko),m(Ze,Me,null),n(U,en),n(U,Je),n(Je,dt),n(Je,tn),m(Ue,Je,null),l(e,po,o),m(ke,e,o),l(e,co,o),l(e,Ve,o),l(e,mo,o),l(e,k,o),n(k,je),n(je,ct),n(je,on),m(Ie,je,null),n(k,nn),n(k,Ce),n(Ce,mt),n(Ce,sn),m(Xe,Ce,null),n(k,an),n(k,We),n(We,ft),n(We,rn),m(Ge,We,null),l(e,fo,o),l(e,Pe,o),l(e,ho,o),m(W,e,o),l(e,uo,o),m(Se,e,o),l(e,go,o),l(e,_,o),m(Ne,_,null),n(_,ln),n(_,ht),n(_,pn),n(_,ut),n(_,dn),n(_,G),m($e,G,null),n(G,cn),n(G,gt),n(_,mn),n(_,P),m(Le,P,null),n(P,fn),n(P,_t),n(_,hn),n(_,S),m(Be,S,null),n(S,un),n(S,bt),n(_,gn),n(_,N),m(ze,N,null),n(N,_n),n(N,Tt),l(e,_o,o),m(De,e,o),l(e,bo,o),l(e,b,o),m(Re,b,null),n(b,bn),n(b,yt),n(b,Tn),n(b,wt),n(b,yn),n(b,$),m(Qe,$,null),n($,wn),n($,xt),n(b,xn),n(b,L),m(Ee,L,null),n(L,vn),n(L,vt),n(b,Mn),n(b,B),m(Ye,B,null),n(B,Zn),n(B,Mt),n(b,Jn),n(b,z),m(Fe,z,null),n(z,Un),n(z,Zt),l(e,To,o),m(He,e,o),l(e,yo,o),l(e,C,o),m(qe,C,null),n(C,kn),n(C,Jt),l(e,wo,o),m(Ae,e,o),l(e,xo,o),l(e,It,o),vo=!0},p(e,[o]){const Ct={};o&2&&(Ct.$$scope={dirty:o,ctx:e}),W.$set(Ct)},i(e){vo||(f(R.$$.fragment,e),f(K.$$.fragment,e),f(ee.$$.fragment,e),f(oe.$$.fragment,e),f(ae.$$.fragment,e),f(re.$$.fragment,e),f(le.$$.fragment,e),f(pe.$$.fragment,e),f(me.$$.fragment,e),f(fe.$$.fragment,e),f(ue.$$.fragment,e),f(ge.$$.fragment,e),f(_e.$$.fragment,e),f(be.$$.fragment,e),f(ye.$$.fragment,e),f(ve.$$.fragment,e),f(Ze.$$.fragment,e),f(Ue.$$.fragment,e),f(ke.$$.fragment,e),f(Ie.$$.fragment,e),f(Xe.$$.fragment,e),f(Ge.$$.fragment,e),f(W.$$.fragment,e),f(Se.$$.fragment,e),f(Ne.$$.fragment,e),f($e.$$.fragment,e),f(Le.$$.fragment,e),f(Be.$$.fragment,e),f(ze.$$.fragment,e),f(De.$$.fragment,e),f(Re.$$.fragment,e),f(Qe.$$.fragment,e),f(Ee.$$.fragment,e),f(Ye.$$.fragment,e),f(Fe.$$.fragment,e),f(He.$$.fragment,e),f(qe.$$.fragment,e),f(Ae.$$.fragment,e),vo=!0)},o(e){h(R.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(oe.$$.fragment,e),h(ae.$$.fragment,e),h(re.$$.fragment,e),h(le.$$.fragment,e),h(pe.$$.fragment,e),h(me.$$.fragment,e),h(fe.$$.fragment,e),h(ue.$$.fragment,e),h(ge.$$.fragment,e),h(_e.$$.fragment,e),h(be.$$.fragment,e),h(ye.$$.fragment,e),h(ve.$$.fragment,e),h(Ze.$$.fragment,e),h(Ue.$$.fragment,e),h(ke.$$.fragment,e),h(Ie.$$.fragment,e),h(Xe.$$.fragment,e),h(Ge.$$.fragment,e),h(W.$$.fragment,e),h(Se.$$.fragment,e),h(Ne.$$.fragment,e),h($e.$$.fragment,e),h(Le.$$.fragment,e),h(Be.$$.fragment,e),h(ze.$$.fragment,e),h(De.$$.fragment,e),h(Re.$$.fragment,e),h(Qe.$$.fragment,e),h(Ee.$$.fragment,e),h(Ye.$$.fragment,e),h(Fe.$$.fragment,e),h(He.$$.fragment,e),h(qe.$$.fragment,e),h(Ae.$$.fragment,e),vo=!1},d(e){e&&(t(D),t(Z),t(Oe),t(Gt),t(Q),t(Pt),t(E),t(St),t(Y),t(Nt),t(F),t($t),t(H),t(Lt),t(q),t(Bt),t(A),t(zt),t(O),t(Dt),t(Rt),t(Qt),t(te),t(Et),t(Yt),t(ne),t(Ft),t(se),t(Ht),t(ie),t(qt),t(At),t(Ke),t(Ot),t(Kt),t(eo),t(de),t(to),t(tt),t(oo),t(V),t(no),t(at),t(so),t(io),t(Te),t(ao),t(ro),t(we),t(lo),t(U),t(po),t(co),t(Ve),t(mo),t(k),t(fo),t(Pe),t(ho),t(uo),t(go),t(_),t(_o),t(bo),t(b),t(To),t(yo),t(C),t(wo),t(xo),t(It)),t(T),u(R,e),u(K,e),u(ee,e),u(oe,e),u(ae,e),u(re),u(le,e),u(pe,e),u(me),u(fe),u(ue),u(ge),u(_e),u(be,e),u(ye,e),u(ve),u(Ze),u(Ue),u(ke,e),u(Ie),u(Xe),u(Ge),u(W,e),u(Se,e),u(Ne),u($e),u(Le),u(Be),u(ze),u(De,e),u(Re),u(Qe),u(Ee),u(Ye),u(Fe),u(He,e),u(qe),u(Ae,e)}}}const Is='{"title":"Text2Video-Zero","local":"text2video-zero","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"Text-To-Video","local":"text-to-video","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Pose Control","local":"text-to-video-with-pose-control","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Edge Control","local":"text-to-video-with-edge-control","sections":[],"depth":3},{"title":"Video Instruct-Pix2Pix","local":"video-instruct-pix2pix","sections":[],"depth":3},{"title":"DreamBooth specialization","local":"dreambooth-specialization","sections":[],"depth":3}],"depth":2},{"title":"TextToVideoZeroPipeline","local":"diffusers.TextToVideoZeroPipeline","sections":[],"depth":2},{"title":"TextToVideoZeroSDXLPipeline","local":"diffusers.TextToVideoZeroSDXLPipeline","sections":[],"depth":2},{"title":"TextToVideoPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';function Cs(Wt){return xs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class $s extends Ms{constructor(T){super(),Zs(this,T,Cs,js,ws,{})}}export{$s as component}; | |
Xet Storage Details
- Size:
- 104 kB
- Xet hash:
- cc118d7949c3f534219ace5600dc54c21ee5c88ea06fa77c03626a17e3a6f080
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.