Buckets:
| import{s as Ms,o as Zs,n as Js}from"../chunks/scheduler.8c3d61f6.js";import{S as ks,i as Us,g as a,s,r as d,m as xs,A as Vs,h as r,f as t,c as i,j as g,u as c,x as p,n as vs,k as y,y as n,a as l,v as m,d as f,t as h,w as u}from"../chunks/index.da70eac4.js";import{T as js}from"../chunks/Tip.1d9b8c37.js";import{D as J}from"../chunks/Docstring.9419aa1d.js";import{C as v}from"../chunks/CodeBlock.a9c4becf.js";import{H as M,E as Is}from"../chunks/getInferenceSnippets.39110341.js";function Cs(Gt){let b,R='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){b=a("p"),b.innerHTML=R},l(Z){b=r(Z,"P",{"data-svelte-h":!0}),p(b)!=="svelte-1qn15hi"&&(b.innerHTML=R)},m(Z,Ke){l(Z,b,Ke)},p:Js,d(Z){Z&&t(b)}}}function Xs(Gt){let b,R,Z,Ke,Q,Pt,X,In='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',St,E,Cn='<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">Text2Video-Zero: Text-to-Image Diffusion Models are Zero-Shot Video Generators</a> is by Levon Khachatryan, Andranik Movsisyan, Vahram Tadevosyan, Roberto Henschel, <a href="https://www.ece.utexas.edu/people/faculty/atlas-wang" rel="nofollow">Zhangyang Wang</a>, Shant Navasardyan, <a href="https://www.humphreyshi.com" rel="nofollow">Humphrey Shi</a>.',Nt,Y,Xn="Text2Video-Zero enables zero-shot video generation using either:",$t,F,Wn="<li>A textual prompt</li> <li>A prompt combined with guidance from poses or edges</li> <li>Video Instruct-Pix2Pix (instruction-guided video editing)</li>",Lt,H,Gn="Results are temporally consistent and closely follow the guidance and textual prompts.",Bt,q,Pn='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/t2v_zero_teaser.png" alt="teaser-img"/>',zt,A,Sn="The abstract from the paper is:",Dt,O,Nn=`<em>Recent text-to-video generation approaches rely on computationally heavy training and require large-scale video datasets. In this paper, we introduce a new task of zero-shot text-to-video generation and propose a low-cost approach (without any training or optimization) by leveraging the power of existing text-to-image synthesis methods (e.g., Stable Diffusion), making them suitable for the video domain. | |
| Our key modifications include (i) enriching the latent codes of the generated frames with motion dynamics to keep the global scene and the background time consistent; and (ii) reprogramming frame-level self-attention using a new cross-frame attention of each frame on the first frame, to preserve the context, appearance, and identity of the foreground object. | |
| Experiments show that this leads to low overhead, yet high-quality and remarkably consistent video generation. Moreover, our approach is not limited to text-to-video synthesis but is also applicable to other tasks such as conditional and content-specialized video generation, and Video Instruct-Pix2Pix, i.e., instruction-guided video editing. | |
| As experiments show, our method performs comparably or sometimes better than recent approaches, despite not being trained on additional video data.</em>`,Rt,K,$n='You can find additional information about Text2Video-Zero on the <a href="https://text2video-zero.github.io/" rel="nofollow">project page</a>, <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, and <a href="https://github.com/Picsart-AI-Research/Text2Video-Zero" rel="nofollow">original codebase</a>.',Qt,ee,Et,te,Yt,oe,Ln="To generate a video from prompt, run the following Python code:",Ft,ne,Ht,se,Bn="You can change these parameters in the pipeline call:",qt,ie,zn='<li>Motion field strength (see the <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1):<ul><li><code>motion_field_strength_x</code> and <code>motion_field_strength_y</code>. Default: <code>motion_field_strength_x=12</code>, <code>motion_field_strength_y=12</code></li></ul></li> <li><code>T</code> and <code>T'</code> (see the <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1)<ul><li><code>t0</code> and <code>t1</code> in the range <code>{0, ..., num_inference_steps}</code>. Default: <code>t0=45</code>, <code>t1=48</code></li></ul></li> <li>Video length:<ul><li><code>video_length</code>, the number of frames video_length to be generated. Default: <code>video_length=8</code></li></ul></li>',At,ae,Dn="We can also generate longer videos by doing the processing in a chunk-by-chunk manner:",Ot,re,Kt,et,W,le,Ro,tt,Rn="TextToVideoZeroSDXLPipeline",Qo,eo,pe,to,de,oo,ce,Qn="To generate a video from prompt with additional pose control",no,ot,me,nt,En="Download a demo video",Eo,fe,so,V,j,st,Yn="Read video containing extracted pose images",Yo,he,Fo,it,Fn='To extract pose from actual video, read <a href="controlnet">ControlNet documentation</a>.',Ho,ue,at,Hn="Run <code>StableDiffusionControlNetPipeline</code> with our custom attention processor",qo,ge,io,rt,I,_e,Ao,lt,qn="Since our attention processor also works with SDXL, it can be utilized to generate a video from prompt using ControlNet models powered by SDXL:",Oo,Te,ao,be,ro,ye,An='To generate a video from prompt with additional Canny edge control, follow the same steps described above for pose-guided generation using <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a>.',lo,we,po,xe,On='To perform text-guided video editing (with <a href="pix2pix">InstructPix2Pix</a>):',co,k,ve,pt,Kn="Download a demo video",Ko,Me,en,Ze,dt,es="Read video from path",tn,Je,on,ke,ct,ts="Run <code>StableDiffusionInstructPix2PixPipeline</code> with our custom attention processor",nn,Ue,mo,Ve,fo,je,os=`Methods <strong>Text-To-Video</strong>, <strong>Text-To-Video with Pose Control</strong> and <strong>Text-To-Video with Edge Control</strong> | |
| can run with custom <a href="../../training/dreambooth">DreamBooth</a> models, as shown below for | |
| <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a> and | |
| <a href="https://huggingface.co/PAIR/text2video-zero-controlnet-canny-avatar" rel="nofollow">Avatar style DreamBooth</a> model:`,ho,U,Ie,mt,ns="Download a demo video",sn,Ce,an,Xe,ft,ss="Read video from path",rn,We,ln,Ge,ht,is="Run <code>StableDiffusionControlNetPipeline</code> with custom trained DreamBooth model",pn,Pe,uo,Se,as='You can filter out some available DreamBooth-trained models with <a href="https://huggingface.co/models?search=dreambooth" rel="nofollow">this link</a>.',go,G,_o,Ne,To,_,$e,dn,ut,rs="Pipeline for zero-shot text-to-video generation using Stable Diffusion.",cn,gt,ls=`This model inherits from <a href="/docs/diffusers/pr_11660/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,mn,P,Le,fn,_t,ps="The call function to the pipeline for generation.",hn,S,Be,un,Tt,ds="Perform backward process given list of time steps.",gn,N,ze,_n,bt,cs="Encodes the prompt into text encoder hidden states.",Tn,$,De,bn,yt,ms="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",bo,Re,yo,T,Qe,yn,wt,fs="Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.",wn,xt,hs=`This model inherits from <a href="/docs/diffusers/pr_11660/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,xn,L,Ee,vn,vt,us="Function invoked when calling the pipeline for generation.",Mn,B,Ye,Zn,Mt,gs="Perform backward process given list of time steps",Jn,z,Fe,kn,Zt,_s="Encodes the prompt into text encoder hidden states.",Un,D,He,Vn,Jt,Ts="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",wo,qe,xo,C,Ae,jn,kt,bs="Output class for zero-shot text-to-video pipeline.",vo,Oe,Mo,Ct,Zo;return Q=new M({props:{title:"Text2Video-Zero",local:"text2video-zero",headingTag:"h1"}}),ee=new M({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),te=new M({props:{title:"Text-To-Video",local:"text-to-video",headingTag:"h3"}}),ne=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBpbWFnZWlvJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBwYW5kYSUyMGlzJTIwcGxheWluZyUyMGd1aXRhciUyMG9uJTIwdGltZXMlMjBzcXVhcmUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCkuaW1hZ2VzJTBBcmVzdWx0JTIwJTNEJTIwJTVCKHIlMjAqJTIwMjU1KS5hc3R5cGUoJTIydWludDglMjIpJTIwZm9yJTIwciUyMGluJTIwcmVzdWx0JTVEJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> imageio | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| result = pipe(prompt=prompt).images | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),re=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQXBpcGUlMjAlM0QlMjBUZXh0VG9WaWRlb1plcm9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBc2VlZCUyMCUzRCUyMDAlMEF2aWRlb19sZW5ndGglMjAlM0QlMjAyNCUyMCUyMCUyMzI0JTIwJUMzJUI3JTIwNGZwcyUyMCUzRCUyMDYlMjBzZWNvbmRzJTBBY2h1bmtfc2l6ZSUyMCUzRCUyMDglMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwcGFuZGElMjBpcyUyMHBsYXlpbmclMjBndWl0YXIlMjBvbiUyMHRpbWVzJTIwc3F1YXJlJTIyJTBBJTBBJTIzJTIwR2VuZXJhdGUlMjB0aGUlMjB2aWRlbyUyMGNodW5rLWJ5LWNodW5rJTBBcmVzdWx0JTIwJTNEJTIwJTVCJTVEJTBBY2h1bmtfaWRzJTIwJTNEJTIwbnAuYXJhbmdlKDAlMkMlMjB2aWRlb19sZW5ndGglMkMlMjBjaHVua19zaXplJTIwLSUyMDEpJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSUzRCUyMmN1ZGElMjIpJTBBZm9yJTIwaSUyMGluJTIwcmFuZ2UobGVuKGNodW5rX2lkcykpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlByb2Nlc3NpbmclMjBjaHVuayUyMCU3QmklMjAlMkIlMjAxJTdEJTIwJTJGJTIwJTdCbGVuKGNodW5rX2lkcyklN0QlMjIpJTBBJTIwJTIwJTIwJTIwY2hfc3RhcnQlMjAlM0QlMjBjaHVua19pZHMlNUJpJTVEJTBBJTIwJTIwJTIwJTIwY2hfZW5kJTIwJTNEJTIwdmlkZW9fbGVuZ3RoJTIwaWYlMjBpJTIwJTNEJTNEJTIwbGVuKGNodW5rX2lkcyklMjAtJTIwMSUyMGVsc2UlMjBjaHVua19pZHMlNUJpJTIwJTJCJTIwMSU1RCUwQSUyMCUyMCUyMCUyMCUyMyUyMEF0dGFjaCUyMHRoZSUyMGZpcnN0JTIwZnJhbWUlMjBmb3IlMjBDcm9zcyUyMEZyYW1lJTIwQXR0ZW50aW9uJTBBJTIwJTIwJTIwJTIwZnJhbWVfaWRzJTIwJTNEJTIwJTVCMCU1RCUyMCUyQiUyMGxpc3QocmFuZ2UoY2hfc3RhcnQlMkMlMjBjaF9lbmQpKSUwQSUyMCUyMCUyMCUyMCUyMyUyMEZpeCUyMHRoZSUyMHNlZWQlMjBmb3IlMjB0aGUlMjB0ZW1wb3JhbCUyMGNvbnNpc3RlbmN5JTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yLm1hbnVhbF9zZWVkKHNlZWQpJTBBJTIwJTIwJTIwJTIwb3V0cHV0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjB2aWRlb19sZW5ndGglM0RsZW4oZnJhbWVfaWRzKSUyQyUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUyMGZyYW1lX2lkcyUzRGZyYW1lX2lkcyklMEElMjAlMjAlMjAlMjByZXN1bHQuYXBwZW5kKG91dHB1dC5pbWFnZXMlNUIxJTNBJTVEKSUwQSUwQSUyMyUyMENvbmNhdGVuYXRlJTIwY2h1bmtzJTIwYW5kJTIwc2F2ZSUwQXJlc3VsdCUyMCUzRCUyMG5wLmNvbmNhdGVuYXRlKHJlc3VsdCklMEFyZXN1bHQlMjAlM0QlMjAlNUIociUyMColMjAyNTUpLmFzdHlwZSglMjJ1aW50OCUyMiklMjBmb3IlMjByJTIwaW4lMjByZXN1bHQlNUQlMEFpbWFnZWlvLm1pbXNhdmUoJTIydmlkZW8ubXA0JTIyJTJDJTIwcmVzdWx0JTJDJTIwZnBzJTNENCk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| seed = <span class="hljs-number">0</span> | |
| video_length = <span class="hljs-number">24</span> <span class="hljs-comment">#24 ÷ 4fps = 6 seconds</span> | |
| chunk_size = <span class="hljs-number">8</span> | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| <span class="hljs-comment"># Generate the video chunk-by-chunk</span> | |
| result = [] | |
| chunk_ids = np.arange(<span class="hljs-number">0</span>, video_length, chunk_size - <span class="hljs-number">1</span>) | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(chunk_ids)): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Processing chunk <span class="hljs-subst">{i + <span class="hljs-number">1</span>}</span> / <span class="hljs-subst">{<span class="hljs-built_in">len</span>(chunk_ids)}</span>"</span>) | |
| ch_start = chunk_ids[i] | |
| ch_end = video_length <span class="hljs-keyword">if</span> i == <span class="hljs-built_in">len</span>(chunk_ids) - <span class="hljs-number">1</span> <span class="hljs-keyword">else</span> chunk_ids[i + <span class="hljs-number">1</span>] | |
| <span class="hljs-comment"># Attach the first frame for Cross Frame Attention</span> | |
| frame_ids = [<span class="hljs-number">0</span>] + <span class="hljs-built_in">list</span>(<span class="hljs-built_in">range</span>(ch_start, ch_end)) | |
| <span class="hljs-comment"># Fix the seed for the temporal consistency</span> | |
| generator.manual_seed(seed) | |
| output = pipe(prompt=prompt, video_length=<span class="hljs-built_in">len</span>(frame_ids), generator=generator, frame_ids=frame_ids) | |
| result.append(output.images[<span class="hljs-number">1</span>:]) | |
| <span class="hljs-comment"># Concatenate chunks and save</span> | |
| result = np.concatenate(result) | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),le=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),pe=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFiaWxpdHlhaSUyRnN0YWJsZS1kaWZmdXNpb24teGwtYmFzZS0xLjAlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHZhcmlhbnQlM0QlMjJmcDE2JTIyJTJDJTIwdXNlX3NhZmV0ZW5zb3JzJTNEVHJ1ZSUwQSkudG8oJTIyY3VkYSUyMik=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroSDXLPipeline | |
| model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span> | |
| pipe = TextToVideoZeroSDXLPipeline.from_pretrained( | |
| model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>)`,wrap:!1}}),de=new M({props:{title:"Text-To-Video with Pose Control",local:"text-to-video-with-pose-control",headingTag:"h3"}}),fe=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBvc2VzX3NrZWxldG9uX2dpZnMlMkZkYW5jZTFfY29yci5tcDQlMjIlMEFyZXBvX2lkJTIwJTNEJTIwJTIyUEFJUiUyRlRleHQyVmlkZW8tWmVybyUyMiUwQXZpZGVvX3BhdGglMjAlM0QlMjBoZl9odWJfZG93bmxvYWQocmVwb190eXBlJTNEJTIyc3BhY2UlMjIlMkMlMjByZXBvX2lkJTNEcmVwb19pZCUyQyUyMGZpbGVuYW1lJTNEZmlsZW5hbWUp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/poses_skeleton_gifs/dance1_corr.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),he=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXBvc2VfaW1hZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| pose_images = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),ge=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LW9wZW5wb3NlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-openpose"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),_e=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),Te=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uWExDb250cm9sTmV0UGlwZWxpbmUlMkMlMjBDb250cm9sTmV0TW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQWNvbnRyb2xuZXRfbW9kZWxfaWQlMjAlM0QlMjAndGhpYmF1ZCUyRmNvbnRyb2xuZXQtb3BlbnBvc2Utc2R4bC0xLjAnJTBBbW9kZWxfaWQlMjAlM0QlMjAnc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLXhsLWJhc2UtMS4wJyUwQSUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGNvbnRyb2xuZXRfbW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTA5bW9kZWxfaWQlMkMlMjBjb250cm9sbmV0JTNEY29udHJvbG5ldCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJ2N1ZGEnKSUwQSUwQSUyMyUyMFNldCUyMHRoZSUyMGF0dGVudGlvbiUyMHByb2Nlc3NvciUwQXBpcGUudW5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQXBpcGUuY29udHJvbG5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQSUwQSUyMyUyMGZpeCUyMGxhdGVudHMlMjBmb3IlMjBhbGwlMjBmcmFtZXMlMEFsYXRlbnRzJTIwJTNEJTIwdG9yY2gucmFuZG4oKDElMkMlMjA0JTJDJTIwMTI4JTJDJTIwMTI4KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| controlnet_model_id = <span class="hljs-string">'thibaud/controlnet-openpose-sdxl-1.0'</span> | |
| model_id = <span class="hljs-string">'stabilityai/stable-diffusion-xl-base-1.0'</span> | |
| controlnet = ControlNetModel.from_pretrained(controlnet_model_id, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">'cuda'</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),be=new M({props:{title:"Text-To-Video with Edge Control",local:"text-to-video-with-edge-control",headingTag:"h3"}}),we=new M({props:{title:"Video Instruct-Pix2Pix",local:"video-instruct-pix2pix",headingTag:"h3"}}),Me=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBpeDJwaXglMjB2aWRlbyUyRmNhbWVsLm1wNCUyMiUwQXJlcG9faWQlMjAlM0QlMjAlMjJQQUlSJTJGVGV4dDJWaWRlby1aZXJvJTIyJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGhmX2h1Yl9kb3dubG9hZChyZXBvX3R5cGUlM0QlMjJzcGFjZSUyMiUyQyUyMHJlcG9faWQlM0RyZXBvX2lkJTJDJTIwZmlsZW5hbWUlM0RmaWxlbmFtZSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/pix2pix video/camel.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Je=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXZpZGVvJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| video = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Ue=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uSW5zdHJ1Y3RQaXgyUGl4UGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIydGltYnJvb2tzJTJGaW5zdHJ1Y3QtcGl4MnBpeCUyMiUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25JbnN0cnVjdFBpeDJQaXhQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMykpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIybWFrZSUyMGl0JTIwVmFuJTIwR29naCUyMFN0YXJyeSUyME5pZ2h0JTIwc3R5bGUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRCU1QnByb21wdCU1RCUyMColMjBsZW4odmlkZW8pJTJDJTIwaW1hZ2UlM0R2aWRlbykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMmVkaXRlZF92aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionInstructPix2PixPipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"timbrooks/instruct-pix2pix"</span> | |
| pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">3</span>)) | |
| prompt = <span class="hljs-string">"make it Van Gogh Starry Night style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(video), image=video).images | |
| imageio.mimsave(<span class="hljs-string">"edited_video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),Ve=new M({props:{title:"DreamBooth specialization",local:"dreambooth-specialization",headingTag:"h3"}}),Ce=new v({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRmNhbm55X3ZpZGVvc19tcDQlMkZnaXJsX3R1cm5pbmcubXA0JTIyJTBBcmVwb19pZCUyMCUzRCUyMCUyMlBBSVIlMkZUZXh0MlZpZGVvLVplcm8lMjIlMEF2aWRlb19wYXRoJTIwJTNEJTIwaGZfaHViX2Rvd25sb2FkKHJlcG9fdHlwZSUzRCUyMnNwYWNlJTIyJTJDJTIwcmVwb19pZCUzRHJlcG9faWQlMkMlMjBmaWxlbmFtZSUzRGZpbGVuYW1lKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/canny_videos_mp4/girl_turning.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),We=new v({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQWNhbm55X2VkZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| canny_edges = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Pe=new v({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEElMjMlMjBzZXQlMjBtb2RlbCUyMGlkJTIwdG8lMjBjdXN0b20lMjBtb2RlbCUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyUEFJUiUyRnRleHQydmlkZW8temVyby1jb250cm9sbmV0LWNhbm55LWF2YXRhciUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LWNhbm55JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4oY2FubnlfZWRnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMm9pbCUyMHBhaW50aW5nJTIwb2YlMjBhJTIwYmVhdXRpZnVsJTIwZ2lybCUyMGF2YXRhciUyMHN0eWxlJTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKGNhbm55X2VkZ2VzKSUyQyUyMGltYWdlJTNEY2FubnlfZWRnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| <span class="hljs-comment"># set model id to custom model</span> | |
| model_id = <span class="hljs-string">"PAIR/text2video-zero-controlnet-canny-avatar"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-canny"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(canny_edges), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"oil painting of a beautiful girl avatar style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(canny_edges), image=canny_edges, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),G=new js({props:{$$slots:{default:[Cs]},$$scope:{ctx:Gt}}}),Ne=new M({props:{title:"TextToVideoZeroPipeline",local:"diffusers.TextToVideoZeroPipeline",headingTag:"h2"}}),$e=new J({props:{name:"class diffusers.TextToVideoZeroPipeline",anchor:"diffusers.TextToVideoZeroPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — | |
| A <a href="/docs/diffusers/pr_11660/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.TextToVideoZeroPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11660/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_11660/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_11660/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"},{anchor:"diffusers.TextToVideoZeroPipeline.safety_checker",description:`<strong>safety_checker</strong> (<code>StableDiffusionSafetyChecker</code>) — | |
| Classification module that estimates whether generated images could be considered offensive or harmful. | |
| Please refer to the <a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5" rel="nofollow">model card</a> for | |
| more details about a model’s potential harms.`,name:"safety_checker"},{anchor:"diffusers.TextToVideoZeroPipeline.feature_extractor",description:`<strong>feature_extractor</strong> (<code>CLIPImageProcessor</code>) — | |
| A <code>CLIPImageProcessor</code> to extract features from generated images; used as inputs to the <code>safety_checker</code>.`,name:"feature_extractor"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L298"}}),Le=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": typing.Optional[int] = 1"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in video generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">DDIM</a> paper. Only | |
| applies to the <a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>"latent"</code> and <code>"np"</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a | |
| <a href="/docs/diffusers/pr_11660/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput">TextToVideoPipelineOutput</a> instead of | |
| a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L543",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The output contains a <code>ndarray</code> of the generated video, when <code>output_type</code> != <code>"latent"</code>, otherwise a | |
| latent code of generated videos and a list of <code>bool</code>s indicating whether the corresponding generated | |
| video contains “not-safe-for-work” (nsfw) content..</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_11660/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput" | |
| >TextToVideoPipelineOutput</a></p> | |
| `}}),Be=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"cross_attention_kwargs",val:" = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.latents",description:`<strong>latents</strong> — | |
| Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> — | |
| Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> — | |
| Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> — | |
| Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> — | |
| number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L388",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Latents of backward process output at time timesteps[-1].</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),ze=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L815"}}),De=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> — | |
| Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t0",description:`<strong>t0</strong> — | |
| Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t1",description:`<strong>t1</strong> — | |
| Timestamp at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L364",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),Re=new M({props:{title:"TextToVideoZeroSDXLPipeline",local:"diffusers.TextToVideoZeroSDXLPipeline",headingTag:"h2"}}),Qe=new J({props:{name:"class diffusers.TextToVideoZeroSDXLPipeline",anchor:"diffusers.TextToVideoZeroSDXLPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"text_encoder_2",val:": CLIPTextModelWithProjection"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"tokenizer_2",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"image_encoder",val:": CLIPVisionModelWithProjection = None"},{name:"feature_extractor",val:": CLIPImageProcessor = None"},{name:"force_zeros_for_empty_prompt",val:": bool = True"},{name:"add_watermarker",val:": typing.Optional[bool] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder. Stable Diffusion XL uses the text portion of | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIP</a>, specifically | |
| the <a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a> variant.`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code> CLIPTextModelWithProjection</code>) — | |
| Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection" rel="nofollow">CLIP</a>, | |
| specifically the | |
| <a href="https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" rel="nofollow">laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</a> | |
| variant.`,name:"text_encoder_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>CLIPTokenizer</code>) — | |
| Second Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.unet",description:'<strong>unet</strong> (<a href="/docs/diffusers/pr_11660/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — Conditional U-Net architecture to denoise the encoded image latents.',name:"unet"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11660/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_11660/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_11660/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L348"}}),Ee=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"denoising_end",val:": typing.Optional[float] = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"negative_prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"guidance_rescale",val:": float = 0.0"},{name:"original_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"crops_coords_top_left",val:": typing.Tuple[int, int] = (0, 0)"},{name:"target_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.denoising_end",description:`<strong>denoising_end</strong> (<code>float</code>, <em>optional</em>) — | |
| When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be | |
| completed before it is intentionally prematurely terminated. As a result, the returned sample will | |
| still retain a substantial amount of noise as determined by the discrete timesteps selected by the | |
| scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a | |
| “Mixture of Denoisers” multi-pipeline setup, as elaborated in <a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output" rel="nofollow"><strong>Refining the Image | |
| Output</strong></a>`,name:"denoising_end"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2. | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">https://huggingface.co/papers/2010.02502</a>. Only | |
| applies to <a href="/docs/diffusers/pr_11660/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput</code> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that will be called every <code>callback_steps</code> steps during inference. The function will be | |
| called with the following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function will be called. If not specified, the callback will be | |
| called at every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.7) — | |
| Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a> <code>guidance_scale</code> is defined as <code>φ</code> in equation 16. of | |
| <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. Guidance rescale factor should fix overexposure when | |
| using zero terminal SNR.`,name:"guidance_rescale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.original_size",description:`<strong>original_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| If <code>original_size</code> is not the same as <code>target_size</code> the image will appear to be down- or upsampled. | |
| <code>original_size</code> defaults to <code>(width, height)</code> if not specified. Part of SDXL’s micro-conditioning as | |
| explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"original_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.crops_coords_top_left",description:`<strong>crops_coords_top_left</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (0, 0)) — | |
| <code>crops_coords_top_left</code> can be used to generate an image that appears to be “cropped” from the position | |
| <code>crops_coords_top_left</code> downwards. Favorable, well-centered images are usually achieved by setting | |
| <code>crops_coords_top_left</code> to (0, 0). Part of SDXL’s micro-conditioning as explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"crops_coords_top_left"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.target_size",description:`<strong>target_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| For most cases, <code>target_size</code> should be set to the desired height and width of the generated image. If | |
| not specified it will default to <code>(width, height)</code>. Part of SDXL’s micro-conditioning as explained in | |
| section 2.2 of <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"target_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L949",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> or | |
| <code>tuple</code>: <code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> | |
| if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the | |
| generated images.</p> | |
| `}}),Ye=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"add_text_embeds",val:""},{name:"add_time_ids",val:""},{name:"cross_attention_kwargs",val:" = None"},{name:"guidance_rescale",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.latents",description:`<strong>latents</strong> — | |
| Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> — | |
| Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> — | |
| Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> — | |
| Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> — | |
| number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L860",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents of backward process output at time timesteps[-1]</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Fe=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt",parameters:[{name:"prompt",val:": str"},{name:"prompt_2",val:": typing.Optional[str] = None"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Optional[str] = None"},{name:"negative_prompt_2",val:": typing.Optional[str] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L597"}}),He=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> — | |
| Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t0",description:`<strong>t0</strong> — | |
| Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t1",description:`<strong>t1</strong> — | |
| Timestamp at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L836",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),qe=new M({props:{title:"TextToVideoPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",headingTag:"h2"}}),Ae=new J({props:{name:"class diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",parameters:[{name:"images",val:": typing.Union[typing.List[PIL.Image.Image], numpy.ndarray]"},{name:"nsfw_content_detected",val:": typing.Optional[typing.List[bool]]"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.images",description:`<strong>images</strong> (<code>[List[PIL.Image.Image]</code>, <code>np.ndarray</code>]) — | |
| List of denoised PIL images of length <code>batch_size</code> or NumPy array of shape <code>(batch_size, height, width, num_channels)</code>.`,name:"images"},{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.nsfw_content_detected",description:`<strong>nsfw_content_detected</strong> (<code>[List[bool]]</code>) — | |
| List indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content or | |
| <code>None</code> if safety checking could not be performed.`,name:"nsfw_content_detected"}],source:"https://github.com/huggingface/diffusers/blob/vr_11660/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L196"}}),Oe=new Is({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video_zero.md"}}),{c(){b=a("meta"),R=s(),Z=a("p"),Ke=s(),d(Q.$$.fragment),Pt=s(),X=a("div"),X.innerHTML=In,St=s(),E=a("p"),E.innerHTML=Cn,Nt=s(),Y=a("p"),Y.textContent=Xn,$t=s(),F=a("ol"),F.innerHTML=Wn,Lt=s(),H=a("p"),H.textContent=Gn,Bt=s(),q=a("p"),q.innerHTML=Pn,zt=s(),A=a("p"),A.textContent=Sn,Dt=s(),O=a("p"),O.innerHTML=Nn,Rt=s(),K=a("p"),K.innerHTML=$n,Qt=s(),d(ee.$$.fragment),Et=s(),d(te.$$.fragment),Yt=s(),oe=a("p"),oe.textContent=Ln,Ft=s(),d(ne.$$.fragment),Ht=s(),se=a("p"),se.textContent=Bn,qt=s(),ie=a("ul"),ie.innerHTML=zn,At=s(),ae=a("p"),ae.textContent=Dn,Ot=s(),d(re.$$.fragment),Kt=s(),et=a("ul"),W=a("li"),d(le.$$.fragment),Ro=xs("In order to use the SDXL model when generating a video from prompt, use the "),tt=a("code"),tt.textContent=Rn,Qo=xs(" pipeline:"),eo=s(),d(pe.$$.fragment),to=s(),d(de.$$.fragment),oo=s(),ce=a("p"),ce.textContent=Qn,no=s(),ot=a("ol"),me=a("li"),nt=a("p"),nt.textContent=En,Eo=s(),d(fe.$$.fragment),so=s(),V=a("ol"),j=a("li"),st=a("p"),st.textContent=Yn,Yo=s(),d(he.$$.fragment),Fo=s(),it=a("p"),it.innerHTML=Fn,Ho=s(),ue=a("li"),at=a("p"),at.innerHTML=Hn,qo=s(),d(ge.$$.fragment),io=s(),rt=a("ul"),I=a("li"),d(_e.$$.fragment),Ao=s(),lt=a("p"),lt.textContent=qn,Oo=s(),d(Te.$$.fragment),ao=s(),d(be.$$.fragment),ro=s(),ye=a("p"),ye.innerHTML=An,lo=s(),d(we.$$.fragment),po=s(),xe=a("p"),xe.innerHTML=On,co=s(),k=a("ol"),ve=a("li"),pt=a("p"),pt.textContent=Kn,Ko=s(),d(Me.$$.fragment),en=s(),Ze=a("li"),dt=a("p"),dt.textContent=es,tn=s(),d(Je.$$.fragment),on=s(),ke=a("li"),ct=a("p"),ct.innerHTML=ts,nn=s(),d(Ue.$$.fragment),mo=s(),d(Ve.$$.fragment),fo=s(),je=a("p"),je.innerHTML=os,ho=s(),U=a("ol"),Ie=a("li"),mt=a("p"),mt.textContent=ns,sn=s(),d(Ce.$$.fragment),an=s(),Xe=a("li"),ft=a("p"),ft.textContent=ss,rn=s(),d(We.$$.fragment),ln=s(),Ge=a("li"),ht=a("p"),ht.innerHTML=is,pn=s(),d(Pe.$$.fragment),uo=s(),Se=a("p"),Se.innerHTML=as,go=s(),d(G.$$.fragment),_o=s(),d(Ne.$$.fragment),To=s(),_=a("div"),d($e.$$.fragment),dn=s(),ut=a("p"),ut.textContent=rs,cn=s(),gt=a("p"),gt.innerHTML=ls,mn=s(),P=a("div"),d(Le.$$.fragment),fn=s(),_t=a("p"),_t.textContent=ps,hn=s(),S=a("div"),d(Be.$$.fragment),un=s(),Tt=a("p"),Tt.textContent=ds,gn=s(),N=a("div"),d(ze.$$.fragment),_n=s(),bt=a("p"),bt.textContent=cs,Tn=s(),$=a("div"),d(De.$$.fragment),bn=s(),yt=a("p"),yt.textContent=ms,bo=s(),d(Re.$$.fragment),yo=s(),T=a("div"),d(Qe.$$.fragment),yn=s(),wt=a("p"),wt.textContent=fs,wn=s(),xt=a("p"),xt.innerHTML=hs,xn=s(),L=a("div"),d(Ee.$$.fragment),vn=s(),vt=a("p"),vt.textContent=us,Mn=s(),B=a("div"),d(Ye.$$.fragment),Zn=s(),Mt=a("p"),Mt.textContent=gs,Jn=s(),z=a("div"),d(Fe.$$.fragment),kn=s(),Zt=a("p"),Zt.textContent=_s,Un=s(),D=a("div"),d(He.$$.fragment),Vn=s(),Jt=a("p"),Jt.textContent=Ts,wo=s(),d(qe.$$.fragment),xo=s(),C=a("div"),d(Ae.$$.fragment),jn=s(),kt=a("p"),kt.textContent=bs,vo=s(),d(Oe.$$.fragment),Mo=s(),Ct=a("p"),this.h()},l(e){const o=Vs("svelte-u9bgzb",document.head);b=r(o,"META",{name:!0,content:!0}),o.forEach(t),R=i(e),Z=r(e,"P",{}),g(Z).forEach(t),Ke=i(e),c(Q.$$.fragment,e),Pt=i(e),X=r(e,"DIV",{class:!0,"data-svelte-h":!0}),p(X)!=="svelte-si9ct8"&&(X.innerHTML=In),St=i(e),E=r(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-16xpzkf"&&(E.innerHTML=Cn),Nt=i(e),Y=r(e,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-1jaz0ks"&&(Y.textContent=Xn),$t=i(e),F=r(e,"OL",{"data-svelte-h":!0}),p(F)!=="svelte-19ca1wn"&&(F.innerHTML=Wn),Lt=i(e),H=r(e,"P",{"data-svelte-h":!0}),p(H)!=="svelte-yhxhyq"&&(H.textContent=Gn),Bt=i(e),q=r(e,"P",{"data-svelte-h":!0}),p(q)!=="svelte-uupw0l"&&(q.innerHTML=Pn),zt=i(e),A=r(e,"P",{"data-svelte-h":!0}),p(A)!=="svelte-1cwsb16"&&(A.textContent=Sn),Dt=i(e),O=r(e,"P",{"data-svelte-h":!0}),p(O)!=="svelte-1rtg145"&&(O.innerHTML=Nn),Rt=i(e),K=r(e,"P",{"data-svelte-h":!0}),p(K)!=="svelte-18vykkr"&&(K.innerHTML=$n),Qt=i(e),c(ee.$$.fragment,e),Et=i(e),c(te.$$.fragment,e),Yt=i(e),oe=r(e,"P",{"data-svelte-h":!0}),p(oe)!=="svelte-1dpixty"&&(oe.textContent=Ln),Ft=i(e),c(ne.$$.fragment,e),Ht=i(e),se=r(e,"P",{"data-svelte-h":!0}),p(se)!=="svelte-rs2kss"&&(se.textContent=Bn),qt=i(e),ie=r(e,"UL",{"data-svelte-h":!0}),p(ie)!=="svelte-1w3gvi6"&&(ie.innerHTML=zn),At=i(e),ae=r(e,"P",{"data-svelte-h":!0}),p(ae)!=="svelte-1xdujjh"&&(ae.textContent=Dn),Ot=i(e),c(re.$$.fragment,e),Kt=i(e),et=r(e,"UL",{});var Xt=g(et);W=r(Xt,"LI",{});var Wt=g(W);c(le.$$.fragment,Wt),Ro=vs(Wt,"In order to use the SDXL model when generating a video from prompt, use the "),tt=r(Wt,"CODE",{"data-svelte-h":!0}),p(tt)!=="svelte-7o0i0w"&&(tt.textContent=Rn),Qo=vs(Wt," pipeline:"),Wt.forEach(t),Xt.forEach(t),eo=i(e),c(pe.$$.fragment,e),to=i(e),c(de.$$.fragment,e),oo=i(e),ce=r(e,"P",{"data-svelte-h":!0}),p(ce)!=="svelte-13nlg5v"&&(ce.textContent=Qn),no=i(e),ot=r(e,"OL",{});var ys=g(ot);me=r(ys,"LI",{});var Jo=g(me);nt=r(Jo,"P",{"data-svelte-h":!0}),p(nt)!=="svelte-1xo2nq1"&&(nt.textContent=En),Eo=i(Jo),c(fe.$$.fragment,Jo),Jo.forEach(t),ys.forEach(t),so=i(e),V=r(e,"OL",{start:!0});var ko=g(V);j=r(ko,"LI",{});var Ut=g(j);st=r(Ut,"P",{"data-svelte-h":!0}),p(st)!=="svelte-7pm43o"&&(st.textContent=Yn),Yo=i(Ut),c(he.$$.fragment,Ut),Fo=i(Ut),it=r(Ut,"P",{"data-svelte-h":!0}),p(it)!=="svelte-mij11h"&&(it.innerHTML=Fn),Ut.forEach(t),Ho=i(ko),ue=r(ko,"LI",{});var Uo=g(ue);at=r(Uo,"P",{"data-svelte-h":!0}),p(at)!=="svelte-1wsv5bt"&&(at.innerHTML=Hn),qo=i(Uo),c(ge.$$.fragment,Uo),Uo.forEach(t),ko.forEach(t),io=i(e),rt=r(e,"UL",{});var ws=g(rt);I=r(ws,"LI",{});var Vt=g(I);c(_e.$$.fragment,Vt),Ao=i(Vt),lt=r(Vt,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-bcy3mo"&&(lt.textContent=qn),Oo=i(Vt),c(Te.$$.fragment,Vt),Vt.forEach(t),ws.forEach(t),ao=i(e),c(be.$$.fragment,e),ro=i(e),ye=r(e,"P",{"data-svelte-h":!0}),p(ye)!=="svelte-1fgx3w6"&&(ye.innerHTML=An),lo=i(e),c(we.$$.fragment,e),po=i(e),xe=r(e,"P",{"data-svelte-h":!0}),p(xe)!=="svelte-o0vmiz"&&(xe.innerHTML=On),co=i(e),k=r(e,"OL",{});var jt=g(k);ve=r(jt,"LI",{});var Vo=g(ve);pt=r(Vo,"P",{"data-svelte-h":!0}),p(pt)!=="svelte-1xo2nq1"&&(pt.textContent=Kn),Ko=i(Vo),c(Me.$$.fragment,Vo),Vo.forEach(t),en=i(jt),Ze=r(jt,"LI",{});var jo=g(Ze);dt=r(jo,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-1wln6n0"&&(dt.textContent=es),tn=i(jo),c(Je.$$.fragment,jo),jo.forEach(t),on=i(jt),ke=r(jt,"LI",{});var Io=g(ke);ct=r(Io,"P",{"data-svelte-h":!0}),p(ct)!=="svelte-14jy04z"&&(ct.innerHTML=ts),nn=i(Io),c(Ue.$$.fragment,Io),Io.forEach(t),jt.forEach(t),mo=i(e),c(Ve.$$.fragment,e),fo=i(e),je=r(e,"P",{"data-svelte-h":!0}),p(je)!=="svelte-x1pkth"&&(je.innerHTML=os),ho=i(e),U=r(e,"OL",{});var It=g(U);Ie=r(It,"LI",{});var Co=g(Ie);mt=r(Co,"P",{"data-svelte-h":!0}),p(mt)!=="svelte-1xo2nq1"&&(mt.textContent=ns),sn=i(Co),c(Ce.$$.fragment,Co),Co.forEach(t),an=i(It),Xe=r(It,"LI",{});var Xo=g(Xe);ft=r(Xo,"P",{"data-svelte-h":!0}),p(ft)!=="svelte-1wln6n0"&&(ft.textContent=ss),rn=i(Xo),c(We.$$.fragment,Xo),Xo.forEach(t),ln=i(It),Ge=r(It,"LI",{});var Wo=g(Ge);ht=r(Wo,"P",{"data-svelte-h":!0}),p(ht)!=="svelte-okpmrc"&&(ht.innerHTML=is),pn=i(Wo),c(Pe.$$.fragment,Wo),Wo.forEach(t),It.forEach(t),uo=i(e),Se=r(e,"P",{"data-svelte-h":!0}),p(Se)!=="svelte-18tnclt"&&(Se.innerHTML=as),go=i(e),c(G.$$.fragment,e),_o=i(e),c(Ne.$$.fragment,e),To=i(e),_=r(e,"DIV",{class:!0});var w=g(_);c($e.$$.fragment,w),dn=i(w),ut=r(w,"P",{"data-svelte-h":!0}),p(ut)!=="svelte-1q57293"&&(ut.textContent=rs),cn=i(w),gt=r(w,"P",{"data-svelte-h":!0}),p(gt)!=="svelte-1ffteuy"&&(gt.innerHTML=ls),mn=i(w),P=r(w,"DIV",{class:!0});var Go=g(P);c(Le.$$.fragment,Go),fn=i(Go),_t=r(Go,"P",{"data-svelte-h":!0}),p(_t)!=="svelte-50j04k"&&(_t.textContent=ps),Go.forEach(t),hn=i(w),S=r(w,"DIV",{class:!0});var Po=g(S);c(Be.$$.fragment,Po),un=i(Po),Tt=r(Po,"P",{"data-svelte-h":!0}),p(Tt)!=="svelte-1cxzr1t"&&(Tt.textContent=ds),Po.forEach(t),gn=i(w),N=r(w,"DIV",{class:!0});var So=g(N);c(ze.$$.fragment,So),_n=i(So),bt=r(So,"P",{"data-svelte-h":!0}),p(bt)!=="svelte-16q0ax1"&&(bt.textContent=cs),So.forEach(t),Tn=i(w),$=r(w,"DIV",{class:!0});var No=g($);c(De.$$.fragment,No),bn=i(No),yt=r(No,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-1d8vbe1"&&(yt.textContent=ms),No.forEach(t),w.forEach(t),bo=i(e),c(Re.$$.fragment,e),yo=i(e),T=r(e,"DIV",{class:!0});var x=g(T);c(Qe.$$.fragment,x),yn=i(x),wt=r(x,"P",{"data-svelte-h":!0}),p(wt)!=="svelte-pyonrv"&&(wt.textContent=fs),wn=i(x),xt=r(x,"P",{"data-svelte-h":!0}),p(xt)!=="svelte-1ffteuy"&&(xt.innerHTML=hs),xn=i(x),L=r(x,"DIV",{class:!0});var $o=g(L);c(Ee.$$.fragment,$o),vn=i($o),vt=r($o,"P",{"data-svelte-h":!0}),p(vt)!=="svelte-v78lg8"&&(vt.textContent=us),$o.forEach(t),Mn=i(x),B=r(x,"DIV",{class:!0});var Lo=g(B);c(Ye.$$.fragment,Lo),Zn=i(Lo),Mt=r(Lo,"P",{"data-svelte-h":!0}),p(Mt)!=="svelte-jp6j47"&&(Mt.textContent=gs),Lo.forEach(t),Jn=i(x),z=r(x,"DIV",{class:!0});var Bo=g(z);c(Fe.$$.fragment,Bo),kn=i(Bo),Zt=r(Bo,"P",{"data-svelte-h":!0}),p(Zt)!=="svelte-16q0ax1"&&(Zt.textContent=_s),Bo.forEach(t),Un=i(x),D=r(x,"DIV",{class:!0});var zo=g(D);c(He.$$.fragment,zo),Vn=i(zo),Jt=r(zo,"P",{"data-svelte-h":!0}),p(Jt)!=="svelte-1d8vbe1"&&(Jt.textContent=Ts),zo.forEach(t),x.forEach(t),wo=i(e),c(qe.$$.fragment,e),xo=i(e),C=r(e,"DIV",{class:!0});var Do=g(C);c(Ae.$$.fragment,Do),jn=i(Do),kt=r(Do,"P",{"data-svelte-h":!0}),p(kt)!=="svelte-1dgz4ei"&&(kt.textContent=bs),Do.forEach(t),vo=i(e),c(Oe.$$.fragment,e),Mo=i(e),Ct=r(e,"P",{}),g(Ct).forEach(t),this.h()},h(){y(b,"name","hf:doc:metadata"),y(b,"content",Ws),y(X,"class","flex flex-wrap space-x-1"),y(V,"start","2"),y(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(_,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){n(document.head,b),l(e,R,o),l(e,Z,o),l(e,Ke,o),m(Q,e,o),l(e,Pt,o),l(e,X,o),l(e,St,o),l(e,E,o),l(e,Nt,o),l(e,Y,o),l(e,$t,o),l(e,F,o),l(e,Lt,o),l(e,H,o),l(e,Bt,o),l(e,q,o),l(e,zt,o),l(e,A,o),l(e,Dt,o),l(e,O,o),l(e,Rt,o),l(e,K,o),l(e,Qt,o),m(ee,e,o),l(e,Et,o),m(te,e,o),l(e,Yt,o),l(e,oe,o),l(e,Ft,o),m(ne,e,o),l(e,Ht,o),l(e,se,o),l(e,qt,o),l(e,ie,o),l(e,At,o),l(e,ae,o),l(e,Ot,o),m(re,e,o),l(e,Kt,o),l(e,et,o),n(et,W),m(le,W,null),n(W,Ro),n(W,tt),n(W,Qo),l(e,eo,o),m(pe,e,o),l(e,to,o),m(de,e,o),l(e,oo,o),l(e,ce,o),l(e,no,o),l(e,ot,o),n(ot,me),n(me,nt),n(me,Eo),m(fe,me,null),l(e,so,o),l(e,V,o),n(V,j),n(j,st),n(j,Yo),m(he,j,null),n(j,Fo),n(j,it),n(V,Ho),n(V,ue),n(ue,at),n(ue,qo),m(ge,ue,null),l(e,io,o),l(e,rt,o),n(rt,I),m(_e,I,null),n(I,Ao),n(I,lt),n(I,Oo),m(Te,I,null),l(e,ao,o),m(be,e,o),l(e,ro,o),l(e,ye,o),l(e,lo,o),m(we,e,o),l(e,po,o),l(e,xe,o),l(e,co,o),l(e,k,o),n(k,ve),n(ve,pt),n(ve,Ko),m(Me,ve,null),n(k,en),n(k,Ze),n(Ze,dt),n(Ze,tn),m(Je,Ze,null),n(k,on),n(k,ke),n(ke,ct),n(ke,nn),m(Ue,ke,null),l(e,mo,o),m(Ve,e,o),l(e,fo,o),l(e,je,o),l(e,ho,o),l(e,U,o),n(U,Ie),n(Ie,mt),n(Ie,sn),m(Ce,Ie,null),n(U,an),n(U,Xe),n(Xe,ft),n(Xe,rn),m(We,Xe,null),n(U,ln),n(U,Ge),n(Ge,ht),n(Ge,pn),m(Pe,Ge,null),l(e,uo,o),l(e,Se,o),l(e,go,o),m(G,e,o),l(e,_o,o),m(Ne,e,o),l(e,To,o),l(e,_,o),m($e,_,null),n(_,dn),n(_,ut),n(_,cn),n(_,gt),n(_,mn),n(_,P),m(Le,P,null),n(P,fn),n(P,_t),n(_,hn),n(_,S),m(Be,S,null),n(S,un),n(S,Tt),n(_,gn),n(_,N),m(ze,N,null),n(N,_n),n(N,bt),n(_,Tn),n(_,$),m(De,$,null),n($,bn),n($,yt),l(e,bo,o),m(Re,e,o),l(e,yo,o),l(e,T,o),m(Qe,T,null),n(T,yn),n(T,wt),n(T,wn),n(T,xt),n(T,xn),n(T,L),m(Ee,L,null),n(L,vn),n(L,vt),n(T,Mn),n(T,B),m(Ye,B,null),n(B,Zn),n(B,Mt),n(T,Jn),n(T,z),m(Fe,z,null),n(z,kn),n(z,Zt),n(T,Un),n(T,D),m(He,D,null),n(D,Vn),n(D,Jt),l(e,wo,o),m(qe,e,o),l(e,xo,o),l(e,C,o),m(Ae,C,null),n(C,jn),n(C,kt),l(e,vo,o),m(Oe,e,o),l(e,Mo,o),l(e,Ct,o),Zo=!0},p(e,[o]){const Xt={};o&2&&(Xt.$$scope={dirty:o,ctx:e}),G.$set(Xt)},i(e){Zo||(f(Q.$$.fragment,e),f(ee.$$.fragment,e),f(te.$$.fragment,e),f(ne.$$.fragment,e),f(re.$$.fragment,e),f(le.$$.fragment,e),f(pe.$$.fragment,e),f(de.$$.fragment,e),f(fe.$$.fragment,e),f(he.$$.fragment,e),f(ge.$$.fragment,e),f(_e.$$.fragment,e),f(Te.$$.fragment,e),f(be.$$.fragment,e),f(we.$$.fragment,e),f(Me.$$.fragment,e),f(Je.$$.fragment,e),f(Ue.$$.fragment,e),f(Ve.$$.fragment,e),f(Ce.$$.fragment,e),f(We.$$.fragment,e),f(Pe.$$.fragment,e),f(G.$$.fragment,e),f(Ne.$$.fragment,e),f($e.$$.fragment,e),f(Le.$$.fragment,e),f(Be.$$.fragment,e),f(ze.$$.fragment,e),f(De.$$.fragment,e),f(Re.$$.fragment,e),f(Qe.$$.fragment,e),f(Ee.$$.fragment,e),f(Ye.$$.fragment,e),f(Fe.$$.fragment,e),f(He.$$.fragment,e),f(qe.$$.fragment,e),f(Ae.$$.fragment,e),f(Oe.$$.fragment,e),Zo=!0)},o(e){h(Q.$$.fragment,e),h(ee.$$.fragment,e),h(te.$$.fragment,e),h(ne.$$.fragment,e),h(re.$$.fragment,e),h(le.$$.fragment,e),h(pe.$$.fragment,e),h(de.$$.fragment,e),h(fe.$$.fragment,e),h(he.$$.fragment,e),h(ge.$$.fragment,e),h(_e.$$.fragment,e),h(Te.$$.fragment,e),h(be.$$.fragment,e),h(we.$$.fragment,e),h(Me.$$.fragment,e),h(Je.$$.fragment,e),h(Ue.$$.fragment,e),h(Ve.$$.fragment,e),h(Ce.$$.fragment,e),h(We.$$.fragment,e),h(Pe.$$.fragment,e),h(G.$$.fragment,e),h(Ne.$$.fragment,e),h($e.$$.fragment,e),h(Le.$$.fragment,e),h(Be.$$.fragment,e),h(ze.$$.fragment,e),h(De.$$.fragment,e),h(Re.$$.fragment,e),h(Qe.$$.fragment,e),h(Ee.$$.fragment,e),h(Ye.$$.fragment,e),h(Fe.$$.fragment,e),h(He.$$.fragment,e),h(qe.$$.fragment,e),h(Ae.$$.fragment,e),h(Oe.$$.fragment,e),Zo=!1},d(e){e&&(t(R),t(Z),t(Ke),t(Pt),t(X),t(St),t(E),t(Nt),t(Y),t($t),t(F),t(Lt),t(H),t(Bt),t(q),t(zt),t(A),t(Dt),t(O),t(Rt),t(K),t(Qt),t(Et),t(Yt),t(oe),t(Ft),t(Ht),t(se),t(qt),t(ie),t(At),t(ae),t(Ot),t(Kt),t(et),t(eo),t(to),t(oo),t(ce),t(no),t(ot),t(so),t(V),t(io),t(rt),t(ao),t(ro),t(ye),t(lo),t(po),t(xe),t(co),t(k),t(mo),t(fo),t(je),t(ho),t(U),t(uo),t(Se),t(go),t(_o),t(To),t(_),t(bo),t(yo),t(T),t(wo),t(xo),t(C),t(vo),t(Mo),t(Ct)),t(b),u(Q,e),u(ee,e),u(te,e),u(ne,e),u(re,e),u(le),u(pe,e),u(de,e),u(fe),u(he),u(ge),u(_e),u(Te),u(be,e),u(we,e),u(Me),u(Je),u(Ue),u(Ve,e),u(Ce),u(We),u(Pe),u(G,e),u(Ne,e),u($e),u(Le),u(Be),u(ze),u(De),u(Re,e),u(Qe),u(Ee),u(Ye),u(Fe),u(He),u(qe,e),u(Ae),u(Oe,e)}}}const Ws='{"title":"Text2Video-Zero","local":"text2video-zero","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"Text-To-Video","local":"text-to-video","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Pose Control","local":"text-to-video-with-pose-control","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Edge Control","local":"text-to-video-with-edge-control","sections":[],"depth":3},{"title":"Video Instruct-Pix2Pix","local":"video-instruct-pix2pix","sections":[],"depth":3},{"title":"DreamBooth specialization","local":"dreambooth-specialization","sections":[],"depth":3}],"depth":2},{"title":"TextToVideoZeroPipeline","local":"diffusers.TextToVideoZeroPipeline","sections":[],"depth":2},{"title":"TextToVideoZeroSDXLPipeline","local":"diffusers.TextToVideoZeroSDXLPipeline","sections":[],"depth":2},{"title":"TextToVideoPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';function Gs(Gt){return Zs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class zs extends ks{constructor(b){super(),Us(this,b,Gs,Xs,Ms,{})}}export{zs as component}; | |
Xet Storage Details
- Size:
- 105 kB
- Xet hash:
- 023c3a50e2cd93d617ed00631c3ce50b8b2817f393a6c3f9a9cee121b12d4ece
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.