Buckets:
| import{s as $o,o as Do,n as jt}from"../chunks/scheduler.8c3d61f6.js";import{S as So,i as Co,g as a,s,r as f,A as Uo,h as r,f as o,c as i,j as C,u,x as d,k as D,y as p,a as n,v as h,d as g,t as _,w as v}from"../chunks/index.da70eac4.js";import{T as jo}from"../chunks/Tip.1d9b8c37.js";import{D as Ze}from"../chunks/Docstring.fa488882.js";import{C as Je}from"../chunks/CodeBlock.a9c4becf.js";import{E as ko}from"../chunks/ExampleCodeBlock.ec9feb8f.js";import{H as X,E as Io}from"../chunks/index.dfbaf638.js";function Go(j){let l,Z="🧪 This pipeline is for research purposes only.";return{c(){l=a("p"),l.textContent=Z},l(m){l=r(m,"P",{"data-svelte-h":!0}),d(l)!=="svelte-1oxhjjd"&&(l.textContent=Z)},m(m,b){n(m,l,b)},p:jt,d(m){m&&o(l)}}}function Po(j){let l,Z='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){l=a("p"),l.innerHTML=Z},l(m){l=r(m,"P",{"data-svelte-h":!0}),d(l)!=="svelte-1qn15hi"&&(l.innerHTML=Z)},m(m,b){n(m,l,b)},p:jt,d(m){m&&o(l)}}}function Wo(j){let l,Z="Examples:",m,b,w;return b=new 
Je({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9TRFBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUwQSUwQXBpcGUlMjAlM0QlMjBUZXh0VG9WaWRlb1NEUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmRhbW8tdmlsYWIlMkZ0ZXh0LXRvLXZpZGVvLW1zLTEuN2IlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB2YXJpYW50JTNEJTIyZnAxNiUyMiUwQSklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyU3BpZGVybWFuJTIwaXMlMjBzdXJmaW5nJTIyJTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyklMEF2aWRlb19wYXRo",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoSDPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-meta">>>> </span>pipe = TextToVideoSDPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"Spiderman is surfing"</span> | |
| <span class="hljs-meta">>>> </span>video_frames = pipe(prompt).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>video_path = export_to_video(video_frames) | |
| <span class="hljs-meta">>>> </span>video_path`,wrap:!1}}),{c(){l=a("p"),l.textContent=Z,m=s(),f(b.$$.fragment)},l(c){l=r(c,"P",{"data-svelte-h":!0}),d(l)!=="svelte-kvfsh7"&&(l.textContent=Z),m=i(c),u(b.$$.fragment,c)},m(c,J){n(c,l,J),n(c,m,J),h(b,c,J),w=!0},p:jt,i(c){w||(g(b.$$.fragment,c),w=!0)},o(c){_(b.$$.fragment,c),w=!1},d(c){c&&(o(l),o(m)),v(b,c)}}}function Bo(j){let l,Z="Examples:",m,b,w;return b=new Je({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMkMlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJjZXJzcGVuc2UlMkZ6ZXJvc2NvcGVfdjJfNTc2dyUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwaXBlLnNjaGVkdWxlciUyMCUzRCUyMERQTVNvbHZlck11bHRpc3RlcFNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlLnNjaGVkdWxlci5jb25maWcpJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMnNwaWRlcm1hbiUyMHJ1bm5pbmclMjBpbiUyMHRoZSUyMGRlc2VydCUyMiUwQXZpZGVvX2ZyYW1lcyUyMCUzRCUyMHBpcGUocHJvbXB0JTJDJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDQwJTJDJTIwaGVpZ2h0JTNEMzIwJTJDJTIwd2lkdGglM0Q1NzYlMkMlMjBudW1fZnJhbWVzJTNEMjQpLmZyYW1lcyU1QjAlNUQlMEElMjMlMjBzYWZlJTIwbG93LXJlcyUyMHZpZGVvJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGV4cG9ydF90b192aWRlbyh2aWRlb19mcmFtZXMlMkMlMjBvdXRwdXRfdmlkZW9fcGF0aCUzRCUyMi4lMkZ2aWRlb181NzZfc3BpZGVybWFuLm1wNCUyMiklMEElMEElMjMlMjBsZXQncyUyMG9mZmxvYWQlMjB0aGUlMjB0ZXh0LXRvLWltYWdlJTIwbW9kZWwlMEFwaXBlLnRvKCUyMmNwdSUyMiklMEElMEElMjMlMjBhbmQlMjBsb2FkJTIwdGhlJTIwaW1hZ2UtdG8taW1hZ2UlMjBtb2RlbCUwQXBpcGUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyY2Vyc3BlbnNlJTJGemVyb3Njb3BlX3YyX1hMJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwcmV2aXNpb24lM0QlMjJyZWZzJTJGcHIlMkYxNSUyMiUwQSklMEFwaXBlLnNjaGVkdWxlciUyMCUzRCUyMERQTVNvbHZlck11bHRpc3RlcFNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlLnNjaGVkdWxlci5jb25maWcpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMFRoZSUyMFZBRSUyMGNvbnN1bW
VzJTIwQSUyMExPVCUyMG9mJTIwbWVtb3J5JTJDJTIwbGV0J3MlMjBtYWtlJTIwc3VyZSUyMHdlJTIwcnVuJTIwaXQlMjBpbiUyMHNsaWNlZCUyMG1vZGUlMEFwaXBlLnZhZS5lbmFibGVfc2xpY2luZygpJTBBJTBBJTIzJTIwbm93JTIwbGV0J3MlMjB1cHNjYWxlJTIwaXQlMEF2aWRlbyUyMCUzRCUyMCU1QkltYWdlLmZyb21hcnJheShmcmFtZSkucmVzaXplKCgxMDI0JTJDJTIwNTc2KSklMjBmb3IlMjBmcmFtZSUyMGluJTIwdmlkZW9fZnJhbWVzJTVEJTBBJTBBJTIzJTIwYW5kJTIwZGVub2lzZSUyMGl0JTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlMkMlMjB2aWRlbyUzRHZpZGVvJTJDJTIwc3RyZW5ndGglM0QwLjYpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyUyQyUyMG91dHB1dF92aWRlb19wYXRoJTNEJTIyLiUyRnZpZGVvXzEwMjRfc3BpZGVybWFuLm1wNCUyMiklMEF2aWRlb19wYXRo",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-meta">>>> </span>pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"cerspense/zeroscope_v2_576w"</span>, torch_dtype=torch.float16) | |
| <span class="hljs-meta">>>> </span>pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"spiderman running in the desert"</span> | |
| <span class="hljs-meta">>>> </span>video_frames = pipe(prompt, num_inference_steps=<span class="hljs-number">40</span>, height=<span class="hljs-number">320</span>, width=<span class="hljs-number">576</span>, num_frames=<span class="hljs-number">24</span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># safe low-res video</span> | |
| <span class="hljs-meta">>>> </span>video_path = export_to_video(video_frames, output_video_path=<span class="hljs-string">"./video_576_spiderman.mp4"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># let's offload the text-to-image model</span> | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cpu"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># and load the image-to-image model</span> | |
| <span class="hljs-meta">>>> </span>pipe = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"cerspense/zeroscope_v2_XL"</span>, torch_dtype=torch.float16, revision=<span class="hljs-string">"refs/pr/15"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># The VAE consumes A LOT of memory, let's make sure we run it in sliced mode</span> | |
| <span class="hljs-meta">>>> </span>pipe.vae.enable_slicing() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># now let's upscale it</span> | |
| <span class="hljs-meta">>>> </span>video = [Image.fromarray(frame).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">576</span>)) <span class="hljs-keyword">for</span> frame <span class="hljs-keyword">in</span> video_frames] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># and denoise it</span> | |
| <span class="hljs-meta">>>> </span>video_frames = pipe(prompt, video=video, strength=<span class="hljs-number">0.6</span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>video_path = export_to_video(video_frames, output_video_path=<span class="hljs-string">"./video_1024_spiderman.mp4"</span>) | |
| <span class="hljs-meta">>>> </span>video_path`,wrap:!1}}),{c(){l=a("p"),l.textContent=Z,m=s(),f(b.$$.fragment)},l(c){l=r(c,"P",{"data-svelte-h":!0}),d(l)!=="svelte-kvfsh7"&&(l.textContent=Z),m=i(c),u(b.$$.fragment,c)},m(c,J){n(c,l,J),n(c,m,J),h(b,c,J),w=!0},p:jt,i(c){w||(g(b.$$.fragment,c),w=!0)},o(c){_(b.$$.fragment,c),w=!1},d(c){c&&(o(l),o(m)),v(b,c)}}}function Xo(j){let l,Z,m,b,w,c,J,Ne,U,Qt='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',Ee,L,At='<a href="https://arxiv.org/abs/2308.06571" rel="nofollow">ModelScope Text-to-Video Technical Report</a> is by Jiuniu Wang, Hangjie Yuan, Dayou Chen, Yingya Zhang, Xiang Wang, Shiwei Zhang.',He,z,qt="The abstract from the paper is:",Ye,R,Ot='<em>This paper introduces ModelScopeT2V, a text-to-video synthesis model that evolves from a text-to-image synthesis model (i.e., Stable Diffusion). ModelScopeT2V incorporates spatio-temporal blocks to ensure consistent frame generation and smooth movement transitions. The model could adapt to varying frame numbers during training and inference, rendering it suitable for both image-text and video-text datasets. ModelScopeT2V brings together three components (i.e., VQGAN, a text encoder, and a denoising UNet), totally comprising 1.7 billion parameters, in which 0.5 billion parameters are dedicated to temporal capabilities. The model demonstrates superior performance over state-of-the-art methods across three evaluation metrics. 
The code and an online demo are available at <a href="https://modelscope.cn/models/damo/text-to-video-synthesis/summary" rel="nofollow">https://modelscope.cn/models/damo/text-to-video-synthesis/summary</a>.</em>',Fe,N,Kt='You can find additional information about Text-to-Video on the <a href="https://modelscope.cn/models/damo/text-to-video-synthesis/summary" rel="nofollow">project page</a>, <a href="https://github.com/modelscope/modelscope/" rel="nofollow">original codebase</a>, and try it out in a <a href="https://huggingface.co/spaces/damo-vilab/modelscope-text-to-video-synthesis" rel="nofollow">demo</a>. Official checkpoints can be found at <a href="https://huggingface.co/damo-vilab" rel="nofollow">damo-vilab</a> and <a href="https://huggingface.co/cerspense" rel="nofollow">cerspense</a>.',Qe,E,Ae,H,qe,Y,eo="Let’s start by generating a short video with the default length of 16 frames (2s at 8 fps):",Oe,F,Ke,Q,to=`Diffusers supports different optimization techniques to improve the latency | |
| and memory footprint of a pipeline. Since videos are often more memory-heavy than images, | |
| we can enable CPU offloading and VAE slicing to keep the memory footprint at bay.`,et,A,oo="Let’s generate a video of 8 seconds (64 frames) on the same GPU using CPU offloading and VAE slicing:",tt,q,ot,O,no="It just takes <strong>7 GBs of GPU memory</strong> to generate the 64 video frames using PyTorch 2.0, “fp16” precision and the techniques mentioned above.",nt,K,so="We can also use a different scheduler easily, using the same method we’d use for Stable Diffusion:",st,ee,it,te,io="Here are some sample outputs:",at,oe,ao=`<tbody><tr><td><center>An astronaut riding a horse. | |
| <br/> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astr.gif" alt="An astronaut riding a horse." style="width: 300px;"/></center></td> <td><center>Darth vader surfing in waves. | |
| <br/> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/vader.gif" alt="Darth vader surfing in waves." style="width: 300px;"/></center></td></tr></tbody>`,rt,ne,lt,se,ro=`Zeroscope are watermark-free model and have been trained on specific sizes such as <code>576x320</code> and <code>1024x576</code>. | |
| One should first generate a video using the lower resolution checkpoint <a href="https://huggingface.co/cerspense/zeroscope_v2_576w" rel="nofollow"><code>cerspense/zeroscope_v2_576w</code></a> with <a href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.TextToVideoSDPipeline">TextToVideoSDPipeline</a>, | |
| which can then be upscaled using <a href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.VideoToVideoSDPipeline">VideoToVideoSDPipeline</a> and <a href="https://huggingface.co/cerspense/zeroscope_v2_XL" rel="nofollow"><code>cerspense/zeroscope_v2_XL</code></a>.`,pt,ie,dt,ae,lo="Now the video can be upscaled:",ct,re,mt,le,po="Here are some sample outputs:",ft,pe,co=`<tbody><tr><td><center>Darth vader surfing in waves. | |
| <br/> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/darthvader_cerpense.gif" alt="Darth vader surfing in waves." style="width: 576px;"/></center></td></tr></tbody>`,ut,de,ht,ce,mo="Video generation is memory-intensive and one way to reduce your memory usage is to set <code>enable_forward_chunking</code> on the pipeline’s UNet so you don’t run the entire feedforward layer at once. Breaking it up into chunks in a loop is more efficient.",gt,me,fo='Check out the <a href="text-img2vid">Text or image-to-video</a> guide for more details about how certain parameters can affect video generation and how to optimize inference by reducing memory usage.',_t,I,vt,fe,bt,T,ue,kt,Ve,uo="Pipeline for text-to-video generation.",$t,je,ho=`This model inherits from <a href="/docs/diffusers/pr_11415/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Dt,ke,go="The pipeline also inherits the following loading methods:",St,$e,_o='<li><a href="/docs/diffusers/pr_11415/en/api/loaders/textual_inversion#diffusers.loaders.TextualInversionLoaderMixin.load_textual_inversion">load_textual_inversion()</a> for loading textual inversion embeddings</li> <li><a href="/docs/diffusers/pr_11415/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> for loading LoRA weights</li> <li><a href="/docs/diffusers/pr_11415/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.save_lora_weights">save_lora_weights()</a> for saving LoRA weights</li>',Ct,k,he,Ut,De,vo="The call function to the pipeline for generation.",It,G,Gt,P,ge,Pt,Se,bo="Encodes the prompt into text encoder hidden states.",Tt,_e,yt,y,ve,Wt,Ce,To="Pipeline for text-guided video-to-video generation.",Bt,Ue,yo=`This model inherits from <a href="/docs/diffusers/pr_11415/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Xt,Ie,wo="The pipeline also inherits the following loading methods:",Lt,Ge,xo='<li><a href="/docs/diffusers/pr_11415/en/api/loaders/textual_inversion#diffusers.loaders.TextualInversionLoaderMixin.load_textual_inversion">load_textual_inversion()</a> for loading textual inversion embeddings</li> <li><a href="/docs/diffusers/pr_11415/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> for loading LoRA weights</li> <li><a href="/docs/diffusers/pr_11415/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.save_lora_weights">save_lora_weights()</a> for saving LoRA weights</li>',zt,$,be,Rt,Pe,Mo="The call function to the pipeline for generation.",Nt,W,Et,B,Te,Ht,We,Zo="Encodes the prompt into text encoder hidden states.",wt,ye,xt,V,we,Yt,Be,Jo="Output class for text-to-video pipelines.",Ft,Xe,Vo=`PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape | |
| <code>(batch_size, num_frames, channels, height, width)</code>`,Mt,xe,Zt,Re,Jt;return w=new jo({props:{warning:!0,$$slots:{default:[Go]},$$scope:{ctx:j}}}),J=new X({props:{title:"Text-to-video",local:"text-to-video",headingTag:"h1"}}),E=new X({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),H=new X({props:{title:"text-to-video-ms-1.7b",local:"text-to-video-ms-17b",headingTag:"h3"}}),F=new Je({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJkYW1vLXZpbGFiJTJGdGV4dC10by12aWRlby1tcy0xLjdiJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdmFyaWFudCUzRCUyMmZwMTYlMjIpJTBBcGlwZSUyMCUzRCUyMHBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJTcGlkZXJtYW4lMjBpcyUyMHN1cmZpbmclMjIlMEF2aWRlb19mcmFtZXMlMjAlM0QlMjBwaXBlKHByb21wdCkuZnJhbWVzJTVCMCU1RCUwQXZpZGVvX3BhdGglMjAlM0QlMjBleHBvcnRfdG9fdmlkZW8odmlkZW9fZnJhbWVzKSUwQXZpZGVvX3BhdGg=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"Spiderman is surfing"</span> | |
| video_frames = pipe(prompt).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),q=new Je({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJkYW1vLXZpbGFiJTJGdGV4dC10by12aWRlby1tcy0xLjdiJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdmFyaWFudCUzRCUyMmZwMTYlMjIpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMG1lbW9yeSUyMG9wdGltaXphdGlvbiUwQXBpcGUuZW5hYmxlX3ZhZV9zbGljaW5nKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJEYXJ0aCUyMFZhZGVyJTIwc3VyZmluZyUyMGElMjB3YXZlJTIyJTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlMkMlMjBudW1fZnJhbWVzJTNENjQpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyklMEF2aWRlb19wYXRo",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| pipe.enable_model_cpu_offload() | |
| <span class="hljs-comment"># memory optimization</span> | |
| pipe.enable_vae_slicing() | |
| prompt = <span class="hljs-string">"Darth Vader surfing a wave"</span> | |
| video_frames = pipe(prompt, num_frames=<span class="hljs-number">64</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),ee=new Je({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMkMlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJkYW1vLXZpbGFiJTJGdGV4dC10by12aWRlby1tcy0xLjdiJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdmFyaWFudCUzRCUyMmZwMTYlMjIpJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIuZnJvbV9jb25maWcocGlwZS5zY2hlZHVsZXIuY29uZmlnKSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJTcGlkZXJtYW4lMjBpcyUyMHN1cmZpbmclMjIlMEF2aWRlb19mcmFtZXMlMjAlM0QlMjBwaXBlKHByb21wdCUyQyUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QyNSkuZnJhbWVzJTVCMCU1RCUwQXZpZGVvX3BhdGglMjAlM0QlMjBleHBvcnRfdG9fdmlkZW8odmlkZW9fZnJhbWVzKSUwQXZpZGVvX3BhdGg=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| pipe.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"Spiderman is surfing"</span> | |
| video_frames = pipe(prompt, num_inference_steps=<span class="hljs-number">25</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),ne=new X({props:{title:"cerspense/zeroscope_v2_576w & cerspense/zeroscope_v2_XL",local:"cerspensezeroscopev2576w--cerspensezeroscopev2xl",headingTag:"h3"}}),ie=new Je({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMkMlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJjZXJzcGVuc2UlMkZ6ZXJvc2NvcGVfdjJfNTc2dyUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBJTIzJTIwbWVtb3J5JTIwb3B0aW1pemF0aW9uJTBBcGlwZS51bmV0LmVuYWJsZV9mb3J3YXJkX2NodW5raW5nKGNodW5rX3NpemUlM0QxJTJDJTIwZGltJTNEMSklMEFwaXBlLmVuYWJsZV92YWVfc2xpY2luZygpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyRGFydGglMjBWYWRlciUyMHN1cmZpbmclMjBhJTIwd2F2ZSUyMiUwQXZpZGVvX2ZyYW1lcyUyMCUzRCUyMHBpcGUocHJvbXB0JTJDJTIwbnVtX2ZyYW1lcyUzRDI0KS5mcmFtZXMlNUIwJTVEJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGV4cG9ydF90b192aWRlbyh2aWRlb19mcmFtZXMpJTBBdmlkZW9fcGF0aA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"cerspense/zeroscope_v2_576w"</span>, torch_dtype=torch.float16) | |
| pipe.enable_model_cpu_offload() | |
| <span class="hljs-comment"># memory optimization</span> | |
| pipe.unet.enable_forward_chunking(chunk_size=<span class="hljs-number">1</span>, dim=<span class="hljs-number">1</span>) | |
| pipe.enable_vae_slicing() | |
| prompt = <span class="hljs-string">"Darth Vader surfing a wave"</span> | |
| video_frames = pipe(prompt, num_frames=<span class="hljs-number">24</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),re=new Je({props:{code:"cGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJjZXJzcGVuc2UlMkZ6ZXJvc2NvcGVfdjJfWEwlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIuZnJvbV9jb25maWcocGlwZS5zY2hlZHVsZXIuY29uZmlnKSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEElMjMlMjBtZW1vcnklMjBvcHRpbWl6YXRpb24lMEFwaXBlLnVuZXQuZW5hYmxlX2ZvcndhcmRfY2h1bmtpbmcoY2h1bmtfc2l6ZSUzRDElMkMlMjBkaW0lM0QxKSUwQXBpcGUuZW5hYmxlX3ZhZV9zbGljaW5nKCklMEElMEF2aWRlbyUyMCUzRCUyMCU1QkltYWdlLmZyb21hcnJheShmcmFtZSkucmVzaXplKCgxMDI0JTJDJTIwNTc2KSklMjBmb3IlMjBmcmFtZSUyMGluJTIwdmlkZW9fZnJhbWVzJTVEJTBBJTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlMkMlMjB2aWRlbyUzRHZpZGVvJTJDJTIwc3RyZW5ndGglM0QwLjYpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyklMEF2aWRlb19wYXRo",highlighted:`pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"cerspense/zeroscope_v2_XL"</span>, torch_dtype=torch.float16) | |
| pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| pipe.enable_model_cpu_offload() | |
| <span class="hljs-comment"># memory optimization</span> | |
| pipe.unet.enable_forward_chunking(chunk_size=<span class="hljs-number">1</span>, dim=<span class="hljs-number">1</span>) | |
| pipe.enable_vae_slicing() | |
| video = [Image.fromarray(frame).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">576</span>)) <span class="hljs-keyword">for</span> frame <span class="hljs-keyword">in</span> video_frames] | |
| video_frames = pipe(prompt, video=video, strength=<span class="hljs-number">0.6</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),de=new X({props:{title:"Tips",local:"tips",headingTag:"h2"}}),I=new jo({props:{$$slots:{default:[Po]},$$scope:{ctx:j}}}),fe=new X({props:{title:"TextToVideoSDPipeline",local:"diffusers.TextToVideoSDPipeline",headingTag:"h2"}}),ue=new Ze({props:{name:"class diffusers.TextToVideoSDPipeline",anchor:"diffusers.TextToVideoSDPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet3DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"}],parametersDescription:[{anchor:"diffusers.TextToVideoSDPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11415/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoSDPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.TextToVideoSDPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoSDPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_11415/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a>) — | |
| A <a href="/docs/diffusers/pr_11415/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.TextToVideoSDPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11415/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_11415/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_11415/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_11415/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L70"}}),he=new Ze({props:{name:"__call__",anchor:"diffusers.TextToVideoSDPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_frames",val:": int = 16"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 9.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'np'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoSDPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The height in pixels of the generated video.`,name:"height"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The width in pixels of the generated video.`,name:"width"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to 16) — | |
| The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds | |
| amounts to 2 seconds of video.`,name:"num_frames"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality videos at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in image generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies | |
| to the <a href="/docs/diffusers/pr_11415/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>. Latents should be of shape | |
| <code>(batch_size, num_channel, num_frames, height, width)</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If | |
| not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>torch.Tensor</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput">TextToVideoSDPipelineOutput</a> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L444",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> is | |
| returned, otherwise a <code>tuple</code> is returned where the first element is a list with the generated frames.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),G=new ko({props:{anchor:"diffusers.TextToVideoSDPipeline.__call__.example",$$slots:{default:[Wo]},$$scope:{ctx:j}}}),ge=new Ze({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoSDPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L154"}}),_e=new X({props:{title:"VideoToVideoSDPipeline",local:"diffusers.VideoToVideoSDPipeline",headingTag:"h2"}}),ve=new Ze({props:{name:"class diffusers.VideoToVideoSDPipeline",anchor:"diffusers.VideoToVideoSDPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet3DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"}],parametersDescription:[{anchor:"diffusers.VideoToVideoSDPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11415/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"},{anchor:"diffusers.VideoToVideoSDPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.VideoToVideoSDPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.VideoToVideoSDPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_11415/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a>) — | |
| A <a href="/docs/diffusers/pr_11415/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.VideoToVideoSDPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11415/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_11415/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_11415/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_11415/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py#L105"}}),be=new Ze({props:{name:"__call__",anchor:"diffusers.VideoToVideoSDPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"video",val:": typing.Union[typing.List[numpy.ndarray], torch.Tensor] = None"},{name:"strength",val:": float = 0.6"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 15.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'np'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.VideoToVideoSDPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.video",description:`<strong>video</strong> (<code>List[np.ndarray]</code> or <code>torch.Tensor</code>) — | |
| <code>video</code> frames or tensor representing a video batch to be used as the starting point for the process. | |
| Can also accept video latents as <code>image</code>, if passing latents directly, it will not be encoded again.`,name:"video"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.strength",description:`<strong>strength</strong> (<code>float</code>, <em>optional</em>, defaults to 0.8) — | |
| Indicates extent to transform the reference <code>video</code>. Must be between 0 and 1. <code>video</code> is used as a | |
| starting point, adding more noise to it the larger the <code>strength</code>. The number of denoising steps | |
| depends on the amount of noise initially added. When <code>strength</code> is 1, added noise is maximum and the | |
| denoising process runs for the full number of iterations specified in <code>num_inference_steps</code>. A value of | |
| 1 essentially ignores <code>video</code>.`,name:"strength"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality videos at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in video generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies | |
| to the <a href="/docs/diffusers/pr_11415/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>. Latents should be of shape | |
| <code>(batch_size, num_channel, num_frames, height, width)</code>.`,name:"latents"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If | |
| not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>torch.Tensor</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput">TextToVideoSDPipelineOutput</a> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py#L509",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> is | |
| returned, otherwise a <code>tuple</code> is returned where the first element is a list with the generated frames.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_11415/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),W=new ko({props:{anchor:"diffusers.VideoToVideoSDPipeline.__call__.example",$$slots:{default:[Bo]},$$scope:{ctx:j}}}),Te=new Ze({props:{name:"encode_prompt",anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py#L189"}}),ye=new X({props:{title:"TextToVideoSDPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput",headingTag:"h2"}}),we=new Ze({props:{name:"class diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput",parameters:[{name:"frames",val:": typing.Union[torch.Tensor, numpy.ndarray, typing.List[typing.List[PIL.Image.Image]]]"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or List[List[PIL.Image.Image]]) — | |
| List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing | |
| denoised`,name:"frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_11415/src/diffusers/pipelines/text_to_video_synthesis/pipeline_output.py#L13"}}),xe=new Io({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video.md"}}),{c(){l=a("meta"),Z=s(),m=a("p"),b=s(),f(w.$$.fragment),c=s(),f(J.$$.fragment),Ne=s(),U=a("div"),U.innerHTML=Qt,Ee=s(),L=a("p"),L.innerHTML=At,He=s(),z=a("p"),z.textContent=qt,Ye=s(),R=a("p"),R.innerHTML=Ot,Fe=s(),N=a("p"),N.innerHTML=Kt,Qe=s(),f(E.$$.fragment),Ae=s(),f(H.$$.fragment),qe=s(),Y=a("p"),Y.textContent=eo,Oe=s(),f(F.$$.fragment),Ke=s(),Q=a("p"),Q.textContent=to,et=s(),A=a("p"),A.textContent=oo,tt=s(),f(q.$$.fragment),ot=s(),O=a("p"),O.innerHTML=no,nt=s(),K=a("p"),K.textContent=so,st=s(),f(ee.$$.fragment),it=s(),te=a("p"),te.textContent=io,at=s(),oe=a("table"),oe.innerHTML=ao,rt=s(),f(ne.$$.fragment),lt=s(),se=a("p"),se.innerHTML=ro,pt=s(),f(ie.$$.fragment),dt=s(),ae=a("p"),ae.textContent=lo,ct=s(),f(re.$$.fragment),mt=s(),le=a("p"),le.textContent=po,ft=s(),pe=a("table"),pe.innerHTML=co,ut=s(),f(de.$$.fragment),ht=s(),ce=a("p"),ce.innerHTML=mo,gt=s(),me=a("p"),me.innerHTML=fo,_t=s(),f(I.$$.fragment),vt=s(),f(fe.$$.fragment),bt=s(),T=a("div"),f(ue.$$.fragment),kt=s(),Ve=a("p"),Ve.textContent=uo,$t=s(),je=a("p"),je.innerHTML=ho,Dt=s(),ke=a("p"),ke.textContent=go,St=s(),$e=a("ul"),$e.innerHTML=_o,Ct=s(),k=a("div"),f(he.$$.fragment),Ut=s(),De=a("p"),De.textContent=vo,It=s(),f(G.$$.fragment),Gt=s(),P=a("div"),f(ge.$$.fragment),Pt=s(),Se=a("p"),Se.textContent=bo,Tt=s(),f(_e.$$.fragment),yt=s(),y=a("div"),f(ve.$$.fragment),Wt=s(),Ce=a("p"),Ce.textContent=To,Bt=s(),Ue=a("p"),Ue.innerHTML=yo,Xt=s(),Ie=a("p"),Ie.textContent=wo,Lt=s(),Ge=a("ul"),Ge.innerHTML=xo,zt=s(),$=a("div"),f(be.$$.fragment),Rt=s(),Pe=a("p"),Pe.textContent=Mo,Nt=s(),f(W.$$.fragment),Et=s(),B=a("div"),f(Te.$$.fragment),Ht=s(),We=a("p"),We.textContent=Zo,wt=s(),f(ye.$$.fragment),xt=s(),V=a("div"),f(we.$$.fragm
ent),Yt=s(),Be=a("p"),Be.textContent=Jo,Ft=s(),Xe=a("p"),Xe.innerHTML=Vo,Mt=s(),f(xe.$$.fragment),Zt=s(),Re=a("p"),this.h()},l(e){const t=Uo("svelte-u9bgzb",document.head);l=r(t,"META",{name:!0,content:!0}),t.forEach(o),Z=i(e),m=r(e,"P",{}),C(m).forEach(o),b=i(e),u(w.$$.fragment,e),c=i(e),u(J.$$.fragment,e),Ne=i(e),U=r(e,"DIV",{class:!0,"data-svelte-h":!0}),d(U)!=="svelte-si9ct8"&&(U.innerHTML=Qt),Ee=i(e),L=r(e,"P",{"data-svelte-h":!0}),d(L)!=="svelte-17trsey"&&(L.innerHTML=At),He=i(e),z=r(e,"P",{"data-svelte-h":!0}),d(z)!=="svelte-1cwsb16"&&(z.textContent=qt),Ye=i(e),R=r(e,"P",{"data-svelte-h":!0}),d(R)!=="svelte-1ovx0um"&&(R.innerHTML=Ot),Fe=i(e),N=r(e,"P",{"data-svelte-h":!0}),d(N)!=="svelte-9292p9"&&(N.innerHTML=Kt),Qe=i(e),u(E.$$.fragment,e),Ae=i(e),u(H.$$.fragment,e),qe=i(e),Y=r(e,"P",{"data-svelte-h":!0}),d(Y)!=="svelte-a4ktac"&&(Y.textContent=eo),Oe=i(e),u(F.$$.fragment,e),Ke=i(e),Q=r(e,"P",{"data-svelte-h":!0}),d(Q)!=="svelte-1gwbshq"&&(Q.textContent=to),et=i(e),A=r(e,"P",{"data-svelte-h":!0}),d(A)!=="svelte-yoc5w4"&&(A.textContent=oo),tt=i(e),u(q.$$.fragment,e),ot=i(e),O=r(e,"P",{"data-svelte-h":!0}),d(O)!=="svelte-m9rmsr"&&(O.innerHTML=no),nt=i(e),K=r(e,"P",{"data-svelte-h":!0}),d(K)!=="svelte-1yxs7ur"&&(K.textContent=so),st=i(e),u(ee.$$.fragment,e),it=i(e),te=r(e,"P",{"data-svelte-h":!0}),d(te)!=="svelte-1pxzje"&&(te.textContent=io),at=i(e),oe=r(e,"TABLE",{"data-svelte-h":!0}),d(oe)!=="svelte-5s26g4"&&(oe.innerHTML=ao),rt=i(e),u(ne.$$.fragment,e),lt=i(e),se=r(e,"P",{"data-svelte-h":!0}),d(se)!=="svelte-1iu9mly"&&(se.innerHTML=ro),pt=i(e),u(ie.$$.fragment,e),dt=i(e),ae=r(e,"P",{"data-svelte-h":!0}),d(ae)!=="svelte-4ywac8"&&(ae.textContent=lo),ct=i(e),u(re.$$.fragment,e),mt=i(e),le=r(e,"P",{"data-svelte-h":!0}),d(le)!=="svelte-1pxzje"&&(le.textContent=po),ft=i(e),pe=r(e,"TABLE",{"data-svelte-h":!0}),d(pe)!=="svelte-9iw63g"&&(pe.innerHTML=co),ut=i(e),u(de.$$.fragment,e),ht=i(e),ce=r(e,"P",{"data-svelte-h":!0}),d(ce)!=="svelte-op8wgu"&&(ce.innerHTML=mo),gt=i
(e),me=r(e,"P",{"data-svelte-h":!0}),d(me)!=="svelte-1lb9rti"&&(me.innerHTML=fo),_t=i(e),u(I.$$.fragment,e),vt=i(e),u(fe.$$.fragment,e),bt=i(e),T=r(e,"DIV",{class:!0});var x=C(T);u(ue.$$.fragment,x),kt=i(x),Ve=r(x,"P",{"data-svelte-h":!0}),d(Ve)!=="svelte-19ipoo4"&&(Ve.textContent=uo),$t=i(x),je=r(x,"P",{"data-svelte-h":!0}),d(je)!=="svelte-18o0u34"&&(je.innerHTML=ho),Dt=i(x),ke=r(x,"P",{"data-svelte-h":!0}),d(ke)!=="svelte-14s6m4u"&&(ke.textContent=go),St=i(x),$e=r(x,"UL",{"data-svelte-h":!0}),d($e)!=="svelte-187uv42"&&($e.innerHTML=_o),Ct=i(x),k=r(x,"DIV",{class:!0});var S=C(k);u(he.$$.fragment,S),Ut=i(S),De=r(S,"P",{"data-svelte-h":!0}),d(De)!=="svelte-50j04k"&&(De.textContent=vo),It=i(S),u(G.$$.fragment,S),S.forEach(o),Gt=i(x),P=r(x,"DIV",{class:!0});var Me=C(P);u(ge.$$.fragment,Me),Pt=i(Me),Se=r(Me,"P",{"data-svelte-h":!0}),d(Se)!=="svelte-16q0ax1"&&(Se.textContent=bo),Me.forEach(o),x.forEach(o),Tt=i(e),u(_e.$$.fragment,e),yt=i(e),y=r(e,"DIV",{class:!0});var M=C(y);u(ve.$$.fragment,M),Wt=i(M),Ce=r(M,"P",{"data-svelte-h":!0}),d(Ce)!=="svelte-1oyadpc"&&(Ce.textContent=To),Bt=i(M),Ue=r(M,"P",{"data-svelte-h":!0}),d(Ue)!=="svelte-18o0u34"&&(Ue.innerHTML=yo),Xt=i(M),Ie=r(M,"P",{"data-svelte-h":!0}),d(Ie)!=="svelte-14s6m4u"&&(Ie.textContent=wo),Lt=i(M),Ge=r(M,"UL",{"data-svelte-h":!0}),d(Ge)!=="svelte-187uv42"&&(Ge.innerHTML=xo),zt=i(M),$=r(M,"DIV",{class:!0});var Le=C($);u(be.$$.fragment,Le),Rt=i(Le),Pe=r(Le,"P",{"data-svelte-h":!0}),d(Pe)!=="svelte-50j04k"&&(Pe.textContent=Mo),Nt=i(Le),u(W.$$.fragment,Le),Le.forEach(o),Et=i(M),B=r(M,"DIV",{class:!0});var Vt=C(B);u(Te.$$.fragment,Vt),Ht=i(Vt),We=r(Vt,"P",{"data-svelte-h":!0}),d(We)!=="svelte-16q0ax1"&&(We.textContent=Zo),Vt.forEach(o),M.forEach(o),wt=i(e),u(ye.$$.fragment,e),xt=i(e),V=r(e,"DIV",{class:!0});var 
ze=C(V);u(we.$$.fragment,ze),Yt=i(ze),Be=r(ze,"P",{"data-svelte-h":!0}),d(Be)!=="svelte-1uuckb2"&&(Be.textContent=Jo),Ft=i(ze),Xe=r(ze,"P",{"data-svelte-h":!0}),d(Xe)!=="svelte-gk6g69"&&(Xe.innerHTML=Vo),ze.forEach(o),Mt=i(e),u(xe.$$.fragment,e),Zt=i(e),Re=r(e,"P",{}),C(Re).forEach(o),this.h()},h(){D(l,"name","hf:doc:metadata"),D(l,"content",Lo),D(U,"class","flex flex-wrap space-x-1"),D(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,t){p(document.head,l),n(e,Z,t),n(e,m,t),n(e,b,t),h(w,e,t),n(e,c,t),h(J,e,t),n(e,Ne,t),n(e,U,t),n(e,Ee,t),n(e,L,t),n(e,He,t),n(e,z,t),n(e,Ye,t),n(e,R,t),n(e,Fe,t),n(e,N,t),n(e,Qe,t),h(E,e,t),n(e,Ae,t),h(H,e,t),n(e,qe,t),n(e,Y,t),n(e,Oe,t),h(F,e,t),n(e,Ke,t),n(e,Q,t),n(e,et,t),n(e,A,t),n(e,tt,t),h(q,e,t),n(e,ot,t),n(e,O,t),n(e,nt,t),n(e,K,t),n(e,st,t),h(ee,e,t),n(e,it,t),n(e,te,t),n(e,at,t),n(e,oe,t),n(e,rt,t),h(ne,e,t),n(e,lt,t),n(e,se,t),n(e,pt,t),h(ie,e,t),n(e,dt,t),n(e,ae,t),n(e,ct,t),h(re,e,t),n(e,mt,t),n(e,le,t),n(e,ft,t),n(e,pe,t),n(e,ut,t),h(de,e,t),n(e,ht,t),n(e,ce,t),n(e,gt,t),n(e,me,t),n(e,_t,t),h(I,e,t),n(e,vt,t),h(fe,e,t),n(e,bt,t),n(e,T,t),h(ue,T,null),p(T,kt),p(T,Ve),p(T,$t),p(T,je),p(T,Dt),p(T,ke),p(T,St),p(T,$e),p(T,Ct),p(T,k),h(he,k,null),p(k,Ut),p(k,De),p(k,It),h(G,k,null),p(T,Gt),p(T,P),h(ge,P,null),p(P,Pt),p(P,Se),n(e,Tt,t),h(_e,e,t),n(e,yt,t),n(e,y,t),h(ve,y,null),p(y,Wt),p(y,Ce),p(y,Bt),p(y,Ue),p(y,Xt),p(y,Ie),p(y,Lt),p(y,Ge),p(y,zt),p(y,$),h(be,$,null),p($,Rt),p($,Pe),p($,Nt),h(W,$,null),p(y,Et),p(y,B),h(Te,B,null),p(B,Ht),p(B,We),n(e,wt,t),h(ye,e,t),n(e,xt,t),n(e,V,t),h(we,V,null),p(V,Yt),p(V,Be),p(V,Ft),p(V,Xe),n(e,Mt,t),h(xe,e,t),n(e,Zt,t),n(e,Re,t),Jt=!0},p(e,[t]){const x={};t&2&&(x.$$scope={dirty:t,ctx:e}),w.$set(x);const S={};t&2&&(S.$$scope={dirty:t,ctx:e}),I.$set(S);const Me={};t&2&&(Me.$$scope={dirty:t,ctx:e}),G.$set(Me);const 
M={};t&2&&(M.$$scope={dirty:t,ctx:e}),W.$set(M)},i(e){Jt||(g(w.$$.fragment,e),g(J.$$.fragment,e),g(E.$$.fragment,e),g(H.$$.fragment,e),g(F.$$.fragment,e),g(q.$$.fragment,e),g(ee.$$.fragment,e),g(ne.$$.fragment,e),g(ie.$$.fragment,e),g(re.$$.fragment,e),g(de.$$.fragment,e),g(I.$$.fragment,e),g(fe.$$.fragment,e),g(ue.$$.fragment,e),g(he.$$.fragment,e),g(G.$$.fragment,e),g(ge.$$.fragment,e),g(_e.$$.fragment,e),g(ve.$$.fragment,e),g(be.$$.fragment,e),g(W.$$.fragment,e),g(Te.$$.fragment,e),g(ye.$$.fragment,e),g(we.$$.fragment,e),g(xe.$$.fragment,e),Jt=!0)},o(e){_(w.$$.fragment,e),_(J.$$.fragment,e),_(E.$$.fragment,e),_(H.$$.fragment,e),_(F.$$.fragment,e),_(q.$$.fragment,e),_(ee.$$.fragment,e),_(ne.$$.fragment,e),_(ie.$$.fragment,e),_(re.$$.fragment,e),_(de.$$.fragment,e),_(I.$$.fragment,e),_(fe.$$.fragment,e),_(ue.$$.fragment,e),_(he.$$.fragment,e),_(G.$$.fragment,e),_(ge.$$.fragment,e),_(_e.$$.fragment,e),_(ve.$$.fragment,e),_(be.$$.fragment,e),_(W.$$.fragment,e),_(Te.$$.fragment,e),_(ye.$$.fragment,e),_(we.$$.fragment,e),_(xe.$$.fragment,e),Jt=!1},d(e){e&&(o(Z),o(m),o(b),o(c),o(Ne),o(U),o(Ee),o(L),o(He),o(z),o(Ye),o(R),o(Fe),o(N),o(Qe),o(Ae),o(qe),o(Y),o(Oe),o(Ke),o(Q),o(et),o(A),o(tt),o(ot),o(O),o(nt),o(K),o(st),o(it),o(te),o(at),o(oe),o(rt),o(lt),o(se),o(pt),o(dt),o(ae),o(ct),o(mt),o(le),o(ft),o(pe),o(ut),o(ht),o(ce),o(gt),o(me),o(_t),o(vt),o(bt),o(T),o(Tt),o(yt),o(y),o(wt),o(xt),o(V),o(Mt),o(Zt),o(Re)),o(l),v(w,e),v(J,e),v(E,e),v(H,e),v(F,e),v(q,e),v(ee,e),v(ne,e),v(ie,e),v(re,e),v(de,e),v(I,e),v(fe,e),v(ue),v(he),v(G),v(ge),v(_e,e),v(ve),v(be),v(W),v(Te),v(ye,e),v(we),v(xe,e)}}}const Lo='{"title":"Text-to-video","local":"text-to-video","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"text-to-video-ms-1.7b","local":"text-to-video-ms-17b","sections":[],"depth":3},{"title":"cerspense/zeroscope_v2_576w & 
cerspense/zeroscope_v2_XL","local":"cerspensezeroscopev2576w--cerspensezeroscopev2xl","sections":[],"depth":3}],"depth":2},{"title":"Tips","local":"tips","sections":[],"depth":2},{"title":"TextToVideoSDPipeline","local":"diffusers.TextToVideoSDPipeline","sections":[],"depth":2},{"title":"VideoToVideoSDPipeline","local":"diffusers.VideoToVideoSDPipeline","sections":[],"depth":2},{"title":"TextToVideoSDPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput","sections":[],"depth":2}],"depth":1}';function zo(j){return Do(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ao extends So{constructor(l){super(),Co(this,l,zo,Xo,$o,{})}}export{Ao as component}; | |
Xet Storage Details
- Size: 68.7 kB
- Xet hash: bde94553d16d204497862e9049efbc6802fd6ac9d669c98e3a2a180f5590ffed
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.