# Stable Video Diffusion

[Stable Video Diffusion](https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf) is a powerful image-to-video generation model that can generate high-resolution (576x1024), 2-4 second videos conditioned on an input image.

This guide will show you how to use SVD to generate short videos from images.

Before you begin, make sure you have the following libraries installed:

```py
!pip install -q -U diffusers transformers accelerate
```

## Image to Video Generation

There are two variants of SVD: [SVD](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid) and [SVD-XT](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt). The SVD checkpoint is trained to generate 14 frames, and the SVD-XT checkpoint is further finetuned to generate 25 frames.

We will use the `svd-xt` checkpoint for this guide.

```py
import torch

from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
)
pipe.enable_model_cpu_offload()

# Load the conditioning image
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png")
image = image.resize((1024, 576))

generator = torch.manual_seed(42)
frames = pipe(image, decode_chunk_size=8, generator=generator).frames[0]

export_to_video(frames, "generated.mp4", fps=7)
```

<video controls width="1024" height="576">
  <source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket_generated.webm" type="video/webm">
  <source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket_generated.mp4" type="video/mp4">
</video>

| **Source Image** | **Video** |
|:---:|:---:|
| ![Source image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png) | ![Generated video](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/output_rocket.gif) |

<Tip>

Since generating videos is memory intensive, you can use the `decode_chunk_size` argument to control how many frames are decoded at once, which reduces memory usage. It's recommended to tune this value based on your GPU memory. Setting `decode_chunk_size=1` decodes one frame at a time and uses the least memory, but the video might flicker.

Additionally, we also use [model cpu offloading](../../optimization/memory#model-offloading) to reduce the memory usage.

</Tip>

### Torch.compile

You can achieve a 20-25% speedup, at the expense of slightly increased memory, by compiling the UNet as follows:

```diff
- pipe.enable_model_cpu_offload()
+ pipe.to("cuda")
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
```

### Low-memory

Video generation is very memory intensive because you essentially have to generate `num_frames` all at once, much like text-to-image generation with a very high batch size. To reduce the memory requirement you have multiple options, all of which trade inference speed for lower memory usage:

- enable model offloading: each component of the pipeline is offloaded to the CPU once it's not needed anymore.
- enable feed-forward chunking: the feed-forward layer runs in a loop instead of running with a single, huge feed-forward batch size.
- reduce `decode_chunk_size`: the VAE decodes frames in chunks instead of decoding them all together. **Note**: in addition to a small slowdown, this also slightly degrades video quality.

You can enable them as follows:

```diff
-pipe.enable_model_cpu_offload()
-frames = pipe(image, decode_chunk_size=8, generator=generator).frames[0]
+pipe.enable_model_cpu_offload()
+pipe.unet.enable_forward_chunking()
+frames = pipe(image, decode_chunk_size=2, generator=generator, num_frames=25).frames[0]
```

Including all these tricks should lower the memory requirement to less than 8GB VRAM.

### Micro-conditioning

Along with the conditioning image, Stable Video Diffusion also accepts micro-conditioning arguments that allow more control over the generated video:

- `fps`: the frames per second of the generated video.
- `motion_bucket_id`: the motion bucket id to use for the generated video. Increasing the motion bucket id increases the motion of the generated video.
- `noise_aug_strength`: the amount of noise added to the conditioning image. The higher the value, the less the video resembles the conditioning image. Increasing this value also increases the motion of the generated video.

Here is an example of using micro-conditioning to generate a video with more motion:

```py
import torch

from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
)
pipe.enable_model_cpu_offload()

# Load the conditioning image
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png")
image = image.resize((1024, 576))

generator = torch.manual_seed(42)
frames = pipe(image, decode_chunk_size=8, generator=generator, motion_bucket_id=180, noise_aug_strength=0.1).frames[0]
export_to_video(frames, "generated.mp4", fps=7)
```

![Generated video with more motion](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/output_rocket_with_conditions.gif)
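To build intuition for the `decode_chunk_size` trade-off discussed in this guide, here is a minimal illustrative sketch of how frame indices are partitioned into decode batches. This is not the actual VAE decoding code from `diffusers`, just plain Python showing the chunking arithmetic; the helper name `chunk_indices` is made up for this example.

```python
def chunk_indices(num_frames: int, decode_chunk_size: int) -> list[list[int]]:
    """Split frame indices into batches of at most `decode_chunk_size`,
    mirroring how the VAE decodes a few frames at a time instead of all at once."""
    return [
        list(range(start, min(start + decode_chunk_size, num_frames)))
        for start in range(0, num_frames, decode_chunk_size)
    ]

# SVD-XT generates 25 frames; decode_chunk_size=8 means 4 VAE decode passes
print([len(chunk) for chunk in chunk_indices(25, 8)])  # [8, 8, 8, 1]

# decode_chunk_size=1 uses the least memory: 25 single-frame decode passes
print(len(chunk_indices(25, 1)))  # 25
```

Fewer frames per pass means a smaller peak activation footprint in the VAE decoder, at the cost of more passes (slower) and, as noted above, potential flicker because frames are decoded in smaller groups.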
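As a quick sanity check on the "2-4 second" claim at the top of this guide: the clip length is simply the number of generated frames divided by the export frame rate. A small sketch (the `clip_duration` helper is made up for illustration):

```python
def clip_duration(num_frames: int, fps: int) -> float:
    """Duration in seconds of a video with `num_frames` frames exported at `fps`."""
    return num_frames / fps

# SVD generates 14 frames, SVD-XT generates 25; both exported at fps=7 here
print(round(clip_duration(14, 7), 2))  # 2.0
print(round(clip_duration(25, 7), 2))  # 3.57
```

So at the `fps=7` used by `export_to_video` in the examples above, the two checkpoints produce roughly 2 and 3.6 second clips, which is where the 2-4 second range comes from.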