Buckets:
| import{s as Qt,o as Yt,n as Ct}from"../chunks/scheduler.8c3d61f6.js";import{S as Ft,i as xt,g as o,s as a,r as i,A as Nt,h as r,f as t,c as s,j as X,u as d,x as J,k as v,y as h,a as n,v as p,d as c,t as m,w as M,m as Et,n as Ht}from"../chunks/index.da70eac4.js";import{T as zt}from"../chunks/Tip.1d9b8c37.js";import{D as Ye}from"../chunks/Docstring.567bc132.js";import{C as _}from"../chunks/CodeBlock.a9c4becf.js";import{E as Rt}from"../chunks/ExampleCodeBlock.15b54358.js";import{H as U,E as St}from"../chunks/index.5d4ab994.js";function At(R){let u;return{c(){u=Et("You can improve the quality of the generated video by running the decoding step in full precision.")},l(w){u=Ht(w,"You can improve the quality of the generated video by running the decoding step in full precision.")},m(w,g){n(w,u,g)},d(w){w&&t(u)}}}function $t(R){let u,w="Examples:",g,f,b;return f=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9lbmNvZGVyS0xXYW4lMkMlMjBXYW5QaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMuc2NoZWR1bGVycy5zY2hlZHVsaW5nX3VuaXBjX211bHRpc3RlcCUyMGltcG9ydCUyMFVuaVBDTXVsdGlzdGVwU2NoZWR1bGVyJTBBJTBBJTIzJTIwQXZhaWxhYmxlJTIwbW9kZWxzJTNBJTIwV2FuLUFJJTJGV2FuMi4xLVQyVi0xNEItRGlmZnVzZXJzJTJDJTIwV2FuLUFJJTJGV2FuMi4xLVQyVi0xLjNCLURpZmZ1c2VycyUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyV2FuLUFJJTJGV2FuMi4xLVQyVi0xNEItRGlmZnVzZXJzJTIyJTBBdmFlJTIwJTNEJTIwQXV0b2VuY29kZXJLTFdhbi5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJ2YWUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBcGlwZSUyMCUzRCUyMFdhblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHZhZSUzRHZhZSUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBZmxvd19zaGlmdCUyMCUzRCUyMDUuMCUyMCUyMCUyMyUyMDUuMCUyMGZvciUyMDcyMFAlMkMlMjAzLjAlMjBmb3IlMjA0ODBQJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjBVbmlQQ011bHRpc3RlcFNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlLnNjaGVkdWxlci5jb25maWclMkMlMjBmbG93X3NoaWZ0JTNEZmxvd19zaGlmdCklMEFwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcHJvbXB0JTI
wJTNEJTIwJTIyQSUyMGNhdCUyMGFuZCUyMGElMjBkb2clMjBiYWtpbmclMjBhJTIwY2FrZSUyMHRvZ2V0aGVyJTIwaW4lMjBhJTIwa2l0Y2hlbi4lMjBUaGUlMjBjYXQlMjBpcyUyMGNhcmVmdWxseSUyMG1lYXN1cmluZyUyMGZsb3VyJTJDJTIwd2hpbGUlMjB0aGUlMjBkb2clMjBpcyUyMHN0aXJyaW5nJTIwdGhlJTIwYmF0dGVyJTIwd2l0aCUyMGElMjB3b29kZW4lMjBzcG9vbi4lMjBUaGUlMjBraXRjaGVuJTIwaXMlMjBjb3p5JTJDJTIwd2l0aCUyMHN1bmxpZ2h0JTIwc3RyZWFtaW5nJTIwdGhyb3VnaCUyMHRoZSUyMHdpbmRvdy4lMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJCcmlnaHQlMjB0b25lcyUyQyUyMG92ZXJleHBvc2VkJTJDJTIwc3RhdGljJTJDJTIwYmx1cnJlZCUyMGRldGFpbHMlMkMlMjBzdWJ0aXRsZXMlMkMlMjBzdHlsZSUyQyUyMHdvcmtzJTJDJTIwcGFpbnRpbmdzJTJDJTIwaW1hZ2VzJTJDJTIwc3RhdGljJTJDJTIwb3ZlcmFsbCUyMGdyYXklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjBsb3clMjBxdWFsaXR5JTJDJTIwSlBFRyUyMGNvbXByZXNzaW9uJTIwcmVzaWR1ZSUyQyUyMHVnbHklMkMlMjBpbmNvbXBsZXRlJTJDJTIwZXh0cmElMjBmaW5nZXJzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBoYW5kcyUyQyUyMHBvb3JseSUyMGRyYXduJTIwZmFjZXMlMkMlMjBkZWZvcm1lZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjBtaXNzaGFwZW4lMjBsaW1icyUyQyUyMGZ1c2VkJTIwZmluZ2VycyUyQyUyMHN0aWxsJTIwcGljdHVyZSUyQyUyMG1lc3N5JTIwYmFja2dyb3VuZCUyQyUyMHRocmVlJTIwbGVncyUyQyUyMG1hbnklMjBwZW9wbGUlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQlMkMlMjB3YWxraW5nJTIwYmFja3dhcmRzJTIyJTBBJTBBb3V0cHV0JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q3MjAlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDEyODAlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEODElMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDUuMCUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMTYp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.schedulers.scheduling_unipc_multistep <span class="hljs-keyword">import</span> UniPCMultistepScheduler | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Available models: Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers</span> | |
| <span class="hljs-meta">>>> </span>model_id = <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span> | |
| <span class="hljs-meta">>>> </span>vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span>pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>flow_shift = <span class="hljs-number">5.0</span> <span class="hljs-comment"># 5.0 for 720P, 3.0 for 480P</span> | |
| <span class="hljs-meta">>>> </span>pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."</span> | |
| <span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| <span class="hljs-meta">>>> </span>output = pipe( | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> negative_prompt=negative_prompt, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">720</span>, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">1280</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">81</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">5.0</span>, | |
| <span class="hljs-meta">... </span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),{c(){u=o("p"),u.textContent=w,g=a(),i(f.$$.fragment)},l(y){u=r(y,"P",{"data-svelte-h":!0}),J(u)!=="svelte-kvfsh7"&&(u.textContent=w),g=s(y),d(f.$$.fragment,y)},m(y,T){n(y,u,T),n(y,g,T),p(f,y,T),b=!0},p:Ct,i(y){b||(c(f.$$.fragment,y),b=!0)},o(y){m(f.$$.fragment,y),b=!1},d(y){y&&(t(u),t(g)),M(f,y)}}}function Dt(R){let u,w="Examples:",g,f,b;return f=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b2VuY29kZXJLTFdhbiUyQyUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUyQyUyMGxvYWRfaW1hZ2UlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQ0xJUFZpc2lvbk1vZGVsJTBBJTBBJTIzJTIwQXZhaWxhYmxlJTIwbW9kZWxzJTNBJTIwV2FuLUFJJTJGV2FuMi4xLUkyVi0xNEItNDgwUC1EaWZmdXNlcnMlMkMlMjBXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi03MjBQLURpZmZ1c2VycyUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyV2FuLUFJJTJGV2FuMi4xLUkyVi0xNEItNDgwUC1EaWZmdXNlcnMlMjIlMEFpbWFnZV9lbmNvZGVyJTIwJTNEJTIwQ0xJUFZpc2lvbk1vZGVsLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMmltYWdlX2VuY29kZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIlMEEpJTBBdmFlJTIwJTNEJTIwQXV0b2VuY29kZXJLTFdhbi5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJ2YWUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBcGlwZSUyMCUzRCUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHZhZSUzRHZhZSUyQyUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmFzdHJvbmF1dC5qcGclMjIlMEEpJTBBbWF4X2FyZWElMjAlM0QlMjA0ODAlMjAqJTIwODMyJTBBYXNwZWN0X3JhdGlvJT
IwJTNEJTIwaW1hZ2UuaGVpZ2h0JTIwJTJGJTIwaW1hZ2Uud2lkdGglMEFtb2RfdmFsdWUlMjAlM0QlMjBwaXBlLnZhZV9zY2FsZV9mYWN0b3Jfc3BhdGlhbCUyMColMjBwaXBlLnRyYW5zZm9ybWVyLmNvbmZpZy5wYXRjaF9zaXplJTVCMSU1RCUwQWhlaWdodCUyMCUzRCUyMHJvdW5kKG5wLnNxcnQobWF4X2FyZWElMjAqJTIwYXNwZWN0X3JhdGlvKSklMjAlMkYlMkYlMjBtb2RfdmFsdWUlMjAqJTIwbW9kX3ZhbHVlJTBBd2lkdGglMjAlM0QlMjByb3VuZChucC5zcXJ0KG1heF9hcmVhJTIwJTJGJTIwYXNwZWN0X3JhdGlvKSklMjAlMkYlMkYlMjBtb2RfdmFsdWUlMjAqJTIwbW9kX3ZhbHVlJTBBaW1hZ2UlMjAlM0QlMjBpbWFnZS5yZXNpemUoKHdpZHRoJTJDJTIwaGVpZ2h0KSklMEFwcm9tcHQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIyQW4lMjBhc3Ryb25hdXQlMjBoYXRjaGluZyUyMGZyb20lMjBhbiUyMGVnZyUyQyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwbW9vbiUyQyUyMHRoZSUyMGRhcmtuZXNzJTIwYW5kJTIwZGVwdGglMjBvZiUyMHNwYWNlJTIwcmVhbGlzZWQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnRoZSUyMGJhY2tncm91bmQuJTIwSGlnaCUyMHF1YWxpdHklMkMlMjB1bHRyYXJlYWxpc3RpYyUyMGRldGFpbCUyMGFuZCUyMGJyZWF0aC10YWtpbmclMjBtb3ZpZS1saWtlJTIwY2FtZXJhJTIwc2hvdC4lMjIlMEEpJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyQnJpZ2h0JTIwdG9uZXMlMkMlMjBvdmVyZXhwb3NlZCUyQyUyMHN0YXRpYyUyQyUyMGJsdXJyZWQlMjBkZXRhaWxzJTJDJTIwc3VidGl0bGVzJTJDJTIwc3R5bGUlMkMlMjB3b3JrcyUyQyUyMHBhaW50aW5ncyUyQyUyMGltYWdlcyUyQyUyMHN0YXRpYyUyQyUyMG92ZXJhbGwlMjBncmF5JTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwbG93JTIwcXVhbGl0eSUyQyUyMEpQRUclMjBjb21wcmVzc2lvbiUyMHJlc2lkdWUlMkMlMjB1Z2x5JTJDJTIwaW5jb21wbGV0ZSUyQyUyMGV4dHJhJTIwZmluZ2VycyUyQyUyMHBvb3JseSUyMGRyYXduJTIwaGFuZHMlMkMlMjBwb29ybHklMjBkcmF3biUyMGZhY2VzJTJDJTIwZGVmb3JtZWQlMkMlMjBkaXNmaWd1cmVkJTJDJTIwbWlzc2hhcGVuJTIwbGltYnMlMkMlMjBmdXNlZCUyMGZpbmdlcnMlMkMlMjBzdGlsbCUyMHBpY3R1cmUlMkMlMjBtZXNzeSUyMGJhY2tncm91bmQlMkMlMjB0aHJlZSUyMGxlZ3MlMkMlMjBtYW55JTIwcGVvcGxlJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwd2Fsa2luZyUyMGJhY2t3YXJkcyUyMiUwQSUwQW91dHB1dCUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0RpbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlME
ElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEODElMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDUuMCUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMTYp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanImageToVideoPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> CLIPVisionModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Available models: Wan-AI/Wan2.1-I2V-14B-480P-Diffusers, Wan-AI/Wan2.1-I2V-14B-720P-Diffusers</span> | |
| <span class="hljs-meta">>>> </span>model_id = <span class="hljs-string">"Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"</span> | |
| <span class="hljs-meta">>>> </span>image_encoder = CLIPVisionModel.from_pretrained( | |
| <span class="hljs-meta">... </span> model_id, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float32 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span>pipe = WanImageToVideoPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>max_area = <span class="hljs-number">480</span> * <span class="hljs-number">832</span> | |
| <span class="hljs-meta">>>> </span>aspect_ratio = image.height / image.width | |
| <span class="hljs-meta">>>> </span>mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[<span class="hljs-number">1</span>] | |
| <span class="hljs-meta">>>> </span>height = <span class="hljs-built_in">round</span>(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value | |
| <span class="hljs-meta">>>> </span>width = <span class="hljs-built_in">round</span>(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value | |
| <span class="hljs-meta">>>> </span>image = image.resize((width, height)) | |
| <span class="hljs-meta">>>> </span>prompt = ( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in "</span> | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| <span class="hljs-meta">>>> </span>output = pipe( | |
| <span class="hljs-meta">... </span> image=image, | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> negative_prompt=negative_prompt, | |
| <span class="hljs-meta">... </span> height=height, | |
| <span class="hljs-meta">... </span> width=width, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">81</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">5.0</span>, | |
| <span class="hljs-meta">... </span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),{c(){u=o("p"),u.textContent=w,g=a(),i(f.$$.fragment)},l(y){u=r(y,"P",{"data-svelte-h":!0}),J(u)!=="svelte-kvfsh7"&&(u.textContent=w),g=s(y),d(f.$$.fragment,y)},m(y,T){n(y,u,T),n(y,g,T),p(f,y,T),b=!0},p:Ct,i(y){b||(c(f.$$.fragment,y),b=!0)},o(y){m(f.$$.fragment,y),b=!1},d(y){y&&(t(u),t(g)),M(f,y)}}}function Lt(R){let u,w,g,f,b,y,T,rt='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',Pe,N,dt='<a href="https://github.com/Wan-Video/Wan2.1" rel="nofollow">Wan 2.1</a> by the Alibaba Wan Team.',qe,E,Ke,H,pt="We will first need to install some addtional dependencies.",Oe,z,el,S,ll,A,ct=`The following example requires 11GB VRAM to run and uses the smaller <code>Wan-AI/Wan2.1-T2V-1.3B-Diffusers</code> model. You can switch it out | |
| for the larger <code>Wan2.1-I2V-14B-720P-Diffusers</code> or <code>Wan-AI/Wan2.1-I2V-14B-480P-Diffusers</code> if you have at least 35GB VRAM available.`,tl,$,nl,C,al,D,sl,L,ol,P,mt=`The Image to Video pipeline requires loading the <code>AutoencoderKLWan</code> and the <code>CLIPVisionModel</code> components in full precision. The following example will need at least | |
| 35GB of VRAM to run.`,il,q,rl,K,dl,O,pl,ee,cl,le,Mt="Base inference with the large 14B Wan 2.1 models can take up to 35GB of VRAM when generating videos at 720p resolution. We’ll outline a few memory optimizations we can apply to reduce the VRAM required to run the model.",ml,te,yt="We’ll use <code>Wan-AI/Wan2.1-I2V-14B-720P-Diffusers</code> model in these examples to demonstrate the memory savings, but the techniques are applicable to all model checkpoints.",Ml,ne,yl,ae,ut='Find more information about group offloading <a href="../optimization/memory.md">here</a>',ul,se,hl,oe,ht="We can reduce our VRAM requirements by applying group offloading to the larger model components of the pipeline; the <code>WanTransformer3DModel</code> and <code>UMT5EncoderModel</code>. Group offloading will break up the individual modules of a model and offload/onload them onto your GPU as needed during inference. In this example, we’ll apply <code>block_level</code> offloading, which will group the modules in a model into blocks of size <code>num_blocks_per_group</code> and offload/onload them to GPU. Moving to between CPU and GPU does add latency to the inference process. You can trade off between latency and memory savings by increasing or decreasing the <code>num_blocks_per_group</code>.",Jl,ie,Jt="The following example will now only require 14GB of VRAM to run, but will take approximately 30 minutes to generate a video.",fl,re,gl,de,bl,pe,ft='We can speed up group offloading inference, by enabling the use of <a href="https://pytorch.org/docs/stable/generated/torch.cuda.Stream.html" rel="nofollow">CUDA streams</a>. However, using CUDA streams requires moving the model parameters into pinned memory. This allocation is handled by Pytorch under the hood, and can result in a significant spike in CPU RAM usage. 
Please consider this option if your CPU RAM is atleast 2X the size of the model you are group offloading.',wl,ce,gt="In the following example we will use CUDA streams when group offloading the <code>WanTransformer3DModel</code>. When testing on an A100, this example will require 14GB of VRAM, 52GB of CPU RAM, but will generate a video in approximately 9 minutes.",Tl,me,Ul,Me,Zl,ye,bt='Find more information about layerwise casting <a href="../optimization/memory.md">here</a>',jl,ue,wt="In this example, we will model offloading with layerwise casting. Layerwise casting will downcast each layer’s weights to <code>torch.float8_e4m3fn</code>, temporarily upcast to <code>torch.bfloat16</code> during the forward pass of the layer, then revert to <code>torch.float8_e4m3fn</code> afterward. This approach reduces memory requirements by approximately 50% while introducing a minor quality reduction in the generated video due to the precision trade-off.",Wl,he,Tt="This example will require 20GB of VRAM.",_l,Je,Bl,fe,Gl,ge,Ut="Wan can be used with many different schedulers, each with their own benefits regarding speed and generation quality. By default, Wan uses the <code>UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)</code> scheduler. You can use a different scheduler as follows:",Il,be,vl,we,kl,Te,Zt=`The <code>WanTransformer3DModel</code> and <code>AutoencoderKLWan</code> models support loading checkpoints in their original format via the <code>from_single_file</code> loading | |
| method.`,Vl,Ue,Xl,Ze,Rl,je,jt='<li>Keep <code>AutencoderKLWan</code> in <code>torch.float32</code> for better decoding quality.</li> <li><code>num_frames</code> should satisfy the following constraint: <code>(num_frames - 1) % 4 == 0</code></li> <li>For smaller resolution videos, try lower values of <code>shift</code> (between <code>2.0</code> to <code>5.0</code>) in the <a href="https://huggingface.co/docs/diffusers/main/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler.shift" rel="nofollow">Scheduler</a>. For larger resolution videos, try higher values (between <code>7.0</code> and <code>12.0</code>). The default value is <code>3.0</code> for Wan.</li>',Cl,We,Ql,Z,_e,$l,Fe,Wt="Pipeline for text-to-video generation using Wan.",Dl,xe,_t=`This model inherits from <a href="/docs/diffusers/pr_11234/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Ll,B,Be,Pl,Ne,Bt="The call function to the pipeline for generation.",ql,Q,Kl,Y,Ge,Ol,Ee,Gt="Encodes the prompt into text encoder hidden states.",Yl,Ie,Fl,j,ve,et,He,It="Pipeline for image-to-video generation using Wan.",lt,ze,vt=`This model inherits from <a href="/docs/diffusers/pr_11234/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,tt,G,ke,nt,Se,kt="The call function to the pipeline for generation.",at,F,st,x,Ve,ot,Ae,Vt="Encodes the prompt into text encoder hidden states.",xl,Xe,Nl,k,Re,it,$e,Xt="Output class for Wan pipelines.",El,Ce,Hl,Le,zl;return b=new U({props:{title:"Wan",local:"wan",headingTag:"h1"}}),E=new U({props:{title:"Generating Videos with Wan 2.1",local:"generating-videos-with-wan-21",headingTag:"h2"}}),z=new _({props:{code:"cGlwJTIwaW5zdGFsbCUyMC11JTIwZnRmeSUyMGltYWdlaW8tZmZtcGVnJTIwaW1hZ2Vpbw==",highlighted:"pip install -u ftfy imageio-ffmpeg imageio",wrap:!1}}),S=new U({props:{title:"Text to Video Generation",local:"text-to-video-generation",headingTag:"h3"}}),$=new _({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFdhblBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUwQSUwQSUyMyUyMEF2YWlsYWJsZSUyMG1vZGVscyUzQSUyMFdhbi1BSSUyRldhbjIuMS1JMlYtMTRCLTcyMFAtRGlmZnVzZXJzJTIwb3IlMjBXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi00ODBQLURpZmZ1c2VycyUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyV2FuLUFJJTJGV2FuMi4xLVQyVi0xLjNCLURpZmZ1c2VycyUyMiUwQSUwQXBpcGUlMjAlM0QlMjBXYW5QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwY2F0JTIwYW5kJTIwYSUyMGRvZyUyMGJha2luZyUyMGElMjBjYWtlJTIwdG9nZXRoZXIlMjBpbiUyMGElMjBraXRjaGVuLiUyMFRoZSUyMGNhdCUyMGlzJTIwY2FyZWZ1bGx5JTIwbWVhc3VyaW5nJTIwZmxvdXIlMkMlMjB3aGlsZSUyMHRoZSUyMGRvZyUyMGlzJTIwc3RpcnJpbmclMjB0aGUlMjBiYXR0ZXIlMjB3aXRoJTIwYSUyMHdvb2RlbiUyMHNwb29uLiUyMFRoZSUyMGtpdGNoZW4lMjBpcyUyMGNvenklMkMlMjB3aXRoJTIwc3VubGlnaHQlMjBzdHJlYW1pbmclMjB0aHJvdWdoJTIwdGhlJTIwd2luZG93LiUyMiUwQW5lZ2F0aXZlX3Byb21wdCUyMCUzRCUyMCUyMkJyaWdodCUyMHRvbmVzJTJDJTIwb3ZlcmV4cG9zZWQlMkMlMjBzdGF0aWMlMkMlMjBibHVycmVkJTIwZGV0YWlscyUyQyUyMHN1YnRpdGxlcyUyQyUyMHN0eWxlJTJDJTIwd29ya3MlMkMlMjBwYWludGluZ3MlMkMlMjBpbWFnZXMlMkMlMjBzdGF0aWMlMkMlMjBvdmVyYWxsJTIwZ3JheSUyQyUyMHdvcnN0
JTIwcXVhbGl0eSUyQyUyMGxvdyUyMHF1YWxpdHklMkMlMjBKUEVHJTIwY29tcHJlc3Npb24lMjByZXNpZHVlJTJDJTIwdWdseSUyQyUyMGluY29tcGxldGUlMkMlMjBleHRyYSUyMGZpbmdlcnMlMkMlMjBwb29ybHklMjBkcmF3biUyMGhhbmRzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBmYWNlcyUyQyUyMGRlZm9ybWVkJTJDJTIwZGlzZmlndXJlZCUyQyUyMG1pc3NoYXBlbiUyMGxpbWJzJTJDJTIwZnVzZWQlMjBmaW5nZXJzJTJDJTIwc3RpbGwlMjBwaWN0dXJlJTJDJTIwbWVzc3klMjBiYWNrZ3JvdW5kJTJDJTIwdGhyZWUlMjBsZWdzJTJDJTIwbWFueSUyMHBlb3BsZSUyMGluJTIwdGhlJTIwYmFja2dyb3VuZCUyQyUyMHdhbGtpbmclMjBiYWNrd2FyZHMlMjIlMEFudW1fZnJhbWVzJTIwJTNEJTIwMzMlMEElMEFmcmFtZXMlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCUyQyUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUyMG51bV9mcmFtZXMlM0RudW1fZnJhbWVzKS5mcmFtZXMlNUIwJTVEJTBBZXhwb3J0X3RvX3ZpZGVvKGZyYW1lcyUyQyUyMCUyMndhbi10MnYubXA0JTIyJTJDJTIwZnBzJTNEMTYp",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> WanPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-comment"># Available models: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers or Wan-AI/Wan2.1-I2V-14B-480P-Diffusers</span> | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-T2V-1.3B-Diffusers"</span> | |
| pipe = WanPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16) | |
| pipe.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."</span> | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| num_frames = <span class="hljs-number">33</span> | |
| frames = pipe(prompt=prompt, negative_prompt=negative_prompt, num_frames=num_frames).frames[<span class="hljs-number">0</span>] | |
| export_to_video(frames, <span class="hljs-string">"wan-t2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),C=new zt({props:{$$slots:{default:[At]},$$scope:{ctx:R}}}),D=new _({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFdhblBpcGVsaW5lJTJDJTIwQXV0b2VuY29kZXJLTFdhbiUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMldhbi1BSSUyRldhbjIuMS1UMlYtMS4zQi1EaWZmdXNlcnMlMjIlMEElMEF2YWUlMjAlM0QlMjBBdXRvZW5jb2RlcktMV2FuLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMnZhZSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQzMiklMEFwaXBlJTIwJTNEJTIwV2FuUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdmFlJTNEdmFlJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEElMEElMjMlMjByZXBsYWNlJTIwdGhpcyUyMHdpdGglMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTIwaWYlMjB5b3UlMjBoYXZlJTIwc3VmZmljaWVudCUyMFZSQU0lMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyQSUyMGNhdCUyMGFuZCUyMGElMjBkb2clMjBiYWtpbmclMjBhJTIwY2FrZSUyMHRvZ2V0aGVyJTIwaW4lMjBhJTIwa2l0Y2hlbi4lMjBUaGUlMjBjYXQlMjBpcyUyMGNhcmVmdWxseSUyMG1lYXN1cmluZyUyMGZsb3VyJTJDJTIwd2hpbGUlMjB0aGUlMjBkb2clMjBpcyUyMHN0aXJyaW5nJTIwdGhlJTIwYmF0dGVyJTIwd2l0aCUyMGElMjB3b29kZW4lMjBzcG9vbi4lMjBUaGUlMjBraXRjaGVuJTIwaXMlMjBjb3p5JTJDJTIwd2l0aCUyMHN1bmxpZ2h0JTIwc3RyZWFtaW5nJTIwdGhyb3VnaCUyMHRoZSUyMHdpbmRvdy4lMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJCcmlnaHQlMjB0b25lcyUyQyUyMG92ZXJleHBvc2VkJTJDJTIwc3RhdGljJTJDJTIwYmx1cnJlZCUyMGRldGFpbHMlMkMlMjBzdWJ0aXRsZXMlMkMlMjBzdHlsZSUyQyUyMHdvcmtzJTJDJTIwcGFpbnRpbmdzJTJDJTIwaW1hZ2VzJTJDJTIwc3RhdGljJTJDJTIwb3ZlcmFsbCUyMGdyYXklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjBsb3clMjBxdWFsaXR5JTJDJTIwSlBFRyUyMGNvbXByZXNzaW9uJTIwcmVzaWR1ZSUyQyUyMHVnbHklMkMlMjBpbmNvbXBsZXRlJTJDJTIwZXh0cmElMjBmaW5nZXJzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBoYW5kcyUyQyUyMHBvb3JseSUyMGRyYXduJTIwZmFjZXMlMkMlMjBkZWZvcm1lZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjBtaXNzaGFwZW4lMjBsaW1icyUyQyUyMGZ1c2VkJTIwZmluZ2VycyUyQyUyMHN0aWxsJTIwcGljdHVyZSUyQyUyMG1lc3N5JTIwYmFja2dyb3VuZCUyQyUyMHRocmVl
JTIwbGVncyUyQyUyMG1hbnklMjBwZW9wbGUlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQlMkMlMjB3YWxraW5nJTIwYmFja3dhcmRzJTIyJTBBbnVtX2ZyYW1lcyUyMCUzRCUyMDMzJTBBJTBBZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjBudW1fZnJhbWVzJTNEbnVtX2ZyYW1lcykuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhmcmFtZXMlMkMlMjAlMjJ3YW4tdDJ2Lm1wNCUyMiUyQyUyMGZwcyUzRDE2KQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> WanPipeline, AutoencoderKLWan | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-T2V-1.3B-Diffusers"</span> | |
| vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16) | |
| <span class="hljs-comment"># replace this with pipe.to("cuda") if you have sufficient VRAM</span> | |
| pipe.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."</span> | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| num_frames = <span class="hljs-number">33</span> | |
| frames = pipe(prompt=prompt, num_frames=num_frames).frames[<span class="hljs-number">0</span>] | |
| export_to_video(frames, <span class="hljs-string">"wan-t2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),L=new U({props:{title:"Image to Video Generation",local:"image-to-video-generation",headingTag:"h3"}}),q=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b2VuY29kZXJLTFdhbiUyQyUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUyQyUyMGxvYWRfaW1hZ2UlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQ0xJUFZpc2lvbk1vZGVsJTBBJTBBJTIzJTIwQXZhaWxhYmxlJTIwbW9kZWxzJTNBJTIwV2FuLUFJJTJGV2FuMi4xLUkyVi0xNEItNDgwUC1EaWZmdXNlcnMlMkMlMjBXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi03MjBQLURpZmZ1c2VycyUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyV2FuLUFJJTJGV2FuMi4xLUkyVi0xNEItNDgwUC1EaWZmdXNlcnMlMjIlMEFpbWFnZV9lbmNvZGVyJTIwJTNEJTIwQ0xJUFZpc2lvbk1vZGVsLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMmltYWdlX2VuY29kZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIlMEEpJTBBdmFlJTIwJTNEJTIwQXV0b2VuY29kZXJLTFdhbi5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJ2YWUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBcGlwZSUyMCUzRCUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHZhZSUzRHZhZSUyQyUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEElMEElMjMlMjByZXBsYWNlJTIwdGhpcyUyMHdpdGglMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTIwaWYlMjB5b3UlMjBoYXZlJTIwc3VmZmljaWVudCUyMFZSQU0lMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmFzdHJvbmF1dC5qcGclMjIlMEEpJTBBJTBBbWF4X2FyZWElMjAlM0QlMjA0ODAlMjAqJTIwODMyJTBBYXNwZWN0X3JhdGlvJTIwJTNEJTIwaW1hZ2UuaGVpZ2h0JTIwJTJGJTIwaW1hZ2Uud2lkdGglMEFtb2RfdmFsdWUlMjAlM0QlMjBwaXBlLnZhZV9zY2FsZV9mYWN0b3Jfc3BhdGlhbCUyMColMjBwaXBl
LnRyYW5zZm9ybWVyLmNvbmZpZy5wYXRjaF9zaXplJTVCMSU1RCUwQWhlaWdodCUyMCUzRCUyMHJvdW5kKG5wLnNxcnQobWF4X2FyZWElMjAqJTIwYXNwZWN0X3JhdGlvKSklMjAlMkYlMkYlMjBtb2RfdmFsdWUlMjAqJTIwbW9kX3ZhbHVlJTBBd2lkdGglMjAlM0QlMjByb3VuZChucC5zcXJ0KG1heF9hcmVhJTIwJTJGJTIwYXNwZWN0X3JhdGlvKSklMjAlMkYlMkYlMjBtb2RfdmFsdWUlMjAqJTIwbW9kX3ZhbHVlJTBBaW1hZ2UlMjAlM0QlMjBpbWFnZS5yZXNpemUoKHdpZHRoJTJDJTIwaGVpZ2h0KSklMEElMEFwcm9tcHQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIyQW4lMjBhc3Ryb25hdXQlMjBoYXRjaGluZyUyMGZyb20lMjBhbiUyMGVnZyUyQyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwbW9vbiUyQyUyMHRoZSUyMGRhcmtuZXNzJTIwYW5kJTIwZGVwdGglMjBvZiUyMHNwYWNlJTIwcmVhbGlzZWQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnRoZSUyMGJhY2tncm91bmQuJTIwSGlnaCUyMHF1YWxpdHklMkMlMjB1bHRyYXJlYWxpc3RpYyUyMGRldGFpbCUyMGFuZCUyMGJyZWF0aC10YWtpbmclMjBtb3ZpZS1saWtlJTIwY2FtZXJhJTIwc2hvdC4lMjIlMEEpJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyQnJpZ2h0JTIwdG9uZXMlMkMlMjBvdmVyZXhwb3NlZCUyQyUyMHN0YXRpYyUyQyUyMGJsdXJyZWQlMjBkZXRhaWxzJTJDJTIwc3VidGl0bGVzJTJDJTIwc3R5bGUlMkMlMjB3b3JrcyUyQyUyMHBhaW50aW5ncyUyQyUyMGltYWdlcyUyQyUyMHN0YXRpYyUyQyUyMG92ZXJhbGwlMjBncmF5JTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwbG93JTIwcXVhbGl0eSUyQyUyMEpQRUclMjBjb21wcmVzc2lvbiUyMHJlc2lkdWUlMkMlMjB1Z2x5JTJDJTIwaW5jb21wbGV0ZSUyQyUyMGV4dHJhJTIwZmluZ2VycyUyQyUyMHBvb3JseSUyMGRyYXduJTIwaGFuZHMlMkMlMjBwb29ybHklMjBkcmF3biUyMGZhY2VzJTJDJTIwZGVmb3JtZWQlMkMlMjBkaXNmaWd1cmVkJTJDJTIwbWlzc2hhcGVuJTIwbGltYnMlMkMlMjBmdXNlZCUyMGZpbmdlcnMlMkMlMjBzdGlsbCUyMHBpY3R1cmUlMkMlMjBtZXNzeSUyMGJhY2tncm91bmQlMkMlMjB0aHJlZSUyMGxlZ3MlMkMlMjBtYW55JTIwcGVvcGxlJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwd2Fsa2luZyUyMGJhY2t3YXJkcyUyMiUwQSUwQW51bV9mcmFtZXMlMjAlM0QlMjAzMyUwQSUwQW91dHB1dCUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0RpbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEbnVtX2ZyYW1lcyUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNE
NS4wJTJDJTBBKS5mcmFtZXMlNUIwJTVEJTBBZXhwb3J0X3RvX3ZpZGVvKG91dHB1dCUyQyUyMCUyMndhbi1pMnYubXA0JTIyJTJDJTIwZnBzJTNEMTYp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanImageToVideoPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> CLIPVisionModel | |
| <span class="hljs-comment"># Available models: Wan-AI/Wan2.1-I2V-14B-480P-Diffusers, Wan-AI/Wan2.1-I2V-14B-720P-Diffusers</span> | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"</span> | |
| image_encoder = CLIPVisionModel.from_pretrained( | |
| model_id, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float32 | |
| ) | |
| vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| pipe = WanImageToVideoPipeline.from_pretrained( | |
| model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16 | |
| ) | |
| <span class="hljs-comment"># replace this with pipe.to("cuda") if you have sufficient VRAM</span> | |
| pipe.enable_model_cpu_offload() | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span> | |
| ) | |
| max_area = <span class="hljs-number">480</span> * <span class="hljs-number">832</span> | |
| aspect_ratio = image.height / image.width | |
| mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[<span class="hljs-number">1</span>] | |
| height = <span class="hljs-built_in">round</span>(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value | |
| width = <span class="hljs-built_in">round</span>(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value | |
| image = image.resize((width, height)) | |
| prompt = ( | |
| <span class="hljs-string">"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in "</span> | |
| <span class="hljs-string">"the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."</span> | |
| ) | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| num_frames = <span class="hljs-number">33</span> | |
| output = pipe( | |
| image=image, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| height=height, | |
| width=width, | |
| num_frames=num_frames, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"wan-i2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),K=new U({props:{title:"Video to Video Generation",local:"video-to-video-generation",headingTag:"h3"}}),O=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwbG9hZF92aWRlbyUyQyUyMGV4cG9ydF90b192aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBBdXRvZW5jb2RlcktMV2FuJTJDJTIwV2FuVmlkZW9Ub1ZpZGVvUGlwZWxpbmUlMkMlMjBVbmlQQ011bHRpc3RlcFNjaGVkdWxlciUwQSUwQSUyMyUyMEF2YWlsYWJsZSUyMG1vZGVscyUzQSUyMFdhbi1BSSUyRldhbjIuMS1UMlYtMTRCLURpZmZ1c2VycyUyQyUyMFdhbi1BSSUyRldhbjIuMS1UMlYtMS4zQi1EaWZmdXNlcnMlMEFtb2RlbF9pZCUyMCUzRCUyMCUyMldhbi1BSSUyRldhbjIuMS1UMlYtMS4zQi1EaWZmdXNlcnMlMjIlMEF2YWUlMjAlM0QlMjBBdXRvZW5jb2RlcktMV2FuLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMnZhZSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQzMiUwQSklMEFwaXBlJTIwJTNEJTIwV2FuVmlkZW9Ub1ZpZGVvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwdmFlJTNEdmFlJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFmbG93X3NoaWZ0JTIwJTNEJTIwMy4wJTIwJTIwJTIzJTIwNS4wJTIwZm9yJTIwNzIwUCUyQyUyMDMuMCUyMGZvciUyMDQ4MFAlMEFwaXBlLnNjaGVkdWxlciUyMCUzRCUyMFVuaVBDTXVsdGlzdGVwU2NoZWR1bGVyLmZyb21fY29uZmlnKCUwQSUyMCUyMCUyMCUyMHBpcGUuc2NoZWR1bGVyLmNvbmZpZyUyQyUyMGZsb3dfc2hpZnQlM0RmbG93X3NoaWZ0JTBBKSUwQSUyMyUyMGNoYW5nZSUyMHRvJTIwcGlwZS50byglMjJjdWRhJTIyKSUyMGlmJTIweW91JTIwaGF2ZSUyMHN1ZmZpY2llbnQlMjBWUkFNJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjByb2JvdCUyMHN0YW5kaW5nJTIwb24lMjBhJTIwbW91bnRhaW4lMjB0b3AuJTIwVGhlJTIwc3VuJTIwaXMlMjBzZXR0aW5nJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTIyJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyQnJpZ2h0JTIwdG9uZXMlMkMlMjBvdmVyZXhwb3NlZCUyQyUyMHN0YXRpYyUyQyUyMGJsdXJyZWQlMjBkZXRhaWxzJTJDJTIwc3VidGl0bGVzJTJDJTIwc3R5bGUlMkMlMjB3b3JrcyUyQyUyMHBhaW50aW5ncyUyQyUyMGltYWdlcyUyQyUyMHN0YXRpYyUyQyUyMG92ZXJhbGwlMjBncmF5JTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwbG93JTIwcXVhbGl0eSUyQyUyMEpQRUcl
MjBjb21wcmVzc2lvbiUyMHJlc2lkdWUlMkMlMjB1Z2x5JTJDJTIwaW5jb21wbGV0ZSUyQyUyMGV4dHJhJTIwZmluZ2VycyUyQyUyMHBvb3JseSUyMGRyYXduJTIwaGFuZHMlMkMlMjBwb29ybHklMjBkcmF3biUyMGZhY2VzJTJDJTIwZGVmb3JtZWQlMkMlMjBkaXNmaWd1cmVkJTJDJTIwbWlzc2hhcGVuJTIwbGltYnMlMkMlMjBmdXNlZCUyMGZpbmdlcnMlMkMlMjBzdGlsbCUyMHBpY3R1cmUlMkMlMjBtZXNzeSUyMGJhY2tncm91bmQlMkMlMjB0aHJlZSUyMGxlZ3MlMkMlMjBtYW55JTIwcGVvcGxlJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwd2Fsa2luZyUyMGJhY2t3YXJkcyUyMiUwQXZpZGVvJTIwJTNEJTIwbG9hZF92aWRlbyglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZoaWtlci5tcDQlMjIlMEEpJTBBb3V0cHV0JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjB2aWRlbyUzRHZpZGVvJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNENDgwJTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0Q1MTIlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDcuMCUyQyUwQSUyMCUyMCUyMCUyMHN0cmVuZ3RoJTNEMC43JTJDJTBBKS5mcmFtZXMlNUIwJTVEJTBBJTBBZXhwb3J0X3RvX3ZpZGVvKG91dHB1dCUyQyUyMCUyMndhbi12MnYubXA0JTIyJTJDJTIwZnBzJTNEMTYp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_video, export_to_video | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanVideoToVideoPipeline, UniPCMultistepScheduler | |
| <span class="hljs-comment"># Available models: Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers</span> | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-T2V-1.3B-Diffusers"</span> | |
| vae = AutoencoderKLWan.from_pretrained( | |
| model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32 | |
| ) | |
| pipe = WanVideoToVideoPipeline.from_pretrained( | |
| model_id, vae=vae, torch_dtype=torch.bfloat16 | |
| ) | |
| flow_shift = <span class="hljs-number">3.0</span> <span class="hljs-comment"># 5.0 for 720P, 3.0 for 480P</span> | |
| pipe.scheduler = UniPCMultistepScheduler.from_config( | |
| pipe.scheduler.config, flow_shift=flow_shift | |
| ) | |
| <span class="hljs-comment"># change to pipe.to("cuda") if you have sufficient VRAM</span> | |
| pipe.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"A robot standing on a mountain top. The sun is setting in the background"</span> | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| video = load_video( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hiker.mp4"</span> | |
| ) | |
| output = pipe( | |
| video=video, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| height=<span class="hljs-number">480</span>, | |
| width=<span class="hljs-number">512</span>, | |
| guidance_scale=<span class="hljs-number">7.0</span>, | |
| strength=<span class="hljs-number">0.7</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"wan-v2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),ee=new U({props:{title:"Memory Optimizations for Wan 2.1",local:"memory-optimizations-for-wan-21",headingTag:"h2"}}),ne=new U({props:{title:"Group Offloading the Transformer and UMT5 Text Encoder",local:"group-offloading-the-transformer-and-umt5-text-encoder",headingTag:"h3"}}),se=new U({props:{title:"Block Level Group Offloading",local:"block-level-group-offloading",headingTag:"h4"}}),re=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b2VuY29kZXJLTFdhbiUyQyUyMFdhblRyYW5zZm9ybWVyM0RNb2RlbCUyQyUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5ob29rcy5ncm91cF9vZmZsb2FkaW5nJTIwaW1wb3J0JTIwYXBwbHlfZ3JvdXBfb2ZmbG9hZGluZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMkMlMjBsb2FkX2ltYWdlJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFVNVDVFbmNvZGVyTW9kZWwlMkMlMjBDTElQVmlzaW9uTW9kZWwlMEElMEElMjMlMjBBdmFpbGFibGUlMjBtb2RlbHMlM0ElMjBXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi00ODBQLURpZmZ1c2VycyUyQyUyMFdhbi1BSSUyRldhbjIuMS1JMlYtMTRCLTcyMFAtRGlmZnVzZXJzJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi03MjBQLURpZmZ1c2VycyUyMiUwQWltYWdlX2VuY29kZXIlMjAlM0QlMjBDTElQVmlzaW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQzMiUwQSklMEElMEF0ZXh0X2VuY29kZXIlMjAlM0QlMjBVTVQ1RW5jb2Rlck1vZGVsLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMnRleHRfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBdmFlJTIwJTNEJTIwQXV0b2VuY29kZXJLTFdhbi5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJ2YWUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBXYW5UcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydHJhbnNmb3JtZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQSUwQ
W9ubG9hZF9kZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiklMEFvZmZsb2FkX2RldmljZSUyMCUzRCUyMHRvcmNoLmRldmljZSglMjJjcHUlMjIpJTBBJTBBYXBwbHlfZ3JvdXBfb2ZmbG9hZGluZyh0ZXh0X2VuY29kZXIlMkMlMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJibG9ja19sZXZlbCUyMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9ibG9ja3NfcGVyX2dyb3VwJTNENCUwQSklMEElMEF0cmFuc2Zvcm1lci5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJibG9ja19sZXZlbCUyMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9ibG9ja3NfcGVyX2dyb3VwJTNENCUyQyUwQSklMEFwaXBlJTIwJTNEJTIwV2FuSW1hZ2VUb1ZpZGVvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTBBJTIwJTIwJTIwJTIwdmFlJTNEdmFlJTJDJTBBJTIwJTIwJTIwJTIwdHJhbnNmb3JtZXIlM0R0cmFuc2Zvcm1lciUyQyUwQSUyMCUyMCUyMCUyMHRleHRfZW5jb2RlciUzRHRleHRfZW5jb2RlciUyQyUwQSUyMCUyMCUyMCUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEElMjMlMjBTaW5jZSUyMHdlJ3ZlJTIwb2ZmbG9hZGVkJTIwdGhlJTIwbGFyZ2VyJTIwbW9kZWxzJTIwYWxyYWR5JTJDJTIwd2UlMjBjYW4lMjBtb3ZlJTIwdGhlJTIwcmVzdCUyMG9mJTIwdGhlJTIwbW9kZWwlMjBjb21wb25lbnRzJTIwdG8lMjBHUFUlMEFwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmFzdHJvbmF1dC5qcGclMjIlMEEpJTBBJTBBbWF4X2FyZWElMjAlM0QlMjA3MjAlMjAqJTIwODMyJTBBYXNwZWN0X3JhdGlvJTIwJTNEJTIwaW1hZ2UuaGVpZ2h0JTIwJTJGJTIwaW1hZ2Uud2lkdGglMEFtb2RfdmFsdWUlMjAlM0QlMjBwaXBlLnZhZV9zY2FsZV9mYWN0b3Jfc3BhdGlhbCUyMColMjBwaXBlLnRyYW5zZm9ybWVyLmNvbmZpZy5wYXRjaF9zaXplJTVCMSU1RCUwQWhlaWdodCUyMCUzRCUyMHJvdW5kKG5wLnNxcnQobWF4X2FyZWElMjAqJTIwYXNwZWN0X3JhdGlvKSklMjAlMkYlMkYlMjBtb2RfdmFsdWUlMjAqJTIwbW9kX3ZhbHVlJTBBd2lkdGglMjAlM0QlMjByb
3VuZChucC5zcXJ0KG1heF9hcmVhJTIwJTJGJTIwYXNwZWN0X3JhdGlvKSklMjAlMkYlMkYlMjBtb2RfdmFsdWUlMjAqJTIwbW9kX3ZhbHVlJTBBaW1hZ2UlMjAlM0QlMjBpbWFnZS5yZXNpemUoKHdpZHRoJTJDJTIwaGVpZ2h0KSklMEElMEFwcm9tcHQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIyQW4lMjBhc3Ryb25hdXQlMjBoYXRjaGluZyUyMGZyb20lMjBhbiUyMGVnZyUyQyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwbW9vbiUyQyUyMHRoZSUyMGRhcmtuZXNzJTIwYW5kJTIwZGVwdGglMjBvZiUyMHNwYWNlJTIwcmVhbGlzZWQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnRoZSUyMGJhY2tncm91bmQuJTIwSGlnaCUyMHF1YWxpdHklMkMlMjB1bHRyYXJlYWxpc3RpYyUyMGRldGFpbCUyMGFuZCUyMGJyZWF0aC10YWtpbmclMjBtb3ZpZS1saWtlJTIwY2FtZXJhJTIwc2hvdC4lMjIlMEEpJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyQnJpZ2h0JTIwdG9uZXMlMkMlMjBvdmVyZXhwb3NlZCUyQyUyMHN0YXRpYyUyQyUyMGJsdXJyZWQlMjBkZXRhaWxzJTJDJTIwc3VidGl0bGVzJTJDJTIwc3R5bGUlMkMlMjB3b3JrcyUyQyUyMHBhaW50aW5ncyUyQyUyMGltYWdlcyUyQyUyMHN0YXRpYyUyQyUyMG92ZXJhbGwlMjBncmF5JTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwbG93JTIwcXVhbGl0eSUyQyUyMEpQRUclMjBjb21wcmVzc2lvbiUyMHJlc2lkdWUlMkMlMjB1Z2x5JTJDJTIwaW5jb21wbGV0ZSUyQyUyMGV4dHJhJTIwZmluZ2VycyUyQyUyMHBvb3JseSUyMGRyYXduJTIwaGFuZHMlMkMlMjBwb29ybHklMjBkcmF3biUyMGZhY2VzJTJDJTIwZGVmb3JtZWQlMkMlMjBkaXNmaWd1cmVkJTJDJTIwbWlzc2hhcGVuJTIwbGltYnMlMkMlMjBmdXNlZCUyMGZpbmdlcnMlMkMlMjBzdGlsbCUyMHBpY3R1cmUlMkMlMjBtZXNzeSUyMGJhY2tncm91bmQlMkMlMjB0aHJlZSUyMGxlZ3MlMkMlMjBtYW55JTIwcGVvcGxlJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwd2Fsa2luZyUyMGJhY2t3YXJkcyUyMiUwQSUwQW51bV9mcmFtZXMlMjAlM0QlMjAzMyUwQSUwQW91dHB1dCUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0RpbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEbnVtX2ZyYW1lcyUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNENS4wJTJDJTBBKS5mcmFtZXMlNUIwJTVEJTBBJTBBZXhwb3J0X3RvX3ZpZGVvKG91dHB1dCUyQyUyMCUyMndhbi1pMnYubXA0JTIyJTJDJTIwZnBzJTNEMTYp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanTransformer3DModel, WanImageToVideoPipeline | |
| <span class="hljs-keyword">from</span> diffusers.hooks.group_offloading <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> UMT5EncoderModel, CLIPVisionModel | |
| <span class="hljs-comment"># Available models: Wan-AI/Wan2.1-I2V-14B-480P-Diffusers, Wan-AI/Wan2.1-I2V-14B-720P-Diffusers</span> | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"</span> | |
| image_encoder = CLIPVisionModel.from_pretrained( | |
| model_id, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float32 | |
| ) | |
| text_encoder = UMT5EncoderModel.from_pretrained(model_id, subfolder=<span class="hljs-string">"text_encoder"</span>, torch_dtype=torch.bfloat16) | |
| vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| transformer = WanTransformer3DModel.from_pretrained(model_id, subfolder=<span class="hljs-string">"transformer"</span>, torch_dtype=torch.bfloat16) | |
| onload_device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| offload_device = torch.device(<span class="hljs-string">"cpu"</span>) | |
| apply_group_offloading(text_encoder, | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"block_level"</span>, | |
| num_blocks_per_group=<span class="hljs-number">4</span> | |
| ) | |
| transformer.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"block_level"</span>, | |
| num_blocks_per_group=<span class="hljs-number">4</span>, | |
| ) | |
| pipe = WanImageToVideoPipeline.from_pretrained( | |
| model_id, | |
| vae=vae, | |
| transformer=transformer, | |
| text_encoder=text_encoder, | |
| image_encoder=image_encoder, | |
| torch_dtype=torch.bfloat16 | |
| ) | |
| <span class="hljs-comment"># Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU</span> | |
| pipe.to(<span class="hljs-string">"cuda"</span>) | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span> | |
| ) | |
| max_area = <span class="hljs-number">720</span> * <span class="hljs-number">832</span> | |
| aspect_ratio = image.height / image.width | |
| mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[<span class="hljs-number">1</span>] | |
| height = <span class="hljs-built_in">round</span>(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value | |
| width = <span class="hljs-built_in">round</span>(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value | |
| image = image.resize((width, height)) | |
| prompt = ( | |
| <span class="hljs-string">"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in "</span> | |
| <span class="hljs-string">"the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."</span> | |
| ) | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| num_frames = <span class="hljs-number">33</span> | |
| output = pipe( | |
| image=image, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| height=height, | |
| width=width, | |
| num_frames=num_frames, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"wan-i2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),de=new U({props:{title:"Block Level Group Offloading with CUDA Streams",local:"block-level-group-offloading-with-cuda-streams",headingTag:"h4"}}),me=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b2VuY29kZXJLTFdhbiUyQyUyMFdhblRyYW5zZm9ybWVyM0RNb2RlbCUyQyUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5ob29rcy5ncm91cF9vZmZsb2FkaW5nJTIwaW1wb3J0JTIwYXBwbHlfZ3JvdXBfb2ZmbG9hZGluZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMkMlMjBsb2FkX2ltYWdlJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFVNVDVFbmNvZGVyTW9kZWwlMkMlMjBDTElQVmlzaW9uTW9kZWwlMEElMEElMjMlMjBBdmFpbGFibGUlMjBtb2RlbHMlM0ElMjBXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi00ODBQLURpZmZ1c2VycyUyQyUyMFdhbi1BSSUyRldhbjIuMS1JMlYtMTRCLTcyMFAtRGlmZnVzZXJzJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJXYW4tQUklMkZXYW4yLjEtSTJWLTE0Qi03MjBQLURpZmZ1c2VycyUyMiUwQWltYWdlX2VuY29kZXIlMjAlM0QlMjBDTElQVmlzaW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQzMiUwQSklMEElMEF0ZXh0X2VuY29kZXIlMjAlM0QlMjBVTVQ1RW5jb2Rlck1vZGVsLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMnRleHRfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBdmFlJTIwJTNEJTIwQXV0b2VuY29kZXJLTFdhbi5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJ2YWUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBXYW5UcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydHJhbnNmb3JtZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQSUwQW9ubG9hZF9kZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiklMEFvZmZsb2FkX2RldmljZSUyMCUzRCUyMHRvcmNoLmRldmljZSglMjJjcHUlMjIpJTBBJTBBYXBwbHlfZ3JvdXBfb2ZmbG9hZGluZyh0ZXh0X2VuY29kZXIlMkMlMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2Rldmlj
ZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJibG9ja19sZXZlbCUyMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9ibG9ja3NfcGVyX2dyb3VwJTNENCUwQSklMEElMEF0cmFuc2Zvcm1lci5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMEEpJTBBcGlwZSUyMCUzRCUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUwQSUyMCUyMCUyMCUyMHZhZSUzRHZhZSUyQyUwQSUyMCUyMCUyMCUyMHRyYW5zZm9ybWVyJTNEdHJhbnNmb3JtZXIlMkMlMEElMjAlMjAlMjAlMjB0ZXh0X2VuY29kZXIlM0R0ZXh0X2VuY29kZXIlMkMlMEElMjAlMjAlMjAlMjBpbWFnZV9lbmNvZGVyJTNEaW1hZ2VfZW5jb2RlciUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBJTIzJTIwU2luY2UlMjB3ZSd2ZSUyMG9mZmxvYWRlZCUyMHRoZSUyMGxhcmdlciUyMG1vZGVscyUyMGFscmFkeSUyQyUyMHdlJTIwY2FuJTIwbW92ZSUyMHRoZSUyMHJlc3QlMjBvZiUyMHRoZSUyMG1vZGVsJTIwY29tcG9uZW50cyUyMHRvJTIwR1BVJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQWltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZhc3Ryb25hdXQuanBnJTIyJTBBKSUwQSUwQW1heF9hcmVhJTIwJTNEJTIwNzIwJTIwKiUyMDgzMiUwQWFzcGVjdF9yYXRpbyUyMCUzRCUyMGltYWdlLmhlaWdodCUyMCUyRiUyMGltYWdlLndpZHRoJTBBbW9kX3ZhbHVlJTIwJTNEJTIwcGlwZS52YWVfc2NhbGVfZmFjdG9yX3NwYXRpYWwlMjAqJTIwcGlwZS50cmFuc2Zvcm1lci5jb25maWcucGF0Y2hfc2l6ZSU1QjElNUQlMEFoZWlnaHQlMjAlM0QlMjByb3VuZChucC5zcXJ0KG1heF9hcmVhJTIwKiUyMGFzcGVjdF9yYXRpbykpJTIwJTJGJTJGJTIwbW9kX3ZhbHVlJTIwKiUyMG1vZF92YWx1ZSUwQXdpZHRoJTIwJTNEJTIwcm91bmQobnAuc3FydChtYXhfYXJlYSUyMCUyRiUyMGFzcGVjdF9yYXRpbykpJTIwJTJGJTJGJTIwbW9kX3ZhbHVlJTIwKiUyMG1vZF92YWx1ZSUwQWltYWdlJTIwJTNEJTIwaW1hZ2UucmVzaXplKCh3aWR0aCUyQyUyMGhlaWdodCkpJTBBJTBBcHJvbXB0JTIwJTNEJTIwKCUwQSUyMCUyMCUyMCUyMCUyMkFuJTIwYXN0cm9uYXV0JTIwaGF0Y2hp
bmclMjBmcm9tJTIwYW4lMjBlZ2clMkMlMjBvbiUyMHRoZSUyMHN1cmZhY2UlMjBvZiUyMHRoZSUyMG1vb24lMkMlMjB0aGUlMjBkYXJrbmVzcyUyMGFuZCUyMGRlcHRoJTIwb2YlMjBzcGFjZSUyMHJlYWxpc2VkJTIwaW4lMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJ0aGUlMjBiYWNrZ3JvdW5kLiUyMEhpZ2glMjBxdWFsaXR5JTJDJTIwdWx0cmFyZWFsaXN0aWMlMjBkZXRhaWwlMjBhbmQlMjBicmVhdGgtdGFraW5nJTIwbW92aWUtbGlrZSUyMGNhbWVyYSUyMHNob3QuJTIyJTBBKSUwQW5lZ2F0aXZlX3Byb21wdCUyMCUzRCUyMCUyMkJyaWdodCUyMHRvbmVzJTJDJTIwb3ZlcmV4cG9zZWQlMkMlMjBzdGF0aWMlMkMlMjBibHVycmVkJTIwZGV0YWlscyUyQyUyMHN1YnRpdGxlcyUyQyUyMHN0eWxlJTJDJTIwd29ya3MlMkMlMjBwYWludGluZ3MlMkMlMjBpbWFnZXMlMkMlMjBzdGF0aWMlMkMlMjBvdmVyYWxsJTIwZ3JheSUyQyUyMHdvcnN0JTIwcXVhbGl0eSUyQyUyMGxvdyUyMHF1YWxpdHklMkMlMjBKUEVHJTIwY29tcHJlc3Npb24lMjByZXNpZHVlJTJDJTIwdWdseSUyQyUyMGluY29tcGxldGUlMkMlMjBleHRyYSUyMGZpbmdlcnMlMkMlMjBwb29ybHklMjBkcmF3biUyMGhhbmRzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBmYWNlcyUyQyUyMGRlZm9ybWVkJTJDJTIwZGlzZmlndXJlZCUyQyUyMG1pc3NoYXBlbiUyMGxpbWJzJTJDJTIwZnVzZWQlMjBmaW5nZXJzJTJDJTIwc3RpbGwlMjBwaWN0dXJlJTJDJTIwbWVzc3klMjBiYWNrZ3JvdW5kJTJDJTIwdGhyZWUlMjBsZWdzJTJDJTIwbWFueSUyMHBlb3BsZSUyMGluJTIwdGhlJTIwYmFja2dyb3VuZCUyQyUyMHdhbGtpbmclMjBiYWNrd2FyZHMlMjIlMEElMEFudW1fZnJhbWVzJTIwJTNEJTIwMzMlMEElMEFvdXRwdXQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGltYWdlJTNEaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRHdpZHRoJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRG51bV9mcmFtZXMlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDUuMCUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQSUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJ3YW4taTJ2Lm1wNCUyMiUyQyUyMGZwcyUzRDE2KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanTransformer3DModel, WanImageToVideoPipeline | |
| <span class="hljs-keyword">from</span> diffusers.hooks.group_offloading <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> UMT5EncoderModel, CLIPVisionModel | |
| <span class="hljs-comment"># Available models: Wan-AI/Wan2.1-I2V-14B-480P-Diffusers, Wan-AI/Wan2.1-I2V-14B-720P-Diffusers</span> | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"</span> | |
| image_encoder = CLIPVisionModel.from_pretrained( | |
| model_id, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float32 | |
| ) | |
| text_encoder = UMT5EncoderModel.from_pretrained(model_id, subfolder=<span class="hljs-string">"text_encoder"</span>, torch_dtype=torch.bfloat16) | |
| vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| transformer = WanTransformer3DModel.from_pretrained(model_id, subfolder=<span class="hljs-string">"transformer"</span>, torch_dtype=torch.bfloat16) | |
| onload_device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| offload_device = torch.device(<span class="hljs-string">"cpu"</span>) | |
| apply_group_offloading(text_encoder, | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"block_level"</span>, | |
| num_blocks_per_group=<span class="hljs-number">4</span> | |
| ) | |
| transformer.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span> | |
| ) | |
| pipe = WanImageToVideoPipeline.from_pretrained( | |
| model_id, | |
| vae=vae, | |
| transformer=transformer, | |
| text_encoder=text_encoder, | |
| image_encoder=image_encoder, | |
| torch_dtype=torch.bfloat16 | |
| ) | |
| <span class="hljs-comment"># Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU</span> | |
| pipe.to(<span class="hljs-string">"cuda"</span>) | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span> | |
| ) | |
| max_area = <span class="hljs-number">720</span> * <span class="hljs-number">832</span> | |
| aspect_ratio = image.height / image.width | |
| mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[<span class="hljs-number">1</span>] | |
| height = <span class="hljs-built_in">round</span>(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value | |
| width = <span class="hljs-built_in">round</span>(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value | |
| image = image.resize((width, height)) | |
| prompt = ( | |
| <span class="hljs-string">"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in "</span> | |
| <span class="hljs-string">"the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."</span> | |
| ) | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| num_frames = <span class="hljs-number">33</span> | |
| output = pipe( | |
| image=image, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| height=height, | |
| width=width, | |
| num_frames=num_frames, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"wan-i2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),Me=new U({props:{title:"Applying Layerwise Casting to the Transformer",local:"applying-layerwise-casting-to-the-transformer",headingTag:"h3"}}),Je=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b2VuY29kZXJLTFdhbiUyQyUyMFdhblRyYW5zZm9ybWVyM0RNb2RlbCUyQyUyMFdhbkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5ob29rcy5ncm91cF9vZmZsb2FkaW5nJTIwaW1wb3J0JTIwYXBwbHlfZ3JvdXBfb2ZmbG9hZGluZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMkMlMjBsb2FkX2ltYWdlJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFVNVDVFbmNvZGVyTW9kZWwlMkMlMjBDTElQVmlzaW9uTW9kZWwlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMldhbi1BSSUyRldhbjIuMS1JMlYtMTRCLTcyMFAtRGlmZnVzZXJzJTIyJTBBaW1hZ2VfZW5jb2RlciUyMCUzRCUyMENMSVBWaXNpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwbW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJpbWFnZV9lbmNvZGVyJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDMyJTBBKSUwQXRleHRfZW5jb2RlciUyMCUzRCUyMFVNVDVFbmNvZGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydGV4dF9lbmNvZGVyJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEF2YWUlMjAlM0QlMjBBdXRvZW5jb2RlcktMV2FuLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHN1YmZvbGRlciUzRCUyMnZhZSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQzMiklMEElMEF0cmFuc2Zvcm1lciUyMCUzRCUyMFdhblRyYW5zZm9ybWVyM0RNb2RlbC5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBzdWJmb2xkZXIlM0QlMjJ0cmFuc2Zvcm1lciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBdHJhbnNmb3JtZXIuZW5hYmxlX2xheWVyd2lzZV9jYXN0aW5nKHN0b3JhZ2VfZHR5cGUlM0R0b3JjaC5mbG9hdDhfZTRtM2ZuJTJDJTIwY29tcHV0ZV9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQSUwQXBpcGUlMjAlM0QlMjBXYW5JbWFnZVRvVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwbW9kZWxfaWQlMkMlMEElMjAlMjAlMjAlMjB2YWUlM0R2YWUlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTBBJTIwJTIwJTIwJTIwdGV4dF9lbmNvZGVyJTNEdGV4dF9lbm
NvZGVyJTJDJTBBJTIwJTIwJTIwJTIwaW1hZ2VfZW5jb2RlciUzRGltYWdlX2VuY29kZXIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEFpbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGYXN0cm9uYXV0LmpwZyUyMiklMEElMEFtYXhfYXJlYSUyMCUzRCUyMDcyMCUyMColMjA4MzIlMEFhc3BlY3RfcmF0aW8lMjAlM0QlMjBpbWFnZS5oZWlnaHQlMjAlMkYlMjBpbWFnZS53aWR0aCUwQW1vZF92YWx1ZSUyMCUzRCUyMHBpcGUudmFlX3NjYWxlX2ZhY3Rvcl9zcGF0aWFsJTIwKiUyMHBpcGUudHJhbnNmb3JtZXIuY29uZmlnLnBhdGNoX3NpemUlNUIxJTVEJTBBaGVpZ2h0JTIwJTNEJTIwcm91bmQobnAuc3FydChtYXhfYXJlYSUyMColMjBhc3BlY3RfcmF0aW8pKSUyMCUyRiUyRiUyMG1vZF92YWx1ZSUyMColMjBtb2RfdmFsdWUlMEF3aWR0aCUyMCUzRCUyMHJvdW5kKG5wLnNxcnQobWF4X2FyZWElMjAlMkYlMjBhc3BlY3RfcmF0aW8pKSUyMCUyRiUyRiUyMG1vZF92YWx1ZSUyMColMjBtb2RfdmFsdWUlMEFpbWFnZSUyMCUzRCUyMGltYWdlLnJlc2l6ZSgod2lkdGglMkMlMjBoZWlnaHQpKSUwQXByb21wdCUyMCUzRCUyMCglMEElMjAlMjAlMjAlMjAlMjJBbiUyMGFzdHJvbmF1dCUyMGhhdGNoaW5nJTIwZnJvbSUyMGFuJTIwZWdnJTJDJTIwb24lMjB0aGUlMjBzdXJmYWNlJTIwb2YlMjB0aGUlMjBtb29uJTJDJTIwdGhlJTIwZGFya25lc3MlMjBhbmQlMjBkZXB0aCUyMG9mJTIwc3BhY2UlMjByZWFsaXNlZCUyMGluJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIydGhlJTIwYmFja2dyb3VuZC4lMjBIaWdoJTIwcXVhbGl0eSUyQyUyMHVsdHJhcmVhbGlzdGljJTIwZGV0YWlsJTIwYW5kJTIwYnJlYXRoLXRha2luZyUyMG1vdmllLWxpa2UlMjBjYW1lcmElMjBzaG90LiUyMiUwQSklMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJCcmlnaHQlMjB0b25lcyUyQyUyMG92ZXJleHBvc2VkJTJDJTIwc3RhdGljJTJDJTIwYmx1cnJlZCUyMGRldGFpbHMlMkMlMjBzdWJ0aXRsZXMlMkMlMjBzdHlsZSUyQyUyMHdvcmtzJTJDJTIwcGFpbnRpbmdzJTJDJTIwaW1hZ2VzJTJDJTIwc3RhdGljJTJDJTIwb3ZlcmFsbCUyMGdyYXklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjBsb3clMjBxdWFsaXR5JTJDJTIwSlBFRyUyMGNvbXByZXNzaW9uJTIwcmVzaWR1ZSUyQyUyMHVnbHklMkMlMjBpbmNvbXBsZXRlJTJDJTIwZXh0cmElMjBmaW5nZXJzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBoYW5kcyUyQyUyMHBvb3JseSUyMGRyYXduJTIwZmFjZXMlMkMlMjBkZWZvcm1lZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjBtaXNzaGFwZW4lMjBsaW1icyUyQyUyMGZ1c2VkJTIwZmluZ2
VycyUyQyUyMHN0aWxsJTIwcGljdHVyZSUyQyUyMG1lc3N5JTIwYmFja2dyb3VuZCUyQyUyMHRocmVlJTIwbGVncyUyQyUyMG1hbnklMjBwZW9wbGUlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQlMkMlMjB3YWxraW5nJTIwYmFja3dhcmRzJTIyJTBBbnVtX2ZyYW1lcyUyMCUzRCUyMDMzJTBBJTBBb3V0cHV0JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBpbWFnZSUzRGltYWdlJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNEaGVpZ2h0JTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0R3aWR0aCUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0RudW1fZnJhbWVzJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDUwJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q1LjAlMkMlMEEpLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8ob3V0cHV0JTJDJTIwJTIyd2FuLWkydi5tcDQlMjIlMkMlMjBmcHMlM0QxNik=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKLWan, WanTransformer3DModel, WanImageToVideoPipeline | |
| <span class="hljs-keyword">from</span> diffusers.hooks.group_offloading <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> UMT5EncoderModel, CLIPVisionModel | |
| model_id = <span class="hljs-string">"Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"</span> | |
| image_encoder = CLIPVisionModel.from_pretrained( | |
| model_id, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float32 | |
| ) | |
| text_encoder = UMT5EncoderModel.from_pretrained(model_id, subfolder=<span class="hljs-string">"text_encoder"</span>, torch_dtype=torch.bfloat16) | |
| vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.float32) | |
| transformer = WanTransformer3DModel.from_pretrained(model_id, subfolder=<span class="hljs-string">"transformer"</span>, torch_dtype=torch.bfloat16) | |
| transformer.enable_layerwise_casting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16) | |
| pipe = WanImageToVideoPipeline.from_pretrained( | |
| model_id, | |
| vae=vae, | |
| transformer=transformer, | |
| text_encoder=text_encoder, | |
| image_encoder=image_encoder, | |
| torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_model_cpu_offload() | |
| image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span>) | |
| max_area = <span class="hljs-number">720</span> * <span class="hljs-number">832</span> | |
| aspect_ratio = image.height / image.width | |
| mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[<span class="hljs-number">1</span>] | |
| height = <span class="hljs-built_in">round</span>(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value | |
| width = <span class="hljs-built_in">round</span>(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value | |
| image = image.resize((width, height)) | |
| prompt = ( | |
| <span class="hljs-string">"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in "</span> | |
| <span class="hljs-string">"the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."</span> | |
| ) | |
| negative_prompt = <span class="hljs-string">"Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"</span> | |
| num_frames = <span class="hljs-number">33</span> | |
| output = pipe( | |
| image=image, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| height=height, | |
| width=width, | |
| num_frames=num_frames, | |
| num_inference_steps=<span class="hljs-number">50</span>, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"wan-i2v.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),fe=new U({props:{title:"Using a Custom Scheduler",local:"using-a-custom-scheduler",headingTag:"h2"}}),be=new _({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsb3dNYXRjaEV1bGVyRGlzY3JldGVTY2hlZHVsZXIlMkMlMjBVbmlQQ011bHRpc3RlcFNjaGVkdWxlciUyQyUyMFdhblBpcGVsaW5lJTBBJTBBc2NoZWR1bGVyX2ElMjAlM0QlMjBGbG93TWF0Y2hFdWxlckRpc2NyZXRlU2NoZWR1bGVyKHNoaWZ0JTNENS4wKSUwQXNjaGVkdWxlcl9iJTIwJTNEJTIwVW5pUENNdWx0aXN0ZXBTY2hlZHVsZXIocHJlZGljdGlvbl90eXBlJTNEJTIyZmxvd19wcmVkaWN0aW9uJTIyJTJDJTIwdXNlX2Zsb3dfc2lnbWFzJTNEVHJ1ZSUyQyUyMGZsb3dfc2hpZnQlM0Q0LjApJTBBJTBBcGlwZSUyMCUzRCUyMFdhblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJXYW4tQUklMkZXYW4yLjEtVDJWLTEuM0ItRGlmZnVzZXJzJTIyJTJDJTIwc2NoZWR1bGVyJTNEJTNDQ1VTVE9NX1NDSEVEVUxFUl9IRVJFJTNFKSUwQSUwQSUyMyUyMG9yJTJDJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjAlM0NDVVNUT01fU0NIRURVTEVSX0hFUkUlM0U=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlowMatchEulerDiscreteScheduler, UniPCMultistepScheduler, WanPipeline | |
| scheduler_a = FlowMatchEulerDiscreteScheduler(shift=<span class="hljs-number">5.0</span>) | |
| scheduler_b = UniPCMultistepScheduler(prediction_type=<span class="hljs-string">"flow_prediction"</span>, use_flow_sigmas=<span class="hljs-literal">True</span>, flow_shift=<span class="hljs-number">4.0</span>) | |
| pipe = WanPipeline.from_pretrained(<span class="hljs-string">"Wan-AI/Wan2.1-T2V-1.3B-Diffusers"</span>, scheduler=<CUSTOM_SCHEDULER_HERE>) | |
| <span class="hljs-comment"># or,</span> | |
| pipe.scheduler = <CUSTOM_SCHEDULER_HERE>`,wrap:!1}}),we=new U({props:{title:"Using Single File Loading with Wan 2.1",local:"using-single-file-loading-with-wan-21",headingTag:"h2"}}),Ue=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwV2FuUGlwZWxpbmUlMkMlMjBXYW5UcmFuc2Zvcm1lcjNETW9kZWwlMEElMEFja3B0X3BhdGglMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGQ29tZnktT3JnJTJGV2FuXzIuMV9Db21meVVJX3JlcGFja2FnZWQlMkZibG9iJTJGbWFpbiUyRnNwbGl0X2ZpbGVzJTJGZGlmZnVzaW9uX21vZGVscyUyRndhbjIuMV90MnZfMS4zQl9iZjE2LnNhZmV0ZW5zb3JzJTIyJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBXYW5UcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9zaW5nbGVfZmlsZShja3B0X3BhdGglMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQSUwQXBpcGUlMjAlM0QlMjBXYW5QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyV2FuLUFJJTJGV2FuMi4xLVQyVi0xLjNCLURpZmZ1c2VycyUyMiUyQyUyMHRyYW5zZm9ybWVyJTNEdHJhbnNmb3JtZXIp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> WanPipeline, WanTransformer3DModel | |
| ckpt_path = <span class="hljs-string">"https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_t2v_1.3B_bf16.safetensors"</span> | |
| transformer = WanTransformer3DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16) | |
| pipe = WanPipeline.from_pretrained(<span class="hljs-string">"Wan-AI/Wan2.1-T2V-1.3B-Diffusers"</span>, transformer=transformer)`,wrap:!1}}),Ze=new U({props:{title:"Recommendations for Inference",local:"recommendations-for-inference",headingTag:"h2"}}),We=new U({props:{title:"WanPipeline",local:"diffusers.WanPipeline",headingTag:"h2"}}),_e=new Ye({props:{name:"class diffusers.WanPipeline",anchor:"diffusers.WanPipeline",parameters:[{name:"tokenizer",val:": AutoTokenizer"},{name:"text_encoder",val:": UMT5EncoderModel"},{name:"transformer",val:": WanTransformer3DModel"},{name:"vae",val:": AutoencoderKLWan"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"}],parametersDescription:[{anchor:"diffusers.WanPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>T5Tokenizer</code>) — | |
| Tokenizer from <a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5Tokenizer" rel="nofollow">T5</a>, | |
| specifically the <a href="https://huggingface.co/google/umt5-xxl" rel="nofollow">google/umt5-xxl</a> variant.`,name:"tokenizer"},{anchor:"diffusers.WanPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>T5EncoderModel</code>) — | |
| <a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel" rel="nofollow">T5</a>, specifically | |
| the <a href="https://huggingface.co/google/umt5-xxl" rel="nofollow">google/umt5-xxl</a> variant.`,name:"text_encoder"},{anchor:"diffusers.WanPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_11234/en/api/models/wan_transformer_3d#diffusers.WanTransformer3DModel">WanTransformer3DModel</a>) — | |
| Conditional Transformer to denoise the input latents.`,name:"transformer"},{anchor:"diffusers.WanPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11234/en/api/schedulers/unipc#diffusers.UniPCMultistepScheduler">UniPCMultistepScheduler</a>) — | |
| A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.WanPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11234/en/api/models/autoencoder_kl_wan#diffusers.AutoencoderKLWan">AutoencoderKLWan</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_wan.py#L93"}}),Be=new Ye({props:{name:"__call__",anchor:"diffusers.WanPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": int = 480"},{name:"width",val:": int = 832"},{name:"num_frames",val:": int = 81"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 5.0"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'np'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"max_sequence_length",val:": int = 512"}],parametersDescription:[{anchor:"diffusers.WanPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code> | |
| instead.`,name:"prompt"},{anchor:"diffusers.WanPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, defaults to <code>480</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.WanPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, defaults to <code>832</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.WanPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, defaults to <code>81</code>) — | |
| The number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.WanPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, defaults to <code>50</code>) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.WanPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, defaults to <code>5.0</code>) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. A higher guidance scale encourages the model to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.WanPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.WanPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.WanPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.WanPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.WanPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.WanPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>WanPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.WanPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.WanPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) — | |
| A function or a subclass of <code>PipelineCallback</code> or <code>MultiPipelineCallbacks</code> that is called at the end of | |
| each denoising step during inference, with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a | |
| list of all tensors as specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.WanPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.WanPipeline.__call__.autocast_dtype",description:`<strong>autocast_dtype</strong> (<code>torch.dtype</code>, <em>optional</em>, defaults to <code>torch.bfloat16</code>) — | |
| The dtype to use for the torch.amp.autocast.`,name:"autocast_dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_wan.py#L359",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>WanPipelineOutput</code> is returned, otherwise a <code>tuple</code> is returned where | |
| the first element is a list with the generated images and the second element is a list of <code>bool</code>s | |
| indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~WanPipelineOutput</code> or <code>tuple</code></p> | |
| `}}),Q=new Rt({props:{anchor:"diffusers.WanPipeline.__call__.example",$$slots:{default:[$t]},$$scope:{ctx:R}}}),Ge=new Ye({props:{name:"encode_prompt",anchor:"diffusers.WanPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"max_sequence_length",val:": int = 226"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"dtype",val:": typing.Optional[torch.dtype] = None"}],parametersDescription:[{anchor:"diffusers.WanPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.WanPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.WanPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.WanPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.WanPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.WanPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.WanPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device`,name:"device"},{anchor:"diffusers.WanPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>): | |
| torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_wan.py#L181"}}),Ie=new U({props:{title:"WanImageToVideoPipeline",local:"diffusers.WanImageToVideoPipeline",headingTag:"h2"}}),ve=new Ye({props:{name:"class diffusers.WanImageToVideoPipeline",anchor:"diffusers.WanImageToVideoPipeline",parameters:[{name:"tokenizer",val:": AutoTokenizer"},{name:"text_encoder",val:": UMT5EncoderModel"},{name:"image_encoder",val:": CLIPVisionModel"},{name:"image_processor",val:": CLIPImageProcessor"},{name:"transformer",val:": WanTransformer3DModel"},{name:"vae",val:": AutoencoderKLWan"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"}],parametersDescription:[{anchor:"diffusers.WanImageToVideoPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>T5Tokenizer</code>) — | |
| Tokenizer from <a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5Tokenizer" rel="nofollow">T5</a>, | |
| specifically the <a href="https://huggingface.co/google/umt5-xxl" rel="nofollow">google/umt5-xxl</a> variant.`,name:"tokenizer"},{anchor:"diffusers.WanImageToVideoPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>T5EncoderModel</code>) — | |
| <a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel" rel="nofollow">T5</a>, specifically | |
| the <a href="https://huggingface.co/google/umt5-xxl" rel="nofollow">google/umt5-xxl</a> variant.`,name:"text_encoder"},{anchor:"diffusers.WanImageToVideoPipeline.image_encoder",description:`<strong>image_encoder</strong> (<code>CLIPVisionModel</code>) — | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPVisionModel" rel="nofollow">CLIP</a>, specifically | |
| the | |
| <a href="https://github.com/mlfoundations/open_clip/blob/main/docs/PRETRAINED.md#vit-h14-xlm-roberta-large" rel="nofollow">clip-vit-huge-patch14</a> | |
| variant.`,name:"image_encoder"},{anchor:"diffusers.WanImageToVideoPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_11234/en/api/models/wan_transformer_3d#diffusers.WanTransformer3DModel">WanTransformer3DModel</a>) — | |
| Conditional Transformer to denoise the input latents.`,name:"transformer"},{anchor:"diffusers.WanImageToVideoPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11234/en/api/schedulers/unipc#diffusers.UniPCMultistepScheduler">UniPCMultistepScheduler</a>) — | |
| A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.WanImageToVideoPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11234/en/api/models/autoencoder_kl_wan#diffusers.AutoencoderKLWan">AutoencoderKLWan</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_wan_i2v.py#L125"}}),ke=new Ye({props:{name:"__call__",anchor:"diffusers.WanImageToVideoPipeline.__call__",parameters:[{name:"image",val:": typing.Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, typing.List[PIL.Image.Image], typing.List[numpy.ndarray], typing.List[torch.Tensor]]"},{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": int = 480"},{name:"width",val:": int = 832"},{name:"num_frames",val:": int = 81"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 5.0"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"image_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'np'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"max_sequence_length",val:": int = 512"}],parametersDescription:[{anchor:"diffusers.WanImageToVideoPipeline.__call__.image",description:`<strong>image</strong> (<code>PipelineImageInput</code>) — | |
| The input image to condition the generation on. Must be an image, a list of images or a <code>torch.Tensor</code>.`,name:"image"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code> | |
| instead.`,name:"prompt"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, defaults to <code>480</code>) — | |
| The height of the generated video.`,name:"height"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, defaults to <code>832</code>) — | |
| The width of the generated video.`,name:"width"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, defaults to <code>81</code>) — | |
| The number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, defaults to <code>50</code>) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, defaults to <code>5.0</code>) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. A higher guidance scale encourages the model to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.image_embeds",description:`<strong>image_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated image embeddings. Can be used to easily tweak image inputs (weighting). If not provided, | |
| image embeddings are generated from the <code>image</code> input argument.`,name:"image_embeds"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>WanPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) — | |
| A function or a subclass of <code>PipelineCallback</code> or <code>MultiPipelineCallbacks</code> that is called at the end of | |
| each denoising step during inference, with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a | |
| list of all tensors as specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The maximum sequence length of the prompt.`,name:"max_sequence_length"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.shift",description:`<strong>shift</strong> (<code>float</code>, <em>optional</em>, defaults to <code>5.0</code>) — | |
| The shift of the flow.`,name:"shift"},{anchor:"diffusers.WanImageToVideoPipeline.__call__.autocast_dtype",description:`<strong>autocast_dtype</strong> (<code>torch.dtype</code>, <em>optional</em>, defaults to <code>torch.bfloat16</code>) — | |
| The dtype to use for the torch.amp.autocast.`,name:"autocast_dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_wan_i2v.py#L459",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>WanPipelineOutput</code> is returned, otherwise a <code>tuple</code> is returned where | |
| the first element is a list with the generated video frames.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~WanPipelineOutput</code> or <code>tuple</code></p> | |
| `}}),F=new Rt({props:{anchor:"diffusers.WanImageToVideoPipeline.__call__.example",$$slots:{default:[Dt]},$$scope:{ctx:R}}}),Ve=new Ye({props:{name:"encode_prompt",anchor:"diffusers.WanImageToVideoPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"max_sequence_length",val:": int = 226"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"dtype",val:": typing.Optional[torch.dtype] = None"}],parametersDescription:[{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device`,name:"device"},{anchor:"diffusers.WanImageToVideoPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>): | |
| torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_wan_i2v.py#L234"}}),Xe=new U({props:{title:"WanPipelineOutput",local:"diffusers.pipelines.wan.pipeline_output.WanPipelineOutput",headingTag:"h2"}}),Re=new Ye({props:{name:"class diffusers.pipelines.wan.pipeline_output.WanPipelineOutput",anchor:"diffusers.pipelines.wan.pipeline_output.WanPipelineOutput",parameters:[{name:"frames",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.pipelines.wan.pipeline_output.WanPipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or List[List[PIL.Image.Image]]) — | |
| List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing | |
| denoised PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape | |
| <code>(batch_size, num_frames, channels, height, width)</code>.`,name:"frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_11234/src/diffusers/pipelines/wan/pipeline_output.py#L8"}}),Ce=new St({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/wan.md"}}),{c(){u=o("meta"),w=a(),g=o("p"),f=a(),i(b.$$.fragment),y=a(),T=o("div"),T.innerHTML=rt,Pe=a(),N=o("p"),N.innerHTML=dt,qe=a(),i(E.$$.fragment),Ke=a(),H=o("p"),H.textContent=pt,Oe=a(),i(z.$$.fragment),el=a(),i(S.$$.fragment),ll=a(),A=o("p"),A.innerHTML=ct,tl=a(),i($.$$.fragment),nl=a(),i(C.$$.fragment),al=a(),i(D.$$.fragment),sl=a(),i(L.$$.fragment),ol=a(),P=o("p"),P.innerHTML=mt,il=a(),i(q.$$.fragment),rl=a(),i(K.$$.fragment),dl=a(),i(O.$$.fragment),pl=a(),i(ee.$$.fragment),cl=a(),le=o("p"),le.textContent=Mt,ml=a(),te=o("p"),te.innerHTML=yt,Ml=a(),i(ne.$$.fragment),yl=a(),ae=o("p"),ae.innerHTML=ut,ul=a(),i(se.$$.fragment),hl=a(),oe=o("p"),oe.innerHTML=ht,Jl=a(),ie=o("p"),ie.textContent=Jt,fl=a(),i(re.$$.fragment),gl=a(),i(de.$$.fragment),bl=a(),pe=o("p"),pe.innerHTML=ft,wl=a(),ce=o("p"),ce.innerHTML=gt,Tl=a(),i(me.$$.fragment),Ul=a(),i(Me.$$.fragment),Zl=a(),ye=o("p"),ye.innerHTML=bt,jl=a(),ue=o("p"),ue.innerHTML=wt,Wl=a(),he=o("p"),he.textContent=Tt,_l=a(),i(Je.$$.fragment),Bl=a(),i(fe.$$.fragment),Gl=a(),ge=o("p"),ge.innerHTML=Ut,Il=a(),i(be.$$.fragment),vl=a(),i(we.$$.fragment),kl=a(),Te=o("p"),Te.innerHTML=Zt,Vl=a(),i(Ue.$$.fragment),Xl=a(),i(Ze.$$.fragment),Rl=a(),je=o("ul"),je.innerHTML=jt,Cl=a(),i(We.$$.fragment),Ql=a(),Z=o("div"),i(_e.$$.fragment),$l=a(),Fe=o("p"),Fe.textContent=Wt,Dl=a(),xe=o("p"),xe.innerHTML=_t,Ll=a(),B=o("div"),i(Be.$$.fragment),Pl=a(),Ne=o("p"),Ne.textContent=Bt,ql=a(),i(Q.$$.fragment),Kl=a(),Y=o("div"),i(Ge.$$.fragment),Ol=a(),Ee=o("p"),Ee.textContent=Gt,Yl=a(),i(Ie.$$.fragment),Fl=a(),j=o("div"),i(ve.$$.fragment),et=a(),He=o("p"),He.textContent=It,lt=a(),ze=o("p"),ze.innerHTML=vt,tt=a(),G=o("div"),i(ke.$$.fragment),nt=a(),Se
=o("p"),Se.textContent=kt,at=a(),i(F.$$.fragment),st=a(),x=o("div"),i(Ve.$$.fragment),ot=a(),Ae=o("p"),Ae.textContent=Vt,xl=a(),i(Xe.$$.fragment),Nl=a(),k=o("div"),i(Re.$$.fragment),it=a(),$e=o("p"),$e.textContent=Xt,El=a(),i(Ce.$$.fragment),Hl=a(),Le=o("p"),this.h()},l(e){const l=Nt("svelte-u9bgzb",document.head);u=r(l,"META",{name:!0,content:!0}),l.forEach(t),w=s(e),g=r(e,"P",{}),X(g).forEach(t),f=s(e),d(b.$$.fragment,e),y=s(e),T=r(e,"DIV",{class:!0,"data-svelte-h":!0}),J(T)!=="svelte-si9ct8"&&(T.innerHTML=rt),Pe=s(e),N=r(e,"P",{"data-svelte-h":!0}),J(N)!=="svelte-16y8q1j"&&(N.innerHTML=dt),qe=s(e),d(E.$$.fragment,e),Ke=s(e),H=r(e,"P",{"data-svelte-h":!0}),J(H)!=="svelte-e9rb6v"&&(H.textContent=pt),Oe=s(e),d(z.$$.fragment,e),el=s(e),d(S.$$.fragment,e),ll=s(e),A=r(e,"P",{"data-svelte-h":!0}),J(A)!=="svelte-ub05vh"&&(A.innerHTML=ct),tl=s(e),d($.$$.fragment,e),nl=s(e),d(C.$$.fragment,e),al=s(e),d(D.$$.fragment,e),sl=s(e),d(L.$$.fragment,e),ol=s(e),P=r(e,"P",{"data-svelte-h":!0}),J(P)!=="svelte-1wgdieo"&&(P.innerHTML=mt),il=s(e),d(q.$$.fragment,e),rl=s(e),d(K.$$.fragment,e),dl=s(e),d(O.$$.fragment,e),pl=s(e),d(ee.$$.fragment,e),cl=s(e),le=r(e,"P",{"data-svelte-h":!0}),J(le)!=="svelte-1trx1st"&&(le.textContent=Mt),ml=s(e),te=r(e,"P",{"data-svelte-h":!0}),J(te)!=="svelte-16134hm"&&(te.innerHTML=yt),Ml=s(e),d(ne.$$.fragment,e),yl=s(e),ae=r(e,"P",{"data-svelte-h":!0}),J(ae)!=="svelte-1pym4hp"&&(ae.innerHTML=ut),ul=s(e),d(se.$$.fragment,e),hl=s(e),oe=r(e,"P",{"data-svelte-h":!0}),J(oe)!=="svelte-uw02y7"&&(oe.innerHTML=ht),Jl=s(e),ie=r(e,"P",{"data-svelte-h":!0}),J(ie)!=="svelte-1sgls4v"&&(ie.textContent=Jt),fl=s(e),d(re.$$.fragment,e),gl=s(e),d(de.$$.fragment,e),bl=s(e),pe=r(e,"P",{"data-svelte-h":!0}),J(pe)!=="svelte-ued87y"&&(pe.innerHTML=ft),wl=s(e),ce=r(e,"P",{"data-svelte-h":!0}),J(ce)!=="svelte-1bpr6ms"&&(ce.innerHTML=gt),Tl=s(e),d(me.$$.fragment,e),Ul=s(e),d(Me.$$.fragment,e),Zl=s(e),ye=r(e,"P",{"data-svelte-h":!0}),J(ye)!=="svelte-15at3eb"&&(ye.innerHTML=bt),jl=s(e
),ue=r(e,"P",{"data-svelte-h":!0}),J(ue)!=="svelte-3gypvb"&&(ue.innerHTML=wt),Wl=s(e),he=r(e,"P",{"data-svelte-h":!0}),J(he)!=="svelte-e0h9kl"&&(he.textContent=Tt),_l=s(e),d(Je.$$.fragment,e),Bl=s(e),d(fe.$$.fragment,e),Gl=s(e),ge=r(e,"P",{"data-svelte-h":!0}),J(ge)!=="svelte-c0om1t"&&(ge.innerHTML=Ut),Il=s(e),d(be.$$.fragment,e),vl=s(e),d(we.$$.fragment,e),kl=s(e),Te=r(e,"P",{"data-svelte-h":!0}),J(Te)!=="svelte-936le3"&&(Te.innerHTML=Zt),Vl=s(e),d(Ue.$$.fragment,e),Xl=s(e),d(Ze.$$.fragment,e),Rl=s(e),je=r(e,"UL",{"data-svelte-h":!0}),J(je)!=="svelte-c5e0rt"&&(je.innerHTML=jt),Cl=s(e),d(We.$$.fragment,e),Ql=s(e),Z=r(e,"DIV",{class:!0});var W=X(Z);d(_e.$$.fragment,W),$l=s(W),Fe=r(W,"P",{"data-svelte-h":!0}),J(Fe)!=="svelte-cppop6"&&(Fe.textContent=Wt),Dl=s(W),xe=r(W,"P",{"data-svelte-h":!0}),J(xe)!=="svelte-1k5rinp"&&(xe.innerHTML=_t),Ll=s(W),B=r(W,"DIV",{class:!0});var V=X(B);d(Be.$$.fragment,V),Pl=s(V),Ne=r(V,"P",{"data-svelte-h":!0}),J(Ne)!=="svelte-50j04k"&&(Ne.textContent=Bt),ql=s(V),d(Q.$$.fragment,V),V.forEach(t),Kl=s(W),Y=r(W,"DIV",{class:!0});var Qe=X(Y);d(Ge.$$.fragment,Qe),Ol=s(Qe),Ee=r(Qe,"P",{"data-svelte-h":!0}),J(Ee)!=="svelte-16q0ax1"&&(Ee.textContent=Gt),Qe.forEach(t),W.forEach(t),Yl=s(e),d(Ie.$$.fragment,e),Fl=s(e),j=r(e,"DIV",{class:!0});var I=X(j);d(ve.$$.fragment,I),et=s(I),He=r(I,"P",{"data-svelte-h":!0}),J(He)!=="svelte-1u5shki"&&(He.textContent=It),lt=s(I),ze=r(I,"P",{"data-svelte-h":!0}),J(ze)!=="svelte-1k5rinp"&&(ze.innerHTML=vt),tt=s(I),G=r(I,"DIV",{class:!0});var De=X(G);d(ke.$$.fragment,De),nt=s(De),Se=r(De,"P",{"data-svelte-h":!0}),J(Se)!=="svelte-50j04k"&&(Se.textContent=kt),at=s(De),d(F.$$.fragment,De),De.forEach(t),st=s(I),x=r(I,"DIV",{class:!0});var Sl=X(x);d(Ve.$$.fragment,Sl),ot=s(Sl),Ae=r(Sl,"P",{"data-svelte-h":!0}),J(Ae)!=="svelte-16q0ax1"&&(Ae.textContent=Vt),Sl.forEach(t),I.forEach(t),xl=s(e),d(Xe.$$.fragment,e),Nl=s(e),k=r(e,"DIV",{class:!0});var 
Al=X(k);d(Re.$$.fragment,Al),it=s(Al),$e=r(Al,"P",{"data-svelte-h":!0}),J($e)!=="svelte-fkhkmf"&&($e.textContent=Xt),Al.forEach(t),El=s(e),d(Ce.$$.fragment,e),Hl=s(e),Le=r(e,"P",{}),X(Le).forEach(t),this.h()},h(){v(u,"name","hf:doc:metadata"),v(u,"content",Pt),v(T,"class","flex flex-wrap space-x-1"),v(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),v(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),v(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),v(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),v(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),v(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),v(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,l){h(document.head,u),n(e,w,l),n(e,g,l),n(e,f,l),p(b,e,l),n(e,y,l),n(e,T,l),n(e,Pe,l),n(e,N,l),n(e,qe,l),p(E,e,l),n(e,Ke,l),n(e,H,l),n(e,Oe,l),p(z,e,l),n(e,el,l),p(S,e,l),n(e,ll,l),n(e,A,l),n(e,tl,l),p($,e,l),n(e,nl,l),p(C,e,l),n(e,al,l),p(D,e,l),n(e,sl,l),p(L,e,l),n(e,ol,l),n(e,P,l),n(e,il,l),p(q,e,l),n(e,rl,l),p(K,e,l),n(e,dl,l),p(O,e,l),n(e,pl,l),p(ee,e,l),n(e,cl,l),n(e,le,l),n(e,ml,l),n(e,te,l),n(e,Ml,l),p(ne,e,l),n(e,yl,l),n(e,ae,l),n(e,ul,l),p(se,e,l),n(e,hl,l),n(e,oe,l),n(e,Jl,l),n(e,ie,l),n(e,fl,l),p(re,e,l),n(e,gl,l),p(de,e,l),n(e,bl,l),n(e,pe,l),n(e,wl,l),n(e,ce,l),n(e,Tl,l),p(me,e,l),n(e,Ul,l),p(Me,e,l),n(e,Zl,l),n(e,ye,l),n(e,jl,l),n(e,ue,l),n(e,Wl,l),n(e,he,l),n(e,_l,l),p(Je,e,l),n(e,Bl,l),p(fe,e,l),n(e,Gl,l),n(e,ge,l),n(e,Il,l),p(be,e,l),n(e,vl,l),p(we,e,l),n(e,kl,l),n(e,Te,l),n(e,Vl,l),p(Ue,e,l),n(e,Xl,l),p(Ze,e,l),n(e,Rl,l),n(e,je,l),n(e,Cl,l),p(We,e,l),n(e,Ql,l),n(e,Z,l),p(_e,Z,null),h(Z,$l),h(Z,Fe),h(Z,Dl),h(Z,xe),h(Z,Ll),h(Z,B),p(Be,B,null),h(B,Pl),h(B,Ne),h(B,ql),p(Q
,B,null),h(Z,Kl),h(Z,Y),p(Ge,Y,null),h(Y,Ol),h(Y,Ee),n(e,Yl,l),p(Ie,e,l),n(e,Fl,l),n(e,j,l),p(ve,j,null),h(j,et),h(j,He),h(j,lt),h(j,ze),h(j,tt),h(j,G),p(ke,G,null),h(G,nt),h(G,Se),h(G,at),p(F,G,null),h(j,st),h(j,x),p(Ve,x,null),h(x,ot),h(x,Ae),n(e,xl,l),p(Xe,e,l),n(e,Nl,l),n(e,k,l),p(Re,k,null),h(k,it),h(k,$e),n(e,El,l),p(Ce,e,l),n(e,Hl,l),n(e,Le,l),zl=!0},p(e,[l]){const W={};l&2&&(W.$$scope={dirty:l,ctx:e}),C.$set(W);const V={};l&2&&(V.$$scope={dirty:l,ctx:e}),Q.$set(V);const Qe={};l&2&&(Qe.$$scope={dirty:l,ctx:e}),F.$set(Qe)},i(e){zl||(c(b.$$.fragment,e),c(E.$$.fragment,e),c(z.$$.fragment,e),c(S.$$.fragment,e),c($.$$.fragment,e),c(C.$$.fragment,e),c(D.$$.fragment,e),c(L.$$.fragment,e),c(q.$$.fragment,e),c(K.$$.fragment,e),c(O.$$.fragment,e),c(ee.$$.fragment,e),c(ne.$$.fragment,e),c(se.$$.fragment,e),c(re.$$.fragment,e),c(de.$$.fragment,e),c(me.$$.fragment,e),c(Me.$$.fragment,e),c(Je.$$.fragment,e),c(fe.$$.fragment,e),c(be.$$.fragment,e),c(we.$$.fragment,e),c(Ue.$$.fragment,e),c(Ze.$$.fragment,e),c(We.$$.fragment,e),c(_e.$$.fragment,e),c(Be.$$.fragment,e),c(Q.$$.fragment,e),c(Ge.$$.fragment,e),c(Ie.$$.fragment,e),c(ve.$$.fragment,e),c(ke.$$.fragment,e),c(F.$$.fragment,e),c(Ve.$$.fragment,e),c(Xe.$$.fragment,e),c(Re.$$.fragment,e),c(Ce.$$.fragment,e),zl=!0)},o(e){m(b.$$.fragment,e),m(E.$$.fragment,e),m(z.$$.fragment,e),m(S.$$.fragment,e),m($.$$.fragment,e),m(C.$$.fragment,e),m(D.$$.fragment,e),m(L.$$.fragment,e),m(q.$$.fragment,e),m(K.$$.fragment,e),m(O.$$.fragment,e),m(ee.$$.fragment,e),m(ne.$$.fragment,e),m(se.$$.fragment,e),m(re.$$.fragment,e),m(de.$$.fragment,e),m(me.$$.fragment,e),m(Me.$$.fragment,e),m(Je.$$.fragment,e),m(fe.$$.fragment,e),m(be.$$.fragment,e),m(we.$$.fragment,e),m(Ue.$$.fragment,e),m(Ze.$$.fragment,e),m(We.$$.fragment,e),m(_e.$$.fragment,e),m(Be.$$.fragment,e),m(Q.$$.fragment,e),m(Ge.$$.fragment,e),m(Ie.$$.fragment,e),m(ve.$$.fragment,e),m(ke.$$.fragment,e),m(F.$$.fragment,e),m(Ve.$$.fragment,e),m(Xe.$$.fragment,e),m(Re.$$.fragment,e),m(Ce.$$.
fragment,e),zl=!1},d(e){e&&(t(w),t(g),t(f),t(y),t(T),t(Pe),t(N),t(qe),t(Ke),t(H),t(Oe),t(el),t(ll),t(A),t(tl),t(nl),t(al),t(sl),t(ol),t(P),t(il),t(rl),t(dl),t(pl),t(cl),t(le),t(ml),t(te),t(Ml),t(yl),t(ae),t(ul),t(hl),t(oe),t(Jl),t(ie),t(fl),t(gl),t(bl),t(pe),t(wl),t(ce),t(Tl),t(Ul),t(Zl),t(ye),t(jl),t(ue),t(Wl),t(he),t(_l),t(Bl),t(Gl),t(ge),t(Il),t(vl),t(kl),t(Te),t(Vl),t(Xl),t(Rl),t(je),t(Cl),t(Ql),t(Z),t(Yl),t(Fl),t(j),t(xl),t(Nl),t(k),t(El),t(Hl),t(Le)),t(u),M(b,e),M(E,e),M(z,e),M(S,e),M($,e),M(C,e),M(D,e),M(L,e),M(q,e),M(K,e),M(O,e),M(ee,e),M(ne,e),M(se,e),M(re,e),M(de,e),M(me,e),M(Me,e),M(Je,e),M(fe,e),M(be,e),M(we,e),M(Ue,e),M(Ze,e),M(We,e),M(_e),M(Be),M(Q),M(Ge),M(Ie,e),M(ve),M(ke),M(F),M(Ve),M(Xe,e),M(Re),M(Ce,e)}}}const Pt='{"title":"Wan","local":"wan","sections":[{"title":"Generating Videos with Wan 2.1","local":"generating-videos-with-wan-21","sections":[{"title":"Text to Video Generation","local":"text-to-video-generation","sections":[],"depth":3},{"title":"Image to Video Generation","local":"image-to-video-generation","sections":[],"depth":3},{"title":"Video to Video Generation","local":"video-to-video-generation","sections":[],"depth":3}],"depth":2},{"title":"Memory Optimizations for Wan 2.1","local":"memory-optimizations-for-wan-21","sections":[{"title":"Group Offloading the Transformer and UMT5 Text Encoder","local":"group-offloading-the-transformer-and-umt5-text-encoder","sections":[{"title":"Block Level Group Offloading","local":"block-level-group-offloading","sections":[],"depth":4},{"title":"Block Level Group Offloading with CUDA Streams","local":"block-level-group-offloading-with-cuda-streams","sections":[],"depth":4}],"depth":3},{"title":"Applying Layerwise Casting to the Transformer","local":"applying-layerwise-casting-to-the-transformer","sections":[],"depth":3}],"depth":2},{"title":"Using a Custom Scheduler","local":"using-a-custom-scheduler","sections":[],"depth":2},{"title":"Using Single File Loading with Wan 
2.1","local":"using-single-file-loading-with-wan-21","sections":[],"depth":2},{"title":"Recommendations for Inference","local":"recommendations-for-inference","sections":[],"depth":2},{"title":"WanPipeline","local":"diffusers.WanPipeline","sections":[],"depth":2},{"title":"WanImageToVideoPipeline","local":"diffusers.WanImageToVideoPipeline","sections":[],"depth":2},{"title":"WanPipelineOutput","local":"diffusers.pipelines.wan.pipeline_output.WanPipelineOutput","sections":[],"depth":2}],"depth":1}';function qt(R){return Yt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class sn extends Ft{constructor(u){super(),xt(this,u,qt,Lt,Qt,{})}}export{sn as component}; | |
Xet Storage Details
- Size:
- 118 kB
- Xet hash:
- 7fa2f5409c75f809727591f649ea2f39b79a60ced428f6d313faeca6cfef567f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.