Buckets:
| import{s as Xa,o as Ca,n as fa}from"../chunks/scheduler.8c3d61f6.js";import{S as Ha,i as xa,g as p,s as t,r as c,A as Ra,h as m,f as n,c as s,j as v,u as d,x as _,k as T,y as i,a as l,v as u,d as h,t as g,w as f}from"../chunks/index.da70eac4.js";import{T as Ya}from"../chunks/Tip.1d9b8c37.js";import{D as de}from"../chunks/Docstring.9419aa1d.js";import{C as Ze}from"../chunks/CodeBlock.a9c4becf.js";import{E as Ga}from"../chunks/ExampleCodeBlock.1b2603c3.js";import{H as j,E as Na}from"../chunks/getInferenceSnippets.39110341.js";function Ea(V){let o,J='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){o=p("p"),o.innerHTML=J},l(r){o=m(r,"P",{"data-svelte-h":!0}),_(o)!=="svelte-1qn15hi"&&(o.innerHTML=J)},m(r,Z){l(r,o,Z)},p:fa,d(r){r&&n(o)}}}function $a(V){let o,J;return o=new 
Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUlMkMlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTJDJTIwbG9hZF9pbWFnZSUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBTaWdsaXBJbWFnZVByb2Nlc3NvciUyQyUyMFNpZ2xpcFZpc2lvbk1vZGVsJTBBJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZGcmFtZVBhY2tJMlZfSFklMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQWZlYXR1cmVfZXh0cmFjdG9yJTIwJTNEJTIwU2lnbGlwSW1hZ2VQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZmbHV4X3JlZHV4X2JmbCUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMmZlYXR1cmVfZXh0cmFjdG9yJTIyJTBBKSUwQWltYWdlX2VuY29kZXIlMjAlM0QlMjBTaWdsaXBWaXNpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIybGxseWFzdmllbCUyRmZsdXhfcmVkdXhfYmZsJTIyJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSklMEFwaXBlJTIwJTNEJTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmh1bnl1YW52aWRlby1jb21tdW5pdHklMkZIdW55dWFuVmlkZW8lMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTBBJTIwJTIwJTIwJTIwZmVhdHVyZV9leHRyYWN0b3IlM0RmZWF0dXJlX2V4dHJhY3RvciUyQyUwQSUyMCUyMCUyMCUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFpbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGcGVuZ3Vpbi5wbmclMjIlMEEpJTBBb3V0cHV0JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBpbWFnZSUzRGltYWdlJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEJTIyQSUyMHBlbmd1aW4lMjBkYW5jaW5nJTIwaW4lMjB0aGUlMjBzbm93JTIyJTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNEODMyJTJDJTBBJTIwJTIwJTIwJTIwd2l
kdGglM0Q0ODAlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEOTElMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMzAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDkuMCUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRHRvcmNoLkdlbmVyYXRvcigpLm1hbnVhbF9zZWVkKDApJTJDJTBBJTIwJTIwJTIwJTIwc2FtcGxpbmdfdHlwZSUzRCUyMmludmVydGVkX2FudGlfZHJpZnRpbmclMjIlMkMlMEEpLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8ob3V0cHV0JTJDJTIwJTIyb3V0cHV0Lm1wNCUyMiUyQyUyMGZwcyUzRDMwKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanVideoFramepackPipeline, HunyuanVideoFramepackTransformer3DModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> SiglipImageProcessor, SiglipVisionModel | |
| <span class="hljs-meta">>>> </span>transformer = HunyuanVideoFramepackTransformer3DModel.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"lllyasviel/FramePackI2V_HY"</span>, torch_dtype=torch.bfloat16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>feature_extractor = SiglipImageProcessor.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"feature_extractor"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>image_encoder = SiglipVisionModel.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe = HunyuanVideoFramepackPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"hunyuanvideo-community/HunyuanVideo"</span>, | |
| <span class="hljs-meta">... </span> transformer=transformer, | |
| <span class="hljs-meta">... </span> feature_extractor=feature_extractor, | |
| <span class="hljs-meta">... </span> image_encoder=image_encoder, | |
| <span class="hljs-meta">... </span> torch_dtype=torch.float16, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.vae.enable_tiling() | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>output = pipe( | |
| <span class="hljs-meta">... </span> image=image, | |
| <span class="hljs-meta">... </span> prompt=<span class="hljs-string">"A penguin dancing in the snow"</span>, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">832</span>, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">480</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">91</span>, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">30</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">9.0</span>, | |
| <span class="hljs-meta">... </span> generator=torch.Generator().manual_seed(<span class="hljs-number">0</span>), | |
| <span class="hljs-meta">... </span> sampling_type=<span class="hljs-string">"inverted_anti_drifting"</span>, | |
| <span class="hljs-meta">... </span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),{c(){c(o.$$.fragment)},l(r){d(o.$$.fragment,r)},m(r,Z){u(o,r,Z),J=!0},p:fa,i(r){J||(h(o.$$.fragment,r),J=!0)},o(r){g(o.$$.fragment,r),J=!1},d(r){f(o,r)}}}function Qa(V){let o,J;return o=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUlMkMlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTJDJTIwbG9hZF9pbWFnZSUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBTaWdsaXBJbWFnZVByb2Nlc3NvciUyQyUyMFNpZ2xpcFZpc2lvbk1vZGVsJTBBJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZGcmFtZVBhY2tJMlZfSFklMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQWZlYXR1cmVfZXh0cmFjdG9yJTIwJTNEJTIwU2lnbGlwSW1hZ2VQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZmbHV4X3JlZHV4X2JmbCUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMmZlYXR1cmVfZXh0cmFjdG9yJTIyJTBBKSUwQWltYWdlX2VuY29kZXIlMjAlM0QlMjBTaWdsaXBWaXNpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIybGxseWFzdmllbCUyRmZsdXhfcmVkdXhfYmZsJTIyJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSklMEFwaXBlJTIwJTNEJTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmh1bnl1YW52aWRlby1jb21tdW5pdHklMkZIdW55dWFuVmlkZW8lMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTBBJTIwJTIwJTIwJTIwZmVhdHVyZV9leHRyYWN0b3IlM0RmZWF0dXJlX2V4dHJhY3RvciUyQyUwQSUyMCUyMCUyMCUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJDRyUyMGFuaW1hdGlvbiUyMHN0eWxlJTJDJTIwYSUyMHNtYWxsJTIwYmx1ZSUyMGJpcmQlMjB0YWtlcyUyMG9mZiUyMGZyb20lMjB0aGUlMjBncm91bmQl
MkMlMjBmbGFwcGluZyUyMGl0cyUyMHdpbmdzLiUyMFRoZSUyMGJpcmQncyUyMGZlYXRoZXJzJTIwYXJlJTIwZGVsaWNhdGUlMkMlMjB3aXRoJTIwYSUyMHVuaXF1ZSUyMHBhdHRlcm4lMjBvbiUyMGl0cyUyMGNoZXN0LiUyMFRoZSUyMGJhY2tncm91bmQlMjBzaG93cyUyMGElMjBibHVlJTIwc2t5JTIwd2l0aCUyMHdoaXRlJTIwY2xvdWRzJTIwdW5kZXIlMjBicmlnaHQlMjBzdW5zaGluZS4lMjBUaGUlMjBjYW1lcmElMjBmb2xsb3dzJTIwdGhlJTIwYmlyZCUyMHVwd2FyZCUyQyUyMGNhcHR1cmluZyUyMGl0cyUyMGZsaWdodCUyMGFuZCUyMHRoZSUyMHZhc3RuZXNzJTIwb2YlMjB0aGUlMjBza3klMjBmcm9tJTIwYSUyMGNsb3NlLXVwJTJDJTIwbG93LWFuZ2xlJTIwcGVyc3BlY3RpdmUuJTIyJTBBZmlyc3RfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmZsZjJ2X2lucHV0X2ZpcnN0X2ZyYW1lLnBuZyUyMiUwQSklMEFsYXN0X2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZmbGYydl9pbnB1dF9sYXN0X2ZyYW1lLnBuZyUyMiUwQSklMEFvdXRwdXQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGltYWdlJTNEZmlyc3RfaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBsYXN0X2ltYWdlJTNEbGFzdF9pbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNENTEyJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDkxJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDMwJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q5LjAlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0R0b3JjaC5HZW5lcmF0b3IoKS5tYW51YWxfc2VlZCgwKSUyQyUwQSUyMCUyMCUyMCUyMHNhbXBsaW5nX3R5cGUlM0QlMjJpbnZlcnRlZF9hbnRpX2RyaWZ0aW5nJTIyJTJDJTBBKS5mcmFtZXMlNUIwJTVEJTBBZXhwb3J0X3RvX3ZpZGVvKG91dHB1dCUyQyUyMCUyMm91dHB1dC5tcDQlMjIlMkMlMjBmcHMlM0QzMCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanVideoFramepackPipeline, HunyuanVideoFramepackTransformer3DModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> SiglipImageProcessor, SiglipVisionModel | |
| <span class="hljs-meta">>>> </span>transformer = HunyuanVideoFramepackTransformer3DModel.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"lllyasviel/FramePackI2V_HY"</span>, torch_dtype=torch.bfloat16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>feature_extractor = SiglipImageProcessor.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"feature_extractor"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>image_encoder = SiglipVisionModel.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe = HunyuanVideoFramepackPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"hunyuanvideo-community/HunyuanVideo"</span>, | |
| <span class="hljs-meta">... </span> transformer=transformer, | |
| <span class="hljs-meta">... </span> feature_extractor=feature_extractor, | |
| <span class="hljs-meta">... </span> image_encoder=image_encoder, | |
| <span class="hljs-meta">... </span> torch_dtype=torch.float16, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."</span> | |
| <span class="hljs-meta">>>> </span>first_image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>last_image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>output = pipe( | |
| <span class="hljs-meta">... </span> image=first_image, | |
| <span class="hljs-meta">... </span> last_image=last_image, | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">91</span>, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">30</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">9.0</span>, | |
| <span class="hljs-meta">... </span> generator=torch.Generator().manual_seed(<span class="hljs-number">0</span>), | |
| <span class="hljs-meta">... </span> sampling_type=<span class="hljs-string">"inverted_anti_drifting"</span>, | |
| <span class="hljs-meta">... </span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),{c(){c(o.$$.fragment)},l(r){d(o.$$.fragment,r)},m(r,Z){u(o,r,Z),J=!0},p:fa,i(r){J||(h(o.$$.fragment,r),J=!0)},o(r){g(o.$$.fragment,r),J=!1},d(r){f(o,r)}}}function Sa(V){let o,J,r,Z,H,Te,W,ya='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',je,x,Ma='<a href="https://huggingface.co/papers/2504.12626" rel="nofollow">Packing Input Frame Context in Next-Frame Prediction Models for Video Generation</a> by Lvmin Zhang and Maneesh Agrawala.',Ue,R,ba="<em>We present a neural network structure, FramePack, to train next-frame (or next-frame-section) prediction models for video generation. The FramePack compresses input frames to make the transformer context length a fixed number regardless of the video length. As a result, we are able to process a large number of frames using video diffusion with computation bottleneck similar to image diffusion. This also makes the training video batch sizes significantly higher (batch sizes become comparable to image diffusion training). We also propose an anti-drifting sampling method that generates frames in inverted temporal order with early-established endpoints to avoid exposure bias (error accumulation over iterations). Finally, we show that existing video diffusion models can be finetuned with FramePack, and their visual quality may be improved because the next-frame prediction supports more balanced diffusion schedulers with less extreme flow shift timesteps.</em>",ve,k,Ve,Y,We,N,_a='<thead><tr><th align="left">Model name</th> <th align="left">Description</th></tr></thead> <tbody><tr><td align="left">- <a href="https://huggingface.co/lllyasviel/FramePackI2V_HY" rel="nofollow"><code>lllyasviel/FramePackI2V_HY</code></a></td> <td align="left">Trained with the “inverted anti-drifting” strategy as described in the paper. 
Inference requires setting <code>sampling_type="inverted_anti_drifting"</code> when running the pipeline.</td></tr> <tr><td align="left">- <a href="https://huggingface.co/lllyasviel/FramePack_F1_I2V_HY_20250503" rel="nofollow"><code>lllyasviel/FramePack_F1_I2V_HY_20250503</code></a></td> <td align="left">Trained with a novel anti-drifting strategy but inference is performed in “vanilla” strategy as described in the paper. Inference requires setting <code>sampling_type="vanilla"</code> when running the pipeline.</td></tr></tbody>',ke,E,Ie,$,Ja="Refer to the pipeline documentation for basic usage examples. The following section contains examples of offloading, different sampling methods, quantization, and more.",Fe,Q,Be,S,wa="The following example shows how to use Framepack with start and end image controls, using the inverted anti-drifiting sampling model.",Ge,z,Xe,A,Ce,P,Za="The following example shows how to use Framepack with the F1 model trained with vanilla sampling but new regulation approach for anti-drifting.",He,q,xe,L,Re,D,Ta='Group offloading (<a href="/docs/diffusers/pr_11340/en/api/utilities#diffusers.hooks.apply_group_offloading">apply_group_offloading()</a>) provides aggressive memory optimizations for offloading internal parts of any model to the CPU, with possibly no additional overhead to generation time. If you have very low VRAM available, this approach may be suitable for you depending on the amount of CPU RAM available.',Ye,K,Ne,O,Ee,y,ee,Ke,ue,ja="Pipeline for text-to-video generation using HunyuanVideo.",Oe,he,Ua=`This model inherits from <a href="/docs/diffusers/pr_11340/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,ea,b,ae,aa,ge,va="The call function to the pipeline for generation.",na,fe,Va="Examples:",ta,ne,sa,I,la,te,oa,F,ia,B,se,ra,ye,Wa=`Disable sliced VAE decoding. If <code>enable_vae_slicing</code> was previously enabled, this method will go back to | |
| computing decoding in one step.`,pa,G,le,ma,Me,ka=`Disable tiled VAE decoding. If <code>enable_vae_tiling</code> was previously enabled, this method will go back to | |
| computing decoding in one step.`,ca,X,oe,da,be,Ia=`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to | |
| compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`,ua,C,ie,ha,_e,Fa=`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to | |
| compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow | |
| processing larger images.`,$e,re,Qe,U,pe,ga,Je,Ba="Output class for HunyuanVideo pipelines.",Se,me,ze,we,Ae;return H=new j({props:{title:"Framepack",local:"framepack",headingTag:"h1"}}),k=new Ya({props:{$$slots:{default:[Ea]},$$scope:{ctx:V}}}),Y=new j({props:{title:"Available models",local:"available-models",headingTag:"h2"}}),E=new j({props:{title:"Usage",local:"usage",headingTag:"h2"}}),Q=new j({props:{title:"First and last frame to video",local:"first-and-last-frame-to-video",headingTag:"h3"}}),z=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUlMkMlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTJDJTIwbG9hZF9pbWFnZSUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBTaWdsaXBJbWFnZVByb2Nlc3NvciUyQyUyMFNpZ2xpcFZpc2lvbk1vZGVsJTBBJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZGcmFtZVBhY2tJMlZfSFklMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQWZlYXR1cmVfZXh0cmFjdG9yJTIwJTNEJTIwU2lnbGlwSW1hZ2VQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZmbHV4X3JlZHV4X2JmbCUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMmZlYXR1cmVfZXh0cmFjdG9yJTIyJTBBKSUwQWltYWdlX2VuY29kZXIlMjAlM0QlMjBTaWdsaXBWaXNpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIybGxseWFzdmllbCUyRmZsdXhfcmVkdXhfYmZsJTIyJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSklMEFwaXBlJTIwJTNEJTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmh1bnl1YW52aWRlby1jb21tdW5pdHklMkZIdW55dWFuVmlkZW8lMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTBBJTIwJTIwJTIwJTIwZmVhdHVyZV9leHRyYWN0b3IlM0RmZWF0dXJlX2V4dHJhY3RvciUyQyUwQSUyMCUyMCUyMCUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQSUyMyUyMEV
uYWJsZSUyMG1lbW9yeSUyMG9wdGltaXphdGlvbnMlMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBcGlwZS52YWUuZW5hYmxlX3RpbGluZygpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyQ0clMjBhbmltYXRpb24lMjBzdHlsZSUyQyUyMGElMjBzbWFsbCUyMGJsdWUlMjBiaXJkJTIwdGFrZXMlMjBvZmYlMjBmcm9tJTIwdGhlJTIwZ3JvdW5kJTJDJTIwZmxhcHBpbmclMjBpdHMlMjB3aW5ncy4lMjBUaGUlMjBiaXJkJ3MlMjBmZWF0aGVycyUyMGFyZSUyMGRlbGljYXRlJTJDJTIwd2l0aCUyMGElMjB1bmlxdWUlMjBwYXR0ZXJuJTIwb24lMjBpdHMlMjBjaGVzdC4lMjBUaGUlMjBiYWNrZ3JvdW5kJTIwc2hvd3MlMjBhJTIwYmx1ZSUyMHNreSUyMHdpdGglMjB3aGl0ZSUyMGNsb3VkcyUyMHVuZGVyJTIwYnJpZ2h0JTIwc3Vuc2hpbmUuJTIwVGhlJTIwY2FtZXJhJTIwZm9sbG93cyUyMHRoZSUyMGJpcmQlMjB1cHdhcmQlMkMlMjBjYXB0dXJpbmclMjBpdHMlMjBmbGlnaHQlMjBhbmQlMjB0aGUlMjB2YXN0bmVzcyUyMG9mJTIwdGhlJTIwc2t5JTIwZnJvbSUyMGElMjBjbG9zZS11cCUyQyUyMGxvdy1hbmdsZSUyMHBlcnNwZWN0aXZlLiUyMiUwQWZpcnN0X2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZmbGYydl9pbnB1dF9maXJzdF9mcmFtZS5wbmclMjIlMEEpJTBBbGFzdF9pbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGZmxmMnZfaW5wdXRfbGFzdF9mcmFtZS5wbmclMjIlMEEpJTBBb3V0cHV0JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBpbWFnZSUzRGZpcnN0X2ltYWdlJTJDJTBBJTIwJTIwJTIwJTIwbGFzdF9pbWFnZSUzRGxhc3RfaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q1MTIlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0Q5MSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzMCUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNEOS4wJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCkubWFudWFsX3NlZWQoMCklMkMlMEElMjAlMjAlMjAlMjBzYW1wbGluZ190eXBlJTNEJTIyaW52ZXJ0ZWRfYW50aV9kcmlmdGluZyUyMiUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMzAp",highlight
ed:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanVideoFramepackPipeline, HunyuanVideoFramepackTransformer3DModel | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> SiglipImageProcessor, SiglipVisionModel | |
| transformer = HunyuanVideoFramepackTransformer3DModel.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/FramePackI2V_HY"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| feature_extractor = SiglipImageProcessor.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"feature_extractor"</span> | |
| ) | |
| image_encoder = SiglipVisionModel.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float16 | |
| ) | |
| pipe = HunyuanVideoFramepackPipeline.from_pretrained( | |
| <span class="hljs-string">"hunyuanvideo-community/HunyuanVideo"</span>, | |
| transformer=transformer, | |
| feature_extractor=feature_extractor, | |
| image_encoder=image_encoder, | |
| torch_dtype=torch.float16, | |
| ) | |
| <span class="hljs-comment"># Enable memory optimizations</span> | |
| pipe.enable_model_cpu_offload() | |
| pipe.vae.enable_tiling() | |
| prompt = <span class="hljs-string">"CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."</span> | |
| first_image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png"</span> | |
| ) | |
| last_image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png"</span> | |
| ) | |
| output = pipe( | |
| image=first_image, | |
| last_image=last_image, | |
| prompt=prompt, | |
| height=<span class="hljs-number">512</span>, | |
| width=<span class="hljs-number">512</span>, | |
| num_frames=<span class="hljs-number">91</span>, | |
| num_inference_steps=<span class="hljs-number">30</span>, | |
| guidance_scale=<span class="hljs-number">9.0</span>, | |
| generator=torch.Generator().manual_seed(<span class="hljs-number">0</span>), | |
| sampling_type=<span class="hljs-string">"inverted_anti_drifting"</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),A=new j({props:{title:"Vanilla sampling",local:"vanilla-sampling",headingTag:"h3"}}),q=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUlMkMlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTJDJTIwbG9hZF9pbWFnZSUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBTaWdsaXBJbWFnZVByb2Nlc3NvciUyQyUyMFNpZ2xpcFZpc2lvbk1vZGVsJTBBJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZGcmFtZVBhY2tfRjFfSTJWX0hZXzIwMjUwNTAzJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFmZWF0dXJlX2V4dHJhY3RvciUyMCUzRCUyMFNpZ2xpcEltYWdlUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJsbGx5YXN2aWVsJTJGZmx1eF9yZWR1eF9iZmwlMjIlMkMlMjBzdWJmb2xkZXIlM0QlMjJmZWF0dXJlX2V4dHJhY3RvciUyMiUwQSklMEFpbWFnZV9lbmNvZGVyJTIwJTNEJTIwU2lnbGlwVmlzaW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZmbHV4X3JlZHV4X2JmbCUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMmltYWdlX2VuY29kZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpJTBBcGlwZSUyMCUzRCUyMEh1bnl1YW5WaWRlb0ZyYW1lcGFja1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJodW55dWFudmlkZW8tY29tbXVuaXR5JTJGSHVueXVhblZpZGVvJTIyJTJDJTBBJTIwJTIwJTIwJTIwdHJhbnNmb3JtZXIlM0R0cmFuc2Zvcm1lciUyQyUwQSUyMCUyMCUyMCUyMGZlYXR1cmVfZXh0cmFjdG9yJTNEZmVhdHVyZV9leHRyYWN0b3IlMkMlMEElMjAlMjAlMjAlMjBpbWFnZV9lbmNvZGVyJTNEaW1hZ2VfZW5jb2RlciUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEElMEElMjMlMjBFbmFibGUlMjBtZW1vcnklMjBvcHRpbWl6YXRpb25zJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQSUwQWltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb2
4taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZwZW5ndWluLnBuZyUyMiUwQSklMEFvdXRwdXQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGltYWdlJTNEaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0QlMjJBJTIwcGVuZ3VpbiUyMGRhbmNpbmclMjBpbiUyMHRoZSUyMHNub3clMjIlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q4MzIlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDQ4MCUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0Q5MSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzMCUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNEOS4wJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCkubWFudWFsX3NlZWQoMCklMkMlMEElMjAlMjAlMjAlMjBzYW1wbGluZ190eXBlJTNEJTIydmFuaWxsYSUyMiUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMzAp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanVideoFramepackPipeline, HunyuanVideoFramepackTransformer3DModel | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> SiglipImageProcessor, SiglipVisionModel | |
| transformer = HunyuanVideoFramepackTransformer3DModel.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/FramePack_F1_I2V_HY_20250503"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| feature_extractor = SiglipImageProcessor.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"feature_extractor"</span> | |
| ) | |
| image_encoder = SiglipVisionModel.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float16 | |
| ) | |
| pipe = HunyuanVideoFramepackPipeline.from_pretrained( | |
| <span class="hljs-string">"hunyuanvideo-community/HunyuanVideo"</span>, | |
| transformer=transformer, | |
| feature_extractor=feature_extractor, | |
| image_encoder=image_encoder, | |
| torch_dtype=torch.float16, | |
| ) | |
| <span class="hljs-comment"># Enable memory optimizations</span> | |
| pipe.enable_model_cpu_offload() | |
| pipe.vae.enable_tiling() | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png"</span> | |
| ) | |
| output = pipe( | |
| image=image, | |
| prompt=<span class="hljs-string">"A penguin dancing in the snow"</span>, | |
| height=<span class="hljs-number">832</span>, | |
| width=<span class="hljs-number">480</span>, | |
| num_frames=<span class="hljs-number">91</span>, | |
| num_inference_steps=<span class="hljs-number">30</span>, | |
| guidance_scale=<span class="hljs-number">9.0</span>, | |
| generator=torch.Generator().manual_seed(<span class="hljs-number">0</span>), | |
| sampling_type=<span class="hljs-string">"vanilla"</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),L=new j({props:{title:"Group offloading",local:"group-offloading",headingTag:"h3"}}),K=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUlMkMlMjBIdW55dWFuVmlkZW9GcmFtZXBhY2tUcmFuc2Zvcm1lcjNETW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLmhvb2tzJTIwaW1wb3J0JTIwYXBwbHlfZ3JvdXBfb2ZmbG9hZGluZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMkMlMjBsb2FkX2ltYWdlJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFNpZ2xpcEltYWdlUHJvY2Vzc29yJTJDJTIwU2lnbGlwVmlzaW9uTW9kZWwlMEElMEF0cmFuc2Zvcm1lciUyMCUzRCUyMEh1bnl1YW5WaWRlb0ZyYW1lcGFja1RyYW5zZm9ybWVyM0RNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIybGxseWFzdmllbCUyRkZyYW1lUGFja19GMV9JMlZfSFlfMjAyNTA1MDMlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQWZlYXR1cmVfZXh0cmFjdG9yJTIwJTNEJTIwU2lnbGlwSW1hZ2VQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmxsbHlhc3ZpZWwlMkZmbHV4X3JlZHV4X2JmbCUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMmZlYXR1cmVfZXh0cmFjdG9yJTIyJTBBKSUwQWltYWdlX2VuY29kZXIlMjAlM0QlMjBTaWdsaXBWaXNpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIybGxseWFzdmllbCUyRmZsdXhfcmVkdXhfYmZsJTIyJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSklMEFwaXBlJTIwJTNEJTIwSHVueXVhblZpZGVvRnJhbWVwYWNrUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmh1bnl1YW52aWRlby1jb21tdW5pdHklMkZIdW55dWFuVmlkZW8lMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTBBJTIwJTIwJTIwJTIwZmVhdHVyZV9leHRyYWN0b3IlM0RmZWF0dXJlX2V4dHJhY3RvciUyQyUwQSUyMCUyMCUyMCUyMGltYWdlX2VuY29kZXIlM0RpbWFnZV9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQSUyMyUyMEVuYWJsZSUyMGdyb3VwJTIwb2ZmbG9hZGluZyUwQW9ubG9hZF9kZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiklMEFvZmZsb2FkX2RldmljZSUyMCUzRCUyMHRvcmNoLmRldmljZSglMjJjcHUlMjIpJTBBbGlzdChtYXAoJTBBJTIwJTIwJTIwJT
IwbGFtYmRhJTIweCUzQSUyMGFwcGx5X2dyb3VwX29mZmxvYWRpbmcoeCUyQyUyMG9ubG9hZF9kZXZpY2UlMkMlMjBvZmZsb2FkX2RldmljZSUyQyUyMG9mZmxvYWRfdHlwZSUzRCUyMmxlYWZfbGV2ZWwlMjIlMkMlMjB1c2Vfc3RyZWFtJTNEVHJ1ZSUyQyUyMGxvd19jcHVfbWVtX3VzYWdlJTNEVHJ1ZSklMkMlMEElMjAlMjAlMjAlMjAlNUJwaXBlLnRleHRfZW5jb2RlciUyQyUyMHBpcGUudGV4dF9lbmNvZGVyXzIlMkMlMjBwaXBlLnRyYW5zZm9ybWVyJTVEJTBBKSklMEFwaXBlLmltYWdlX2VuY29kZXIudG8ob25sb2FkX2RldmljZSklMEFwaXBlLnZhZS50byhvbmxvYWRfZGV2aWNlKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQSUwQWltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZwZW5ndWluLnBuZyUyMiUwQSklMEFvdXRwdXQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGltYWdlJTNEaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0QlMjJBJTIwcGVuZ3VpbiUyMGRhbmNpbmclMjBpbiUyMHRoZSUyMHNub3clMjIlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q4MzIlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDQ4MCUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0Q5MSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzMCUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNEOS4wJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEdG9yY2guR2VuZXJhdG9yKCkubWFudWFsX3NlZWQoMCklMkMlMEElMjAlMjAlMjAlMjBzYW1wbGluZ190eXBlJTNEJTIydmFuaWxsYSUyMiUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQXByaW50KGYlMjJNYXglMjBtZW1vcnklM0ElMjAlN0J0b3JjaC5jdWRhLm1heF9tZW1vcnlfYWxsb2NhdGVkKCklMjAlMkYlMjAxMDI0KiozJTNBLjNmJTdEJTIwR0IlMjIpJTBBZXhwb3J0X3RvX3ZpZGVvKG91dHB1dCUyQyUyMCUyMm91dHB1dC5tcDQlMjIlMkMlMjBmcHMlM0QzMCk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanVideoFramepackPipeline, HunyuanVideoFramepackTransformer3DModel | |
| <span class="hljs-keyword">from</span> diffusers.hooks <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> SiglipImageProcessor, SiglipVisionModel | |
| transformer = HunyuanVideoFramepackTransformer3DModel.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/FramePack_F1_I2V_HY_20250503"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| feature_extractor = SiglipImageProcessor.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"feature_extractor"</span> | |
| ) | |
| image_encoder = SiglipVisionModel.from_pretrained( | |
| <span class="hljs-string">"lllyasviel/flux_redux_bfl"</span>, subfolder=<span class="hljs-string">"image_encoder"</span>, torch_dtype=torch.float16 | |
| ) | |
| pipe = HunyuanVideoFramepackPipeline.from_pretrained( | |
| <span class="hljs-string">"hunyuanvideo-community/HunyuanVideo"</span>, | |
| transformer=transformer, | |
| feature_extractor=feature_extractor, | |
| image_encoder=image_encoder, | |
| torch_dtype=torch.float16, | |
| ) | |
| <span class="hljs-comment"># Enable group offloading</span> | |
| onload_device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| offload_device = torch.device(<span class="hljs-string">"cpu"</span>) | |
| <span class="hljs-built_in">list</span>(<span class="hljs-built_in">map</span>( | |
| <span class="hljs-keyword">lambda</span> x: apply_group_offloading(x, onload_device, offload_device, offload_type=<span class="hljs-string">"leaf_level"</span>, use_stream=<span class="hljs-literal">True</span>, low_cpu_mem_usage=<span class="hljs-literal">True</span>), | |
| [pipe.text_encoder, pipe.text_encoder_2, pipe.transformer] | |
| )) | |
| pipe.image_encoder.to(onload_device) | |
| pipe.vae.to(onload_device) | |
| pipe.vae.enable_tiling() | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png"</span> | |
| ) | |
| output = pipe( | |
| image=image, | |
| prompt=<span class="hljs-string">"A penguin dancing in the snow"</span>, | |
| height=<span class="hljs-number">832</span>, | |
| width=<span class="hljs-number">480</span>, | |
| num_frames=<span class="hljs-number">91</span>, | |
| num_inference_steps=<span class="hljs-number">30</span>, | |
| guidance_scale=<span class="hljs-number">9.0</span>, | |
| generator=torch.Generator().manual_seed(<span class="hljs-number">0</span>), | |
| sampling_type=<span class="hljs-string">"vanilla"</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Max memory: <span class="hljs-subst">{torch.cuda.max_memory_allocated() / <span class="hljs-number">1024</span>**<span class="hljs-number">3</span>:<span class="hljs-number">.3</span>f}</span> GB"</span>) | |
| export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),O=new j({props:{title:"HunyuanVideoFramepackPipeline",local:"diffusers.HunyuanVideoFramepackPipeline",headingTag:"h2"}}),ee=new de({props:{name:"class diffusers.HunyuanVideoFramepackPipeline",anchor:"diffusers.HunyuanVideoFramepackPipeline",parameters:[{name:"text_encoder",val:": LlamaModel"},{name:"tokenizer",val:": LlamaTokenizerFast"},{name:"transformer",val:": HunyuanVideoFramepackTransformer3DModel"},{name:"vae",val:": AutoencoderKLHunyuanVideo"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"text_encoder_2",val:": CLIPTextModel"},{name:"tokenizer_2",val:": CLIPTokenizer"},{name:"image_encoder",val:": SiglipVisionModel"},{name:"feature_extractor",val:": SiglipImageProcessor"}],parametersDescription:[{anchor:"diffusers.HunyuanVideoFramepackPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>LlamaModel</code>) — | |
| <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers" rel="nofollow">Llava Llama3-8B</a>.`,name:"text_encoder"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>LlamaTokenizer</code>) — | |
| Tokenizer from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers" rel="nofollow">Llava Llama3-8B</a>.`,name:"tokenizer"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_11340/en/api/models/hunyuan_video_transformer_3d#diffusers.HunyuanVideoTransformer3DModel">HunyuanVideoTransformer3DModel</a>) — | |
| Conditional Transformer to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11340/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) — | |
| A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11340/en/api/models/autoencoder_kl_hunyuan_video#diffusers.AutoencoderKLHunyuanVideo">AutoencoderKLHunyuanVideo</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code>CLIPTextModel</code>) — | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIP</a>, specifically | |
| the <a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a> variant.`,name:"text_encoder_2"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>CLIPTokenizer</code>) — | |
| Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer_2"}],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py#L243"}}),ae=new de({props:{name:"__call__",anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__",parameters:[{name:"image",val:": typing.Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, typing.List[PIL.Image.Image], typing.List[numpy.ndarray], typing.List[torch.Tensor]]"},{name:"last_image",val:": typing.Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, typing.List[PIL.Image.Image], typing.List[numpy.ndarray], typing.List[torch.Tensor], NoneType] = None"},{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"prompt_2",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt_2",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": int = 720"},{name:"width",val:": int = 1280"},{name:"num_frames",val:": int = 129"},{name:"latent_window_size",val:": int = 9"},{name:"num_inference_steps",val:": int = 50"},{name:"sigmas",val:": typing.List[float] = None"},{name:"true_cfg_scale",val:": float = 1.0"},{name:"guidance_scale",val:": float = 6.0"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"image_latents",val:": typing.Optional[torch.Tensor] = None"},{name:"last_image_latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = 
None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"prompt_template",val:": typing.Dict[str, typing.Any] = {'template': '<|start_header_id|>system<|end_header_id|>\\n\\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n{}<|eot_id|>', 'crop_start': 95}"},{name:"max_sequence_length",val:": int = 256"},{name:"sampling_type",val:": FramepackSamplingType = <FramepackSamplingType.INVERTED_ANTI_DRIFTING: 'inverted_anti_drifting'>"}],parametersDescription:[{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.image",description:`<strong>image</strong> (<code>PIL.Image.Image</code> or <code>np.ndarray</code> or <code>torch.Tensor</code>) — | |
| The image to be used as the starting point for the video generation.`,name:"image"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.last_image",description:`<strong>last_image</strong> (<code>PIL.Image.Image</code> or <code>np.ndarray</code> or <code>torch.Tensor</code>, <em>optional</em>) — | |
| The optional last image to be used as the ending point for the video generation. This is useful for | |
| generating transitions between two images.`,name:"last_image"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| will be used instead.`,name:"prompt_2"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>true_cfg_scale</code> is | |
| not greater than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in all the text-encoders.`,name:"negative_prompt_2"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, defaults to <code>720</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, defaults to <code>1280</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, defaults to <code>129</code>) — | |
| The number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, defaults to <code>50</code>) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.true_cfg_scale",description:`<strong>true_cfg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) — | |
| When > 1.0 and a provided <code>negative_prompt</code>, enables true classifier-free guidance.`,name:"true_cfg_scale"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, defaults to <code>6.0</code>) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2. | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality. Note that the only available | |
| HunyuanVideo model is CFG-distilled, which means that traditional guidance between unconditional and | |
| conditional latent is not applied.`,name:"guidance_scale"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.image_latents",description:`<strong>image_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-encoded image latents. If not provided, the image will be encoded using the VAE.`,name:"image_latents"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.last_image_latents",description:`<strong>last_image_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-encoded last image latents. If not provided, the last image will be encoded using the VAE.`,name:"last_image_latents"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>HunyuanVideoFramepackPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) — | |
| A function or a subclass of <code>PipelineCallback</code> or <code>MultiPipelineCallbacks</code> that is called at the end of | |
| each denoising step during the inference. with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a | |
| list of all tensors as specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"}],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py#L617",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>HunyuanVideoFramepackPipelineOutput</code> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images and the second element is a list | |
| of <code>bool</code>s indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) | |
| content.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~HunyuanVideoFramepackPipelineOutput</code> or <code>tuple</code></p> | |
| `}}),ne=new j({props:{title:"Image-to-Video",local:"image-to-video",headingTag:"h5"}}),I=new Ga({props:{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.example",$$slots:{default:[$a]},$$scope:{ctx:V}}}),te=new j({props:{title:"First and Last Image-to-Video",local:"first-and-last-image-to-video",headingTag:"h5"}}),F=new Ga({props:{anchor:"diffusers.HunyuanVideoFramepackPipeline.__call__.example-2",$$slots:{default:[Qa]},$$scope:{ctx:V}}}),se=new de({props:{name:"disable_vae_slicing",anchor:"diffusers.HunyuanVideoFramepackPipeline.disable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py#L575"}}),le=new de({props:{name:"disable_vae_tiling",anchor:"diffusers.HunyuanVideoFramepackPipeline.disable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py#L590"}}),oe=new de({props:{name:"enable_vae_slicing",anchor:"diffusers.HunyuanVideoFramepackPipeline.enable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py#L568"}}),ie=new de({props:{name:"enable_vae_tiling",anchor:"diffusers.HunyuanVideoFramepackPipeline.enable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py#L582"}}),re=new j({props:{title:"HunyuanVideoPipelineOutput",local:"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput",headingTag:"h2"}}),pe=new de({props:{name:"class diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput",anchor:"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput",parameters:[{name:"frames",val:": 
Tensor"}],parametersDescription:[{anchor:"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or List[List[PIL.Image.Image]]) — | |
| List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing | |
| denoised PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape | |
| <code>(batch_size, num_frames, channels, height, width)</code>.`,name:"frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_11340/src/diffusers/pipelines/hunyuan_video/pipeline_output.py#L11"}}),me=new Na({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/framepack.md"}}),{c(){o=p("meta"),J=t(),r=p("p"),Z=t(),c(H.$$.fragment),Te=t(),W=p("div"),W.innerHTML=ya,je=t(),x=p("p"),x.innerHTML=Ma,Ue=t(),R=p("p"),R.innerHTML=ba,ve=t(),c(k.$$.fragment),Ve=t(),c(Y.$$.fragment),We=t(),N=p("table"),N.innerHTML=_a,ke=t(),c(E.$$.fragment),Ie=t(),$=p("p"),$.textContent=Ja,Fe=t(),c(Q.$$.fragment),Be=t(),S=p("p"),S.textContent=wa,Ge=t(),c(z.$$.fragment),Xe=t(),c(A.$$.fragment),Ce=t(),P=p("p"),P.textContent=Za,He=t(),c(q.$$.fragment),xe=t(),c(L.$$.fragment),Re=t(),D=p("p"),D.innerHTML=Ta,Ye=t(),c(K.$$.fragment),Ne=t(),c(O.$$.fragment),Ee=t(),y=p("div"),c(ee.$$.fragment),Ke=t(),ue=p("p"),ue.textContent=ja,Oe=t(),he=p("p"),he.innerHTML=Ua,ea=t(),b=p("div"),c(ae.$$.fragment),aa=t(),ge=p("p"),ge.textContent=va,na=t(),fe=p("p"),fe.textContent=Va,ta=t(),c(ne.$$.fragment),sa=t(),c(I.$$.fragment),la=t(),c(te.$$.fragment),oa=t(),c(F.$$.fragment),ia=t(),B=p("div"),c(se.$$.fragment),ra=t(),ye=p("p"),ye.innerHTML=Wa,pa=t(),G=p("div"),c(le.$$.fragment),ma=t(),Me=p("p"),Me.innerHTML=ka,ca=t(),X=p("div"),c(oe.$$.fragment),da=t(),be=p("p"),be.textContent=Ia,ua=t(),C=p("div"),c(ie.$$.fragment),ha=t(),_e=p("p"),_e.textContent=Fa,$e=t(),c(re.$$.fragment),Qe=t(),U=p("div"),c(pe.$$.fragment),ga=t(),Je=p("p"),Je.textContent=Ba,Se=t(),c(me.$$.fragment),ze=t(),we=p("p"),this.h()},l(e){const 
a=Ra("svelte-u9bgzb",document.head);o=m(a,"META",{name:!0,content:!0}),a.forEach(n),J=s(e),r=m(e,"P",{}),v(r).forEach(n),Z=s(e),d(H.$$.fragment,e),Te=s(e),W=m(e,"DIV",{class:!0,"data-svelte-h":!0}),_(W)!=="svelte-si9ct8"&&(W.innerHTML=ya),je=s(e),x=m(e,"P",{"data-svelte-h":!0}),_(x)!=="svelte-1hpc6jc"&&(x.innerHTML=Ma),Ue=s(e),R=m(e,"P",{"data-svelte-h":!0}),_(R)!=="svelte-1cw1ia9"&&(R.innerHTML=ba),ve=s(e),d(k.$$.fragment,e),Ve=s(e),d(Y.$$.fragment,e),We=s(e),N=m(e,"TABLE",{"data-svelte-h":!0}),_(N)!=="svelte-1to8fyy"&&(N.innerHTML=_a),ke=s(e),d(E.$$.fragment,e),Ie=s(e),$=m(e,"P",{"data-svelte-h":!0}),_($)!=="svelte-1e7rgkz"&&($.textContent=Ja),Fe=s(e),d(Q.$$.fragment,e),Be=s(e),S=m(e,"P",{"data-svelte-h":!0}),_(S)!=="svelte-5069ac"&&(S.textContent=wa),Ge=s(e),d(z.$$.fragment,e),Xe=s(e),d(A.$$.fragment,e),Ce=s(e),P=m(e,"P",{"data-svelte-h":!0}),_(P)!=="svelte-18fuv5c"&&(P.textContent=Za),He=s(e),d(q.$$.fragment,e),xe=s(e),d(L.$$.fragment,e),Re=s(e),D=m(e,"P",{"data-svelte-h":!0}),_(D)!=="svelte-4rj31b"&&(D.innerHTML=Ta),Ye=s(e),d(K.$$.fragment,e),Ne=s(e),d(O.$$.fragment,e),Ee=s(e),y=m(e,"DIV",{class:!0});var M=v(y);d(ee.$$.fragment,M),Ke=s(M),ue=m(M,"P",{"data-svelte-h":!0}),_(ue)!=="svelte-1a4x0ff"&&(ue.textContent=ja),Oe=s(M),he=m(M,"P",{"data-svelte-h":!0}),_(he)!=="svelte-787uj9"&&(he.innerHTML=Ua),ea=s(M),b=m(M,"DIV",{class:!0});var w=v(b);d(ae.$$.fragment,w),aa=s(w),ge=m(w,"P",{"data-svelte-h":!0}),_(ge)!=="svelte-50j04k"&&(ge.textContent=va),na=s(w),fe=m(w,"P",{"data-svelte-h":!0}),_(fe)!=="svelte-kvfsh7"&&(fe.textContent=Va),ta=s(w),d(ne.$$.fragment,w),sa=s(w),d(I.$$.fragment,w),la=s(w),d(te.$$.fragment,w),oa=s(w),d(F.$$.fragment,w),w.forEach(n),ia=s(M),B=m(M,"DIV",{class:!0});var ce=v(B);d(se.$$.fragment,ce),ra=s(ce),ye=m(ce,"P",{"data-svelte-h":!0}),_(ye)!=="svelte-1s3c06i"&&(ye.innerHTML=Wa),ce.forEach(n),pa=s(M),G=m(M,"DIV",{class:!0});var 
Pe=v(G);d(le.$$.fragment,Pe),ma=s(Pe),Me=m(Pe,"P",{"data-svelte-h":!0}),_(Me)!=="svelte-pkn4ui"&&(Me.innerHTML=ka),Pe.forEach(n),ca=s(M),X=m(M,"DIV",{class:!0});var qe=v(X);d(oe.$$.fragment,qe),da=s(qe),be=m(qe,"P",{"data-svelte-h":!0}),_(be)!=="svelte-14bnrb6"&&(be.textContent=Ia),qe.forEach(n),ua=s(M),C=m(M,"DIV",{class:!0});var Le=v(C);d(ie.$$.fragment,Le),ha=s(Le),_e=m(Le,"P",{"data-svelte-h":!0}),_(_e)!=="svelte-1xwrf7t"&&(_e.textContent=Fa),Le.forEach(n),M.forEach(n),$e=s(e),d(re.$$.fragment,e),Qe=s(e),U=m(e,"DIV",{class:!0});var De=v(U);d(pe.$$.fragment,De),ga=s(De),Je=m(De,"P",{"data-svelte-h":!0}),_(Je)!=="svelte-i6xdzu"&&(Je.textContent=Ba),De.forEach(n),Se=s(e),d(me.$$.fragment,e),ze=s(e),we=m(e,"P",{}),v(we).forEach(n),this.h()},h(){T(o,"name","hf:doc:metadata"),T(o,"content",za),T(W,"class","flex flex-wrap space-x-1"),T(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,a){i(document.head,o),l(e,J,a),l(e,r,a),l(e,Z,a),u(H,e,a),l(e,Te,a),l(e,W,a),l(e,je,a),l(e,x,a),l(e,Ue,a),l(e,R,a),l(e,ve,a),u(k,e,a),l(e,Ve,a),u(Y,e,a),l(e,We,a),l(e,N,a),l(e,ke,a),u(E,e,a),l(e,Ie,a),l(e,$,a),l(e,Fe,a),u(Q,e,a),l(e,Be,a),l(e,S,a),l(e,Ge,a),u(z,e,a),l(e,Xe,a),u(A,e,a),l(e,Ce,a),l(e,P,a),l(e,He,a),u(q,e,a),l(e,xe,a),u(L,e,a),l(e,Re,a),l(e,D,a),l(e,Ye,a),u(K,e,a),l(e,Ne,a),u(O,e,a),l(e,Ee,a),l(e,y,a),u(ee,y,null),i(y,Ke),i(y,ue),i(y,Oe),i(y,he),i(y,ea),i(y,b),u(ae,b,null),i(b,aa),i(b,ge),i(b,na),i(b,fe),i(b,ta),u(ne,b,null),i(b,sa),u(I,b,null),i(b,la),u(te,b,null),i(b,oa),u(F,b,null),i(y,ia),i(y,B),u(se,B,null),i(B,ra),i(B,ye),i(y,pa),i(y,G),u(le,G,null),i(G,ma),i(G,Me),i(y,ca),i(y,X),u(oe,X,null),i(X,da),i(X,be),i(y,ua),i(y,C),u(ie,C,null),i(C,ha),i(C,_e),l(e,$e,a),u(re,e,a),l(e,Qe,a),l(e,U,a),u(pe,U,null),i(U,ga),i(U,Je),l(e,Se,a),u(me,e,a),l(e,ze,a),l(e,we,a),Ae=!0},p(e,[a]){const M={};a&2&&(M.$$scope={dirty:a,ctx:e}),k.$set(M);const w={};a&2&&(w.$$scope={dirty:a,ctx:e}),I.$set(w);const 
ce={};a&2&&(ce.$$scope={dirty:a,ctx:e}),F.$set(ce)},i(e){Ae||(h(H.$$.fragment,e),h(k.$$.fragment,e),h(Y.$$.fragment,e),h(E.$$.fragment,e),h(Q.$$.fragment,e),h(z.$$.fragment,e),h(A.$$.fragment,e),h(q.$$.fragment,e),h(L.$$.fragment,e),h(K.$$.fragment,e),h(O.$$.fragment,e),h(ee.$$.fragment,e),h(ae.$$.fragment,e),h(ne.$$.fragment,e),h(I.$$.fragment,e),h(te.$$.fragment,e),h(F.$$.fragment,e),h(se.$$.fragment,e),h(le.$$.fragment,e),h(oe.$$.fragment,e),h(ie.$$.fragment,e),h(re.$$.fragment,e),h(pe.$$.fragment,e),h(me.$$.fragment,e),Ae=!0)},o(e){g(H.$$.fragment,e),g(k.$$.fragment,e),g(Y.$$.fragment,e),g(E.$$.fragment,e),g(Q.$$.fragment,e),g(z.$$.fragment,e),g(A.$$.fragment,e),g(q.$$.fragment,e),g(L.$$.fragment,e),g(K.$$.fragment,e),g(O.$$.fragment,e),g(ee.$$.fragment,e),g(ae.$$.fragment,e),g(ne.$$.fragment,e),g(I.$$.fragment,e),g(te.$$.fragment,e),g(F.$$.fragment,e),g(se.$$.fragment,e),g(le.$$.fragment,e),g(oe.$$.fragment,e),g(ie.$$.fragment,e),g(re.$$.fragment,e),g(pe.$$.fragment,e),g(me.$$.fragment,e),Ae=!1},d(e){e&&(n(J),n(r),n(Z),n(Te),n(W),n(je),n(x),n(Ue),n(R),n(ve),n(Ve),n(We),n(N),n(ke),n(Ie),n($),n(Fe),n(Be),n(S),n(Ge),n(Xe),n(Ce),n(P),n(He),n(xe),n(Re),n(D),n(Ye),n(Ne),n(Ee),n(y),n($e),n(Qe),n(U),n(Se),n(ze),n(we)),n(o),f(H,e),f(k,e),f(Y,e),f(E,e),f(Q,e),f(z,e),f(A,e),f(q,e),f(L,e),f(K,e),f(O,e),f(ee),f(ae),f(ne),f(I),f(te),f(F),f(se),f(le),f(oe),f(ie),f(re,e),f(pe),f(me,e)}}}const za='{"title":"Framepack","local":"framepack","sections":[{"title":"Available models","local":"available-models","sections":[],"depth":2},{"title":"Usage","local":"usage","sections":[{"title":"First and last frame to video","local":"first-and-last-frame-to-video","sections":[],"depth":3},{"title":"Vanilla sampling","local":"vanilla-sampling","sections":[],"depth":3},{"title":"Group 
offloading","local":"group-offloading","sections":[],"depth":3}],"depth":2},{"title":"HunyuanVideoFramepackPipeline","local":"diffusers.HunyuanVideoFramepackPipeline","sections":[{"title":"Image-to-Video","local":"image-to-video","sections":[],"depth":5},{"title":"First and Last Image-to-Video","local":"first-and-last-image-to-video","sections":[],"depth":5}],"depth":2},{"title":"HunyuanVideoPipelineOutput","local":"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';function Aa(V){return Ca(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class an extends Ha{constructor(o){super(),xa(this,o,Aa,Sa,Xa,{})}}export{an as component}; | |
Xet Storage Details
- Size: 67.5 kB
- Xet hash: d9ec51e8307d5302e5261eadb62015f386e9f1a1ed3ef0798cd10b61ed9016f7
Xet efficiently stores files by intelligently splitting them into unique chunks, accelerating uploads and downloads. See the Xet documentation for more info.