Buckets:

rtrm's picture
download
raw
47.8 kB
import{s as Tt,o as vt,n as at}from"../chunks/scheduler.53228c21.js";import{S as jt,i as Zt,e as p,s as l,c as b,h as Ut,a as c,d as t,b as i,f as V,j as _,g as y,k,w as Vt,l as r,m as d,n as M,t as w,o as J,p as T}from"../chunks/index.100fac89.js";import{D as ae}from"../chunks/Docstring.85467d5f.js";import{C as Re}from"../chunks/CodeBlock.d30a6509.js";import{E as It}from"../chunks/ExampleCodeBlock.c0a2ad65.js";import{H as Ee,E as Bt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.9bafb610.js";import{H as Wt,a as Jt}from"../chunks/HfOption.fad27e59.js";function kt(I){let s,v='Refer to the <a href="../../optimization/memory">Reduce memory usage</a> guide for more details about the various memory saving techniques.',m,u,o="The quantized HunyuanVideo model below requires ~14GB of VRAM.",n,f,Z;return f=new Re({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsJTJDJTIwSHVueXVhblZpZGVvUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMEElMEElMjMlMjBxdWFudGl6ZSUyMHdlaWdodHMlMjB0byUyMGludDQlMjB3aXRoJTIwYml0c2FuZGJ5dGVzJTBBcGlwZWxpbmVfcXVhbnRfY29uZmlnJTIwJTNEJTIwUGlwZWxpbmVRdWFudGl6YXRpb25Db25maWcoJTBBJTIwJTIwJTIwJTIwcXVhbnRfYmFja2VuZCUzRCUyMmJpdHNhbmRieXRlc180Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRfa3dhcmdzJTNEJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIybG9hZF9pbl80Yml0JTIyJTNBJTIwVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMmJuYl80Yml0X3F1YW50X3R5cGUlMjIlM0ElMjAlMjJuZjQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjJibmJfNGJpdF9jb21wdXRlX2R0eXBlJTIyJTNBJTIwdG9yY2guYmZsb2F0MTYlMEElMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjBjb21wb25lbnRzX3RvX3F1YW50aXplJTNEJTIydHJhbnNmb3JtZXIlMjIlMEEpJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBIdW55dWFuVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyaHVueXVhbnZpZGVvLWNvbW11bml0eSUyRkh1bnl1YW5WaWRlbyUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RwaXBlbGluZV9xdWFudF9jb25maWclMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQSUwQSUyMyUyMG1vZGVsLW9mZmxvYWRpbmclMjBhbmQlMjB0aWxpbmclMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQXBpcGVsaW5lLnZhZS5lbmFibGVfdGlsaW5nKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwZmx1ZmZ5JTIwdGVkZHklMjBiZWFyJTIwc2l0cyUyMG9uJTIwYSUyMGJlZCUyMG9mJTIwc29mdCUyMHBpbGxvd3MlMjBzdXJyb3VuZGVkJTIwYnklMjBjaGlsZHJlbidzJTIwdG95cy4lMjIlMEF2aWRlbyUyMCUzRCUyMHBpcGVsaW5lKHByb21wdCUzRHByb21wdCUyQyUyMG51bV9mcmFtZXMlM0Q2MSUyQyUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzMCkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyh2aWRlbyUyQyUyMCUyMm91dHB1dC5tcDQlMjIlMkMlMjBmcHMlM0QxNSk=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel, HunyuanVideoPipeline
<span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video
<span class="hljs-comment"># quantize weights to int4 with bitsandbytes</span>
pipeline_quant_config = PipelineQuantizationConfig(
quant_backend=<span class="hljs-string">&quot;bitsandbytes_4bit&quot;</span>,
quant_kwargs={
<span class="hljs-string">&quot;load_in_4bit&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;bnb_4bit_quant_type&quot;</span>: <span class="hljs-string">&quot;nf4&quot;</span>,
<span class="hljs-string">&quot;bnb_4bit_compute_dtype&quot;</span>: torch.bfloat16
},
components_to_quantize=<span class="hljs-string">&quot;transformer&quot;</span>
)
pipeline = HunyuanVideoPipeline.from_pretrained(
<span class="hljs-string">&quot;hunyuanvideo-community/HunyuanVideo&quot;</span>,
quantization_config=pipeline_quant_config,
torch_dtype=torch.bfloat16,
)
<span class="hljs-comment"># model-offloading and tiling</span>
pipeline.enable_model_cpu_offload()
pipeline.vae.enable_tiling()
prompt = <span class="hljs-string">&quot;A fluffy teddy bear sits on a bed of soft pillows surrounded by children&#x27;s toys.&quot;</span>
video = pipeline(prompt=prompt, num_frames=<span class="hljs-number">61</span>, num_inference_steps=<span class="hljs-number">30</span>).frames[<span class="hljs-number">0</span>]
export_to_video(video, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">15</span>)`,wrap:!1}}),{c(){s=p("p"),s.innerHTML=v,m=l(),u=p("p"),u.textContent=o,n=l(),b(f.$$.fragment)},l(h){s=c(h,"P",{"data-svelte-h":!0}),_(s)!=="svelte-iowzkr"&&(s.innerHTML=v),m=i(h),u=c(h,"P",{"data-svelte-h":!0}),_(u)!=="svelte-1lhulzs"&&(u.textContent=o),n=i(h),y(f.$$.fragment,h)},m(h,U){d(h,s,U),d(h,m,U),d(h,u,U),d(h,n,U),M(f,h,U),Z=!0},p:at,i(h){Z||(w(f.$$.fragment,h),Z=!0)},o(h){J(f.$$.fragment,h),Z=!1},d(h){h&&(t(s),t(m),t(u),t(n)),T(f,h)}}}function Ht(I){let s,v='<a href="../../optimization/fp16#torchcompile">Compilation</a> is slow the first time but subsequent calls to the pipeline are faster.',m,u,o;return u=new Re({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsJTJDJTIwSHVueXVhblZpZGVvUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMEElMEElMjMlMjBxdWFudGl6ZSUyMHdlaWdodHMlMjB0byUyMGludDQlMjB3aXRoJTIwYml0c2FuZGJ5dGVzJTBBcGlwZWxpbmVfcXVhbnRfY29uZmlnJTIwJTNEJTIwUGlwZWxpbmVRdWFudGl6YXRpb25Db25maWcoJTBBJTIwJTIwJTIwJTIwcXVhbnRfYmFja2VuZCUzRCUyMmJpdHNhbmRieXRlc180Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRfa3dhcmdzJTNEJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIybG9hZF9pbl80Yml0JTIyJTNBJTIwVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMmJuYl80Yml0X3F1YW50X3R5cGUlMjIlM0ElMjAlMjJuZjQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjJibmJfNGJpdF9jb21wdXRlX2R0eXBlJTIyJTNBJTIwdG9yY2guYmZsb2F0MTYlMEElMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjBjb21wb25lbnRzX3RvX3F1YW50aXplJTNEJTIydHJhbnNmb3JtZXIlMjIlMEEpJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBIdW55dWFuVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyaHVueXVhbnZpZGVvLWNvbW11bml0eSUyRkh1bnl1YW5WaWRlbyUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RwaXBlbGluZV9xdWFudF9jb25maWclMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQSUwQSUyMyUyMG1vZGVsLW9mZmxvYWRpbmclMjBhbmQlMjB0aWxpbmclMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQXBpcGVsaW5lLnZhZS5lbmFibGVfdGlsaW5nKCklMEElMEElMjMlMjB0b3JjaC5jb21waWxlJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIlMjAlM0QlMjB0b3JjaC5jb21waWxlKCUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lLnRyYW5zZm9ybWVyJTJDJTIwbW9kZSUzRCUyMm1heC1hdXRvdHVuZSUyMiUyQyUyMGZ1bGxncmFwaCUzRFRydWUlMEEpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyQSUyMGZsdWZmeSUyMHRlZGR5JTIwYmVhciUyMHNpdHMlMjBvbiUyMGElMjBiZWQlMjBvZiUyMHNvZnQlMjBwaWxsb3dzJTIwc3Vycm91bmRlZCUyMGJ5JTIwY2hpbGRyZW4ncyUyMHRveXMuJTIyJTBBdmlkZW8lMjAlM0QlMjBwaXBlbGluZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjBudW1fZnJhbWVzJTNENjElMkMlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMzApLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8odmlkZW8lMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMTUp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel, HunyuanVideoPipeline
<span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video
<span class="hljs-comment"># quantize weights to int4 with bitsandbytes</span>
pipeline_quant_config = PipelineQuantizationConfig(
quant_backend=<span class="hljs-string">&quot;bitsandbytes_4bit&quot;</span>,
quant_kwargs={
<span class="hljs-string">&quot;load_in_4bit&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;bnb_4bit_quant_type&quot;</span>: <span class="hljs-string">&quot;nf4&quot;</span>,
<span class="hljs-string">&quot;bnb_4bit_compute_dtype&quot;</span>: torch.bfloat16
},
components_to_quantize=<span class="hljs-string">&quot;transformer&quot;</span>
)
pipeline = HunyuanVideoPipeline.from_pretrained(
<span class="hljs-string">&quot;hunyuanvideo-community/HunyuanVideo&quot;</span>,
quantization_config=pipeline_quant_config,
torch_dtype=torch.bfloat16,
)
<span class="hljs-comment"># model-offloading and tiling</span>
pipeline.enable_model_cpu_offload()
pipeline.vae.enable_tiling()
<span class="hljs-comment"># torch.compile</span>
pipeline.transformer.to(memory_format=torch.channels_last)
pipeline.transformer = torch.<span class="hljs-built_in">compile</span>(
pipeline.transformer, mode=<span class="hljs-string">&quot;max-autotune&quot;</span>, fullgraph=<span class="hljs-literal">True</span>
)
prompt = <span class="hljs-string">&quot;A fluffy teddy bear sits on a bed of soft pillows surrounded by children&#x27;s toys.&quot;</span>
video = pipeline(prompt=prompt, num_frames=<span class="hljs-number">61</span>, num_inference_steps=<span class="hljs-number">30</span>).frames[<span class="hljs-number">0</span>]
export_to_video(video, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">15</span>)`,wrap:!1}}),{c(){s=p("p"),s.innerHTML=v,m=l(),b(u.$$.fragment)},l(n){s=c(n,"P",{"data-svelte-h":!0}),_(s)!=="svelte-dcc01q"&&(s.innerHTML=v),m=i(n),y(u.$$.fragment,n)},m(n,f){d(n,s,f),d(n,m,f),M(u,n,f),o=!0},p:at,i(n){o||(w(u.$$.fragment,n),o=!0)},o(n){J(u.$$.fragment,n),o=!1},d(n){n&&(t(s),t(m)),T(u,n)}}}function xt(I){let s,v,m,u;return s=new Jt({props:{id:"usage",option:"memory",$$slots:{default:[kt]},$$scope:{ctx:I}}}),m=new Jt({props:{id:"usage",option:"inference speed",$$slots:{default:[Ht]},$$scope:{ctx:I}}}),{c(){b(s.$$.fragment),v=l(),b(m.$$.fragment)},l(o){y(s.$$.fragment,o),v=i(o),y(m.$$.fragment,o)},m(o,n){M(s,o,n),d(o,v,n),M(m,o,n),u=!0},p(o,n){const f={};n&2&&(f.$$scope={dirty:n,ctx:o}),s.$set(f);const Z={};n&2&&(Z.$$scope={dirty:n,ctx:o}),m.$set(Z)},i(o){u||(w(s.$$.fragment,o),w(m.$$.fragment,o),u=!0)},o(o){J(s.$$.fragment,o),J(m.$$.fragment,o),u=!1},d(o){o&&t(v),T(s,o),T(m,o)}}}function $t(I){let s,v="Examples:",m,u,o;return u=new Re({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwSHVueXVhblZpZGVvUGlwZWxpbmUlMkMlMjBIdW55dWFuVmlkZW9UcmFuc2Zvcm1lcjNETW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJodW55dWFudmlkZW8tY29tbXVuaXR5JTJGSHVueXVhblZpZGVvJTIyJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBIdW55dWFuVmlkZW9UcmFuc2Zvcm1lcjNETW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydHJhbnNmb3JtZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXBpcGUlMjAlM0QlMjBIdW55dWFuVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0cmFuc2Zvcm1lciUzRHRyYW5zZm9ybWVyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFvdXRwdXQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRCUyMkElMjBjYXQlMjB3YWxrcyUyMG9uJTIwdGhlJTIwZ3Jhc3MlMkMlMjByZWFsaXN0aWMlMjIlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0QzMjAlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0Q2MSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzMCUyQyUwQSkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMTUp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video
<span class="hljs-meta">&gt;&gt;&gt; </span>model_id = <span class="hljs-string">&quot;hunyuanvideo-community/HunyuanVideo&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>transformer = HunyuanVideoTransformer3DModel.from_pretrained(
<span class="hljs-meta">... </span> model_id, subfolder=<span class="hljs-string">&quot;transformer&quot;</span>, torch_dtype=torch.bfloat16
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.vae.enable_tiling()
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>output = pipe(
<span class="hljs-meta">... </span> prompt=<span class="hljs-string">&quot;A cat walks on the grass, realistic&quot;</span>,
<span class="hljs-meta">... </span> height=<span class="hljs-number">320</span>,
<span class="hljs-meta">... </span> width=<span class="hljs-number">512</span>,
<span class="hljs-meta">... </span> num_frames=<span class="hljs-number">61</span>,
<span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">30</span>,
<span class="hljs-meta">... </span>).frames[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>export_to_video(output, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">15</span>)`,wrap:!1}}),{c(){s=p("p"),s.textContent=v,m=l(),b(u.$$.fragment)},l(n){s=c(n,"P",{"data-svelte-h":!0}),_(s)!=="svelte-kvfsh7"&&(s.textContent=v),m=i(n),y(u.$$.fragment,n)},m(n,f){d(n,s,f),d(n,m,f),M(u,n,f),o=!0},p:at,i(n){o||(w(u.$$.fragment,n),o=!0)},o(n){J(u.$$.fragment,n),o=!1},d(n){n&&(t(s),t(m)),T(u,n)}}}function Gt(I){let s,v,m,u,o,n='<div class="flex flex-wrap space-x-1"><a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener"><img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/></a></div>',f,Z,h,U,lt='<a href="https://huggingface.co/papers/2412.03603" rel="nofollow">HunyuanVideo</a> is a 13B parameter diffusion transformer model designed to be competitive with closed-source video foundation models and enable wider community access. This model uses a “dual-stream to single-stream” architecture to separately process the video and text tokens first, before concatenating and feeding them to the transformer to fuse the multimodal information. A pretrained multimodal large language model (MLLM) is used as the encoder because it has better image-text alignment, better image detail description and reasoning, and it can be used as a zero-shot learner if system instructions are added to user prompts. Finally, HunyuanVideo uses a 3D causal variational autoencoder to more efficiently process video data at the original resolution and frame rate.',Me,A,it='You can find all the original HunyuanVideo checkpoints under the <a href="https://huggingface.co/tencent" rel="nofollow">Tencent</a> organization.',we,$,rt='<p>Click on the HunyuanVideo models in the right sidebar for more examples of video generation tasks.</p> <p>The examples below use a checkpoint from <a href="https://huggingface.co/hunyuanvideo-community" rel="nofollow">hunyuanvideo-community</a> because the weights are stored in a layout compatible with Diffusers.</p>',Je,q,dt="The example below demonstrates how to generate a video optimized for memory or inference speed.",Te,G,ve,z,je,B,P,le,pt='HunyuanVideo supports LoRAs with <a href="/docs/diffusers/pr_12807/en/api/loaders/lora#diffusers.loaders.HunyuanVideoLoraLoaderMixin.load_lora_weights">load_lora_weights()</a>.',Qe,L,ie,ct="Show example code",Ae,F,qe,re,ut="<p>Refer to the table below for recommended inference values.</p> <table><thead><tr><th>parameter</th> <th>recommended value</th></tr></thead> <tbody><tr><td>text encoder dtype</td> <td><code>torch.float16</code></td></tr> <tr><td>transformer dtype</td> <td><code>torch.bfloat16</code></td></tr> <tr><td>vae dtype</td> <td><code>torch.float16</code></td></tr> <tr><td><code>num_frames (k)</code></td> <td>4 * <code>k</code> + 1</td></tr></tbody></table>",ze,de,mt="<p>Try lower <code>shift</code> values (<code>2.0</code> to <code>5.0</code>) for lower resolution videos and higher <code>shift</code> values (<code>7.0</code> to <code>12.0</code>) for higher resolution images.</p>",Ze,Y,Ue,g,N,Pe,pe,ft="Pipeline for text-to-video generation using HunyuanVideo.",Le,ce,ht=`This model inherits from <a href="/docs/diffusers/pr_12807/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Fe,W,S,Ye,ue,gt="The call function to the pipeline for generation.",Ne,C,Se,X,D,De,me,_t=`Disable sliced VAE decoding. If <code>enable_vae_slicing</code> was previously enabled, this method will go back to
computing decoding in one step.`,Oe,E,O,Ke,fe,bt=`Disable tiled VAE decoding. If <code>enable_vae_tiling</code> was previously enabled, this method will go back to
computing decoding in one step.`,et,R,K,tt,he,yt=`Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.`,nt,Q,ee,st,ge,Mt=`Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
processing larger images.`,Ve,te,Ie,H,ne,ot,_e,wt="Output class for HunyuanVideo pipelines.",Be,se,We,ye,ke;return Z=new Ee({props:{title:"HunyuanVideo",local:"hunyuanvideo",headingTag:"h1"}}),G=new Wt({props:{id:"usage",options:["memory","inference speed"],$$slots:{default:[xt]},$$scope:{ctx:I}}}),z=new Ee({props:{title:"Notes",local:"notes",headingTag:"h2"}}),F=new Re({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsJTJDJTIwSHVueXVhblZpZGVvUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMEElMEElMjMlMjBxdWFudGl6ZSUyMHdlaWdodHMlMjB0byUyMGludDQlMjB3aXRoJTIwYml0c2FuZGJ5dGVzJTBBcGlwZWxpbmVfcXVhbnRfY29uZmlnJTIwJTNEJTIwUGlwZWxpbmVRdWFudGl6YXRpb25Db25maWcoJTBBJTIwJTIwJTIwJTIwcXVhbnRfYmFja2VuZCUzRCUyMmJpdHNhbmRieXRlc180Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRfa3dhcmdzJTNEJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIybG9hZF9pbl80Yml0JTIyJTNBJTIwVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMmJuYl80Yml0X3F1YW50X3R5cGUlMjIlM0ElMjAlMjJuZjQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjJibmJfNGJpdF9jb21wdXRlX2R0eXBlJTIyJTNBJTIwdG9yY2guYmZsb2F0MTYlMEElMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjBjb21wb25lbnRzX3RvX3F1YW50aXplJTNEJTIydHJhbnNmb3JtZXIlMjIlMEEpJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBIdW55dWFuVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyaHVueXVhbnZpZGVvLWNvbW11bml0eSUyRkh1bnl1YW5WaWRlbyUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RwaXBlbGluZV9xdWFudF9jb25maWclMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQSUwQSUyMyUyMGxvYWQlMjBMb1JBJTIwd2VpZ2h0cyUwQXBpcGVsaW5lLmxvYWRfbG9yYV93ZWlnaHRzKCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZsdWNhdGFjbyUyRmh1bnl1YW4tc3RlYW1ib2F0LXdpbGxpZS0xMCUyMiUyQyUyMGFkYXB0ZXJfbmFtZSUzRCUyMnN0ZWFtYm9hdC13aWxsaWUlMjIpJTBBcGlwZWxpbmUuc2V0X2FkYXB0ZXJzKCUyMnN0ZWFtYm9hdC13aWxsaWUlMjIlMkMlMjAwLjkpJTBBJTBBJTIzJTIwbW9kZWwtb2ZmbG9hZGluZyUyMGFuZCUyMHRpbGluZyUwQXBpcGVsaW5lLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBcGlwZWxpbmUudmFlLmVuYWJsZV90aWxpbmcoKSUwQSUwQSUyMyUyMHVzZSUyMCUyMkluJTIwdGhlJTIwc3R5bGUlMjBvZiUyMFNXUiUyMiUyMHRvJTIwdHJpZ2dlciUyMHRoZSUyMExvUkElMEFwcm9tcHQlMjAlM0QlMjAlMjIlMjIlMjIlMEFJbiUyMHRoZSUyMHN0eWxlJTIwb2YlMjBTV1IuJTIwQSUyMGJsYWNrJTIwYW5kJTIwd2hpdGUlMjBhbmltYXRlZCUyMHNjZW5lJTIwZmVhdHVyaW5nJTIwYSUyMGZsdWZmeSUyMHRlZGR5JTIwYmVhciUyMHNpdHMlMjBvbiUyMGElMjBiZWQlMjBvZiUyMHNvZnQlMjBwaWxsb3dzJTIwc3Vycm91bmRlZCUyMGJ5JTIwY2hpbGRyZW4ncyUyMHRveXMuJTBBJTIyJTIyJTIyJTBBdmlkZW8lMjAlM0QlMjBwaXBlbGluZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjBudW1fZnJhbWVzJTNENjElMkMlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMzApLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8odmlkZW8lMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMTUp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel, HunyuanVideoPipeline
<span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video
<span class="hljs-comment"># quantize weights to int4 with bitsandbytes</span>
pipeline_quant_config = PipelineQuantizationConfig(
quant_backend=<span class="hljs-string">&quot;bitsandbytes_4bit&quot;</span>,
quant_kwargs={
<span class="hljs-string">&quot;load_in_4bit&quot;</span>: <span class="hljs-literal">True</span>,
<span class="hljs-string">&quot;bnb_4bit_quant_type&quot;</span>: <span class="hljs-string">&quot;nf4&quot;</span>,
<span class="hljs-string">&quot;bnb_4bit_compute_dtype&quot;</span>: torch.bfloat16
},
components_to_quantize=<span class="hljs-string">&quot;transformer&quot;</span>
)
pipeline = HunyuanVideoPipeline.from_pretrained(
<span class="hljs-string">&quot;hunyuanvideo-community/HunyuanVideo&quot;</span>,
quantization_config=pipeline_quant_config,
torch_dtype=torch.bfloat16,
)
<span class="hljs-comment"># load LoRA weights</span>
pipeline.load_lora_weights(<span class="hljs-string">&quot;https://huggingface.co/lucataco/hunyuan-steamboat-willie-10&quot;</span>, adapter_name=<span class="hljs-string">&quot;steamboat-willie&quot;</span>)
pipeline.set_adapters(<span class="hljs-string">&quot;steamboat-willie&quot;</span>, <span class="hljs-number">0.9</span>)
<span class="hljs-comment"># model-offloading and tiling</span>
pipeline.enable_model_cpu_offload()
pipeline.vae.enable_tiling()
<span class="hljs-comment"># use &quot;In the style of SWR&quot; to trigger the LoRA</span>
prompt = <span class="hljs-string">&quot;&quot;&quot;
In the style of SWR. A black and white animated scene featuring a fluffy teddy bear sits on a bed of soft pillows surrounded by children&#x27;s toys.
&quot;&quot;&quot;</span>
video = pipeline(prompt=prompt, num_frames=<span class="hljs-number">61</span>, num_inference_steps=<span class="hljs-number">30</span>).frames[<span class="hljs-number">0</span>]
export_to_video(video, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">15</span>)`,wrap:!1}}),Y=new Ee({props:{title:"HunyuanVideoPipeline",local:"diffusers.HunyuanVideoPipeline",headingTag:"h2"}}),N=new ae({props:{name:"class diffusers.HunyuanVideoPipeline",anchor:"diffusers.HunyuanVideoPipeline",parameters:[{name:"text_encoder",val:": LlamaModel"},{name:"tokenizer",val:": LlamaTokenizerFast"},{name:"transformer",val:": HunyuanVideoTransformer3DModel"},{name:"vae",val:": AutoencoderKLHunyuanVideo"},{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"text_encoder_2",val:": CLIPTextModel"},{name:"tokenizer_2",val:": CLIPTokenizer"}],parametersDescription:[{anchor:"diffusers.HunyuanVideoPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>LlamaModel</code>) &#x2014;
<a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers" rel="nofollow">Llava Llama3-8B</a>.`,name:"text_encoder"},{anchor:"diffusers.HunyuanVideoPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>LlamaTokenizer</code>) &#x2014;
Tokenizer from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers" rel="nofollow">Llava Llama3-8B</a>.`,name:"tokenizer"},{anchor:"diffusers.HunyuanVideoPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_12807/en/api/models/hunyuan_video_transformer_3d#diffusers.HunyuanVideoTransformer3DModel">HunyuanVideoTransformer3DModel</a>) &#x2014;
Conditional Transformer to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.HunyuanVideoPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_12807/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) &#x2014;
A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.HunyuanVideoPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_12807/en/api/models/autoencoder_kl_hunyuan_video#diffusers.AutoencoderKLHunyuanVideo">AutoencoderKLHunyuanVideo</a>) &#x2014;
Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"},{anchor:"diffusers.HunyuanVideoPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code>CLIPTextModel</code>) &#x2014;
<a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIP</a>, specifically
the <a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a> variant.`,name:"text_encoder_2"},{anchor:"diffusers.HunyuanVideoPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>CLIPTokenizer</code>) &#x2014;
Tokenizer of class
<a href="https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer_2"}],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py#L144"}}),S=new ae({props:{name:"__call__",anchor:"diffusers.HunyuanVideoPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"prompt_2",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt_2",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": int = 720"},{name:"width",val:": int = 1280"},{name:"num_frames",val:": int = 129"},{name:"num_inference_steps",val:": int = 50"},{name:"sigmas",val:": typing.List[float] = None"},{name:"true_cfg_scale",val:": float = 1.0"},{name:"guidance_scale",val:": float = 6.0"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_attention_mask",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"prompt_template",val:": typing.Dict[str, typing.Any] = {'template': '<|start_header_id|>system<|end_header_id|>\\n\\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n{}<|eot_id|>', 'crop_start': 95}"},{name:"max_sequence_length",val:": int = 256"}],parametersDescription:[{anchor:"diffusers.HunyuanVideoPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
instead.`,name:"prompt"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts to be sent to <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is
will be used instead.`,name:"prompt_2"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts not to guide the image generation. If not defined, one has to pass
<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>true_cfg_scale</code> is
not greater than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and
<code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in all the text-encoders.`,name:"negative_prompt_2"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, defaults to <code>720</code>) &#x2014;
The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, defaults to <code>1280</code>) &#x2014;
The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, defaults to <code>129</code>) &#x2014;
The number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, defaults to <code>50</code>) &#x2014;
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) &#x2014;
Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
will be used.`,name:"sigmas"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.true_cfg_scale",description:`<strong>true_cfg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 1.0) &#x2014;
True classifier-free guidance (guidance scale) is enabled when <code>true_cfg_scale</code> &gt; 1 and
<code>negative_prompt</code> is provided.`,name:"true_cfg_scale"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, defaults to <code>6.0</code>) &#x2014;
Embedded guiddance scale is enabled by setting <code>guidance_scale</code> &gt; 1. Higher <code>guidance_scale</code> encourages
a model to generate images more aligned with <code>prompt</code> at the expense of lower image quality.</p>
<p>Guidance-distilled models approximates true classifer-free guidance for <code>guidance_scale</code> &gt; 1. Refer to
the <a href="https://huggingface.co/papers/2210.03142" rel="nofollow">paper</a> to learn more.`,name:"guidance_scale"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The number of images to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) &#x2014;
A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
generation deterministic.`,name:"generator"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting.
If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code>
input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;pil&quot;</code>) &#x2014;
The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether or not to return a <code>HunyuanVideoPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) &#x2014;
A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
<code>self.processor</code> in
<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) &#x2014;
A function or a subclass of <code>PipelineCallback</code> or <code>MultiPipelineCallbacks</code> that is called at the end of
each denoising step during the inference. with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a
list of all tensors as specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.HunyuanVideoPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) &#x2014;
The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"}],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py#L491",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>If <code>return_dict</code> is <code>True</code>, <code>HunyuanVideoPipelineOutput</code> is returned, otherwise a <code>tuple</code> is returned
where the first element is a list with the generated images and the second element is a list of <code>bool</code>s
indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>~HunyuanVideoPipelineOutput</code> or <code>tuple</code></p>
`}}),C=new It({props:{anchor:"diffusers.HunyuanVideoPipeline.__call__.example",$$slots:{default:[$t]},$$scope:{ctx:I}}}),D=new ae({props:{name:"disable_vae_slicing",anchor:"diffusers.HunyuanVideoPipeline.disable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py#L431"}}),O=new ae({props:{name:"disable_vae_tiling",anchor:"diffusers.HunyuanVideoPipeline.disable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py#L458"}}),K=new ae({props:{name:"enable_vae_slicing",anchor:"diffusers.HunyuanVideoPipeline.enable_vae_slicing",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py#L418"}}),ee=new ae({props:{name:"enable_vae_tiling",anchor:"diffusers.HunyuanVideoPipeline.enable_vae_tiling",parameters:[],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py#L444"}}),te=new Ee({props:{title:"HunyuanVideoPipelineOutput",local:"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput",headingTag:"h2"}}),ne=new ae({props:{name:"class diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput",anchor:"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput",parameters:[{name:"frames",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or List[List[PIL.Image.Image]]) &#x2014;
List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing
denoised PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape
<code>(batch_size, num_frames, channels, height, width)</code>.`,name:"frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_12807/src/diffusers/pipelines/hunyuan_video/pipeline_output.py#L12"}}),se=new Bt({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/hunyuan_video.md"}}),{c(){s=p("meta"),v=l(),m=p("p"),u=l(),o=p("div"),o.innerHTML=n,f=l(),b(Z.$$.fragment),h=l(),U=p("p"),U.innerHTML=lt,Me=l(),A=p("p"),A.innerHTML=it,we=l(),$=p("blockquote"),$.innerHTML=rt,Je=l(),q=p("p"),q.textContent=dt,Te=l(),b(G.$$.fragment),ve=l(),b(z.$$.fragment),je=l(),B=p("ul"),P=p("li"),le=p("p"),le.innerHTML=pt,Qe=l(),L=p("details"),ie=p("summary"),ie.textContent=ct,Ae=l(),b(F.$$.fragment),qe=l(),re=p("li"),re.innerHTML=ut,ze=l(),de=p("li"),de.innerHTML=mt,Ze=l(),b(Y.$$.fragment),Ue=l(),g=p("div"),b(N.$$.fragment),Pe=l(),pe=p("p"),pe.textContent=ft,Le=l(),ce=p("p"),ce.innerHTML=ht,Fe=l(),W=p("div"),b(S.$$.fragment),Ye=l(),ue=p("p"),ue.textContent=gt,Ne=l(),b(C.$$.fragment),Se=l(),X=p("div"),b(D.$$.fragment),De=l(),me=p("p"),me.innerHTML=_t,Oe=l(),E=p("div"),b(O.$$.fragment),Ke=l(),fe=p("p"),fe.innerHTML=bt,et=l(),R=p("div"),b(K.$$.fragment),tt=l(),he=p("p"),he.textContent=yt,nt=l(),Q=p("div"),b(ee.$$.fragment),st=l(),ge=p("p"),ge.textContent=Mt,Ve=l(),b(te.$$.fragment),Ie=l(),H=p("div"),b(ne.$$.fragment),ot=l(),_e=p("p"),_e.textContent=wt,Be=l(),b(se.$$.fragment),We=l(),ye=p("p"),this.h()},l(e){const a=Ut("svelte-u9bgzb",document.head);s=c(a,"META",{name:!0,content:!0}),a.forEach(t),v=i(e),m=c(e,"P",{}),V(m).forEach(t),u=i(e),o=c(e,"DIV",{style:!0,"data-svelte-h":!0}),_(o)!=="svelte-p206qu"&&(o.innerHTML=n),f=i(e),y(Z.$$.fragment,e),h=i(e),U=c(e,"P",{"data-svelte-h":!0}),_(U)!=="svelte-1hd212d"&&(U.innerHTML=lt),Me=i(e),A=c(e,"P",{"data-svelte-h":!0}),_(A)!=="svelte-vyw78o"&&(A.innerHTML=it),we=i(e),$=c(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),_($)!=="svelte-91v6e"&&($.innerHTML=rt),Je=i(e),q=c(e,"P",{"data-svelte-h":!0}),_(q)!=="svelte-pzhop3"&&(q.textContent=dt),Te=i(e),y(G.$$.fragment,e),ve=i(e),y(z.$$.fragment,e),je=i(e),B=c(e,"UL",{});var x=V(B);P=c(x,"LI",{});var oe=V(P);le=c(oe,"P",{"data-svelte-h":!0}),_(le)!=="svelte-18j307y"&&(le.innerHTML=pt),Qe=i(oe),L=c(oe,"DETAILS",{});var He=V(L);ie=c(He,"SUMMARY",{"data-svelte-h":!0}),_(ie)!=="svelte-1m0l1gk"&&(ie.textContent=ct),Ae=i(He),y(F.$$.fragment,He),He.forEach(t),oe.forEach(t),qe=i(x),re=c(x,"LI",{"data-svelte-h":!0}),_(re)!=="svelte-bm2kpe"&&(re.innerHTML=ut),ze=i(x),de=c(x,"LI",{"data-svelte-h":!0}),_(de)!=="svelte-ck8fz4"&&(de.innerHTML=mt),x.forEach(t),Ze=i(e),y(Y.$$.fragment,e),Ue=i(e),g=c(e,"DIV",{class:!0});var j=V(g);y(N.$$.fragment,j),Pe=i(j),pe=c(j,"P",{"data-svelte-h":!0}),_(pe)!=="svelte-1a4x0ff"&&(pe.textContent=ft),Le=i(j),ce=c(j,"P",{"data-svelte-h":!0}),_(ce)!=="svelte-1ejnuzg"&&(ce.innerHTML=ht),Fe=i(j),W=c(j,"DIV",{class:!0});var be=V(W);y(S.$$.fragment,be),Ye=i(be),ue=c(be,"P",{"data-svelte-h":!0}),_(ue)!=="svelte-50j04k"&&(ue.textContent=gt),Ne=i(be),y(C.$$.fragment,be),be.forEach(t),Se=i(j),X=c(j,"DIV",{class:!0});var xe=V(X);y(D.$$.fragment,xe),De=i(xe),me=c(xe,"P",{"data-svelte-h":!0}),_(me)!=="svelte-1s3c06i"&&(me.innerHTML=_t),xe.forEach(t),Oe=i(j),E=c(j,"DIV",{class:!0});var $e=V(E);y(O.$$.fragment,$e),Ke=i($e),fe=c($e,"P",{"data-svelte-h":!0}),_(fe)!=="svelte-pkn4ui"&&(fe.innerHTML=bt),$e.forEach(t),et=i(j),R=c(j,"DIV",{class:!0});var Ge=V(R);y(K.$$.fragment,Ge),tt=i(Ge),he=c(Ge,"P",{"data-svelte-h":!0}),_(he)!=="svelte-14bnrb6"&&(he.textContent=yt),Ge.forEach(t),nt=i(j),Q=c(j,"DIV",{class:!0});var Ce=V(Q);y(ee.$$.fragment,Ce),st=i(Ce),ge=c(Ce,"P",{"data-svelte-h":!0}),_(ge)!=="svelte-1xwrf7t"&&(ge.textContent=Mt),Ce.forEach(t),j.forEach(t),Ve=i(e),y(te.$$.fragment,e),Ie=i(e),H=c(e,"DIV",{class:!0});var Xe=V(H);y(ne.$$.fragment,Xe),ot=i(Xe),_e=c(Xe,"P",{"data-svelte-h":!0}),_(_e)!=="svelte-i6xdzu"&&(_e.textContent=wt),Xe.forEach(t),Be=i(e),y(se.$$.fragment,e),We=i(e),ye=c(e,"P",{}),V(ye).forEach(t),this.h()},h(){k(s,"name","hf:doc:metadata"),k(s,"content",Ct),Vt(o,"float","right"),k($,"class","tip"),k(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(g,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,a){r(document.head,s),d(e,v,a),d(e,m,a),d(e,u,a),d(e,o,a),d(e,f,a),M(Z,e,a),d(e,h,a),d(e,U,a),d(e,Me,a),d(e,A,a),d(e,we,a),d(e,$,a),d(e,Je,a),d(e,q,a),d(e,Te,a),M(G,e,a),d(e,ve,a),M(z,e,a),d(e,je,a),d(e,B,a),r(B,P),r(P,le),r(P,Qe),r(P,L),r(L,ie),r(L,Ae),M(F,L,null),r(B,qe),r(B,re),r(B,ze),r(B,de),d(e,Ze,a),M(Y,e,a),d(e,Ue,a),d(e,g,a),M(N,g,null),r(g,Pe),r(g,pe),r(g,Le),r(g,ce),r(g,Fe),r(g,W),M(S,W,null),r(W,Ye),r(W,ue),r(W,Ne),M(C,W,null),r(g,Se),r(g,X),M(D,X,null),r(X,De),r(X,me),r(g,Oe),r(g,E),M(O,E,null),r(E,Ke),r(E,fe),r(g,et),r(g,R),M(K,R,null),r(R,tt),r(R,he),r(g,nt),r(g,Q),M(ee,Q,null),r(Q,st),r(Q,ge),d(e,Ve,a),M(te,e,a),d(e,Ie,a),d(e,H,a),M(ne,H,null),r(H,ot),r(H,_e),d(e,Be,a),M(se,e,a),d(e,We,a),d(e,ye,a),ke=!0},p(e,[a]){const x={};a&2&&(x.$$scope={dirty:a,ctx:e}),G.$set(x);const oe={};a&2&&(oe.$$scope={dirty:a,ctx:e}),C.$set(oe)},i(e){ke||(w(Z.$$.fragment,e),w(G.$$.fragment,e),w(z.$$.fragment,e),w(F.$$.fragment,e),w(Y.$$.fragment,e),w(N.$$.fragment,e),w(S.$$.fragment,e),w(C.$$.fragment,e),w(D.$$.fragment,e),w(O.$$.fragment,e),w(K.$$.fragment,e),w(ee.$$.fragment,e),w(te.$$.fragment,e),w(ne.$$.fragment,e),w(se.$$.fragment,e),ke=!0)},o(e){J(Z.$$.fragment,e),J(G.$$.fragment,e),J(z.$$.fragment,e),J(F.$$.fragment,e),J(Y.$$.fragment,e),J(N.$$.fragment,e),J(S.$$.fragment,e),J(C.$$.fragment,e),J(D.$$.fragment,e),J(O.$$.fragment,e),J(K.$$.fragment,e),J(ee.$$.fragment,e),J(te.$$.fragment,e),J(ne.$$.fragment,e),J(se.$$.fragment,e),ke=!1},d(e){e&&(t(v),t(m),t(u),t(o),t(f),t(h),t(U),t(Me),t(A),t(we),t($),t(Je),t(q),t(Te),t(ve),t(je),t(B),t(Ze),t(Ue),t(g),t(Ve),t(Ie),t(H),t(Be),t(We),t(ye)),t(s),T(Z,e),T(G,e),T(z,e),T(F),T(Y,e),T(N),T(S),T(C),T(D),T(O),T(K),T(ee),T(te,e),T(ne),T(se,e)}}}const Ct='{"title":"HunyuanVideo","local":"hunyuanvideo","sections":[{"title":"Notes","local":"notes","sections":[],"depth":2},{"title":"HunyuanVideoPipeline","local":"diffusers.HunyuanVideoPipeline","sections":[],"depth":2},{"title":"HunyuanVideoPipelineOutput","local":"diffusers.pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';function Xt(I){return vt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Lt extends jt{constructor(s){super(),Zt(this,s,Xt,Gt,Tt,{})}}export{Lt as component};

Xet Storage Details

Size:
47.8 kB
·
Xet hash:
836a35b0b1b35c442db21866d98acc479c1d29dadd821edd56f5d70250547af8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.