Buckets:
| import{s as Ma,o as _a,n as Cn}from"../chunks/scheduler.53228c21.js";import{S as Ta,i as ya,e as l,s as a,c,h as Ja,a as i,d as t,b as o,f as U,g as m,j as p,k as b,l as n,m as d,n as u,t as h,o as g,p as f}from"../chunks/index.cac5d66a.js";import{D as v}from"../chunks/Docstring.468b4f14.js";import{C as Y}from"../chunks/CodeBlock.606cbaf4.js";import{E as Bn}from"../chunks/ExampleCodeBlock.9de28705.js";import{H as x,E as wa}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.bb40ef82.js";function ba(V){let M,I="Examples:",y,_,T;return _=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMlBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQSUwQXBpcGUlMjAlM0QlMjBMVFgyUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkxpZ2h0cmlja3MlMkZMVFgtMiUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjB3b21hbiUyMHdpdGglMjBsb25nJTIwYnJvd24lMjBoYWlyJTIwYW5kJTIwbGlnaHQlMjBza2luJTIwc21pbGVzJTIwYXQlMjBhbm90aGVyJTIwd29tYW4lMjB3aXRoJTIwbG9uZyUyMGJsb25kZSUyMGhhaXIuJTIwVGhlJTIwd29tYW4lMjB3aXRoJTIwYnJvd24lMjBoYWlyJTIwd2VhcnMlMjBhJTIwYmxhY2slMjBqYWNrZXQlMjBhbmQlMjBoYXMlMjBhJTIwc21hbGwlMkMlMjBiYXJlbHklMjBub3RpY2VhYmxlJTIwbW9sZSUyMG9uJTIwaGVyJTIwcmlnaHQlMjBjaGVlay4lMjBUaGUlMjBjYW1lcmElMjBhbmdsZSUyMGlzJTIwYSUyMGNsb3NlLXVwJTJDJTIwZm9jdXNlZCUyMG9uJTIwdGhlJTIwd29tYW4lMjB3aXRoJTIwYnJvd24lMjBoYWlyJ3MlMjBmYWNlLiUyMFRoZSUyMGxpZ2h0aW5nJTIwaXMlMjB3YXJtJTIwYW5kJTIwbmF0dXJhbCUyQyUyMGxpa2VseSUyMGZyb20lMjB0aGUlMjBzZXR0aW5nJTIwc3VuJTJDJTIwY2FzdGluZyUyMGElMjBzb2Z0JTIwZ2xvdyUyMG9uJTIwdGhlJTIwc2NlbmUuJTIwVGhlJTIwc2NlbmUlMjBhcHBlYXJzJTIwdG8lMjBiZSUyMHJlYWwtbGlmZSUyMGZvb3RhZ2UlMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJ3b3JzdCUyMHF1YWxpdHklMkMlMjBpbmNvbnNpc3RlbnQlMjBtb3Rpb24lMkMlMjBibHVycnklMkMlMjBqaXR0ZXJ5JTJDJTIwZGlzdG9ydGVkJTIyJTBBJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZW
dhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDc2OCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0QxMjElMkMlMEElMjAlMjAlMjAlMjBmcmFtZV9yYXRlJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q0MCUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNENC4wJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTIwJTIwJTIzJTIwc2hvdWxkJTIwYmUlMjAyNDAwMCUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIydmlkZW8ubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2Pipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-meta">>>> </span>pipe = LTX2Pipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"</span> | |
| <span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted"</span> | |
| <span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span> | |
| <span class="hljs-meta">>>> </span>video, audio = pipe( | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> negative_prompt=negative_prompt, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>, | |
| <span class="hljs-meta">... </span> frame_rate=frame_rate, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>, | |
| <span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>, | |
| <span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>encode_video( | |
| <span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>], | |
| <span class="hljs-meta">... </span> fps=frame_rate, | |
| <span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| <span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span> | |
| <span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>, | |
| <span class="hljs-meta">... </span>)`,lang:"py",wrap:!1}}),{c(){M=l("p"),M.textContent=I,y=a(),c(_.$$.fragment)},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-kvfsh7"&&(M.textContent=I),y=o(r),m(_.$$.fragment,r)},m(r,w){d(r,M,w),d(r,y,w),u(_,r,w),T=!0},p:Cn,i(r){T||(h(_.$$.fragment,r),T=!0)},o(r){g(_.$$.fragment,r),T=!1},d(r){r&&(t(M),t(y)),f(_,r)}}}function Ua(V){let M,I="Examples:",y,_,T;return _=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBsb2FkX2ltYWdlJTBBJTBBcGlwZSUyMCUzRCUyMExUWDJJbWFnZVRvVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyTGlnaHRyaWNrcyUyRkxUWC0yJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmEtci1yLW8tdyUyRnRpbnktbWVtZS1kYXRhc2V0LWNhcHRpb25lZCUyRnJlc29sdmUlMkZtYWluJTJGaW1hZ2VzJTJGOC5wbmclMjIlMEEpJTBBcHJvbXB0JTIwJTNEJTIwJTIyQSUyMHlvdW5nJTIwZ2lybCUyMHN0YW5kcyUyMGNhbG1seSUyMGluJTIwdGhlJTIwZm9yZWdyb3VuZCUyQyUyMGxvb2tpbmclMjBkaXJlY3RseSUyMGF0JTIwdGhlJTIwY2FtZXJhJTJDJTIwYXMlMjBhJTIwaG91c2UlMjBmaXJlJTIwcmFnZXMlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQuJTIyJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyd29yc3QlMjBxdWFsaXR5JTJDJTIwaW5jb25zaXN0ZW50JTIwbW90aW9uJTJDJTIwYmx1cnJ5JTJDJTIwaml0dGVyeSUyQyUyMGRpc3RvcnRlZCUyMiUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0RpbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNENzY4JTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNENTEyJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDEyMSUyQyUwQSUyMCUyMCUyMCUyMGZyYW1lX3JhdGUlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDQwJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q0Lj
AlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMm5wJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFlbmNvZGVfdmlkZW8oJTBBJTIwJTIwJTIwJTIwdmlkZW8lNUIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwZnBzJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvJTNEYXVkaW8lNUIwJTVELmZsb2F0KCkuY3B1KCklMkMlMEElMjAlMjAlMjAlMjBhdWRpb19zYW1wbGVfcmF0ZSUzRHBpcGUudm9jb2Rlci5jb25maWcub3V0cHV0X3NhbXBsaW5nX3JhdGUlMkMlMjAlMjAlMjMlMjBzaG91bGQlMjBiZSUyMDI0MDAwJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3BhdGglM0QlMjJ2aWRlby5tcDQlMjIlMkMlMEEp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ImageToVideoPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-meta">>>> </span>pipe = LTX2ImageToVideoPipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background."</span> | |
| <span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted"</span> | |
| <span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span> | |
| <span class="hljs-meta">>>> </span>video, audio = pipe( | |
| <span class="hljs-meta">... </span> image=image, | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> negative_prompt=negative_prompt, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>, | |
| <span class="hljs-meta">... </span> frame_rate=frame_rate, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>, | |
| <span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>, | |
| <span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>encode_video( | |
| <span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>], | |
| <span class="hljs-meta">... </span> fps=frame_rate, | |
| <span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| <span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span> | |
| <span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>, | |
| <span class="hljs-meta">... </span>)`,lang:"py",wrap:!1}}),{c(){M=l("p"),M.textContent=I,y=a(),c(_.$$.fragment)},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-kvfsh7"&&(M.textContent=I),y=o(r),m(_.$$.fragment,r)},m(r,w){d(r,M,w),d(r,y,w),u(_,r,w),T=!0},p:Cn,i(r){T||(h(_.$$.fragment,r),T=!0)},o(r){g(_.$$.fragment,r),T=!1},d(r){r&&(t(M),t(y)),f(_,r)}}}function ja(V){let M,I="Examples:",y,_,T;return _=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIucGlwZWxpbmVfbHR4Ml9jb25kaXRpb24lMjBpbXBvcnQlMjBMVFgyVmlkZW9Db25kaXRpb24lMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwbG9hZF9pbWFnZSUwQSUwQXBpcGUlMjAlM0QlMjBMVFgyQ29uZGl0aW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkxpZ2h0cmlja3MlMkZMVFgtMiUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQWZpcnN0X2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZmbGYydl9pbnB1dF9maXJzdF9mcmFtZS5wbmclMjIlMEEpJTBBbGFzdF9pbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGZmxmMnZfaW5wdXRfbGFzdF9mcmFtZS5wbmclMjIlMEEpJTBBZmlyc3RfY29uZCUyMCUzRCUyMExUWDJWaWRlb0NvbmRpdGlvbihmcmFtZXMlM0RmaXJzdF9pbWFnZSUyQyUyMGluZGV4JTNEMCUyQyUyMHN0cmVuZ3RoJTNEMS4wKSUwQWxhc3RfY29uZCUyMCUzRCUyMExUWDJWaWRlb0NvbmRpdGlvbihmcmFtZXMlM0RsYXN0X2ltYWdlJTJDJTIwaW5kZXglM0QtMSUyQyUyMHN0cmVuZ3RoJTNEMS4wKSUwQWNvbmRpdGlvbnMlMjAlM0QlMjAlNUJmaXJzdF9jb25kJTJDJTIwbGFzdF9jb25kJTVEJTBBcHJvbXB0JTIwJTNEJTIwJTIyQ0clMjBhbmltYXRpb24lMjBzdHlsZSUyQyUyMGElMjBzbWFsbCUyMGJsdWUlMjBiaXJkJTIwdGFrZXMlMjBvZmYlMjBmcm9tJTIwdGhlJTIwZ3JvdW5kJTJDJTIwZmxhcHBpbmclMjBpdHMlMjB3aW5ncy4lMjIlME
FuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJ3b3JzdCUyMHF1YWxpdHklMkMlMjBpbmNvbnNpc3RlbnQlMjBtb3Rpb24lMkMlMjBibHVycnklMkMlMjBqaXR0ZXJ5JTJDJTIwZGlzdG9ydGVkJTJDJTIwc3RhdGljJTIyJTBBJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwY29uZGl0aW9ucyUzRGNvbmRpdGlvbnMlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDc2OCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0QxMjElMkMlMEElMjAlMjAlMjAlMjBmcmFtZV9yYXRlJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q0MCUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNENC4wJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBdmlkZW8lMjAlM0QlMjAodmlkZW8lMjAqJTIwMjU1KS5yb3VuZCgpLmFzdHlwZSglMjJ1aW50OCUyMiklMEF2aWRlbyUyMCUzRCUyMHRvcmNoLmZyb21fbnVtcHkodmlkZW8pJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTIwJTIwJTIzJTIwc2hvdWxkJTIwYmUlMjAyNDAwMCUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIydmlkZW8ubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ConditionPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.pipeline_ltx2_condition <span class="hljs-keyword">import</span> LTX2VideoCondition | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-meta">>>> </span>pipe = LTX2ConditionPipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>first_image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>last_image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>first_cond = LTX2VideoCondition(frames=first_image, index=<span class="hljs-number">0</span>, strength=<span class="hljs-number">1.0</span>) | |
| <span class="hljs-meta">>>> </span>last_cond = LTX2VideoCondition(frames=last_image, index=-<span class="hljs-number">1</span>, strength=<span class="hljs-number">1.0</span>) | |
| <span class="hljs-meta">>>> </span>conditions = [first_cond, last_cond] | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"CG animation style, a small blue bird takes off from the ground, flapping its wings."</span> | |
| <span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted, static"</span> | |
| <span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span> | |
| <span class="hljs-meta">>>> </span>video = pipe( | |
| <span class="hljs-meta">... </span> conditions=conditions, | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> negative_prompt=negative_prompt, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>, | |
| <span class="hljs-meta">... </span> frame_rate=frame_rate, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>, | |
| <span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>, | |
| <span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>video = (video * <span class="hljs-number">255</span>).<span class="hljs-built_in">round</span>().astype(<span class="hljs-string">"uint8"</span>) | |
| <span class="hljs-meta">>>> </span>video = torch.from_numpy(video) | |
| <span class="hljs-meta">>>> </span>encode_video( | |
| <span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>], | |
| <span class="hljs-meta">... </span> fps=frame_rate, | |
| <span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| <span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span> | |
| <span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>, | |
| <span class="hljs-meta">... </span>)`,lang:"py",wrap:!1}}),{c(){M=l("p"),M.textContent=I,y=a(),c(_.$$.fragment)},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-kvfsh7"&&(M.textContent=I),y=o(r),m(_.$$.fragment,r)},m(r,w){d(r,M,w),d(r,y,w),u(_,r,w),T=!0},p:Cn,i(r){T||(h(_.$$.fragment,r),T=!0)},o(r){g(_.$$.fragment,r),T=!1},d(r){r&&(t(M),t(y)),f(_,r)}}}function va(V){let M,I="Examples:",y,_,T;return _=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lJTJDJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZW5jb2RlX3ZpZGVvJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5sYXRlbnRfdXBzYW1wbGVyJTIwaW1wb3J0JTIwTFRYMkxhdGVudFVwc2FtcGxlck1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMEElMEFwaXBlJTIwJTNEJTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJMaWdodHJpY2tzJTJGTFRYLTIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFpbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGYS1yLXItby13JTJGdGlueS1tZW1lLWRhdGFzZXQtY2FwdGlvbmVkJTJGcmVzb2x2ZSUyRm1haW4lMkZpbWFnZXMlMkY4LnBuZyUyMiUwQSklMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIweW91bmclMjBnaXJsJTIwc3RhbmRzJTIwY2FsbWx5JTIwaW4lMjB0aGUlMjBmb3JlZ3JvdW5kJTJDJTIwbG9va2luZyUyMGRpcmVjdGx5JTIwYXQlMjB0aGUlMjBjYW1lcmElMkMlMjBhcyUyMGElMjBob3VzZSUyMGZpcmUlMjByYWdlcyUyMGluJTIwdGhlJTIwYmFja2dyb3VuZC4lMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJ3b3JzdCUyMHF1YWxpdHklMkMlMjBpbmNvbnNpc3RlbnQlMjBtb3Rpb24lMkMlMjBibHVycnklMkMlMjBqaXR0ZXJ5JTJDJTIwZGlzdG9ydGVkJTIyJTBBJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBpbWFnZSUzRGltYWdlJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0Q3NjglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q1MTIlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJT
JDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENDAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDQuMCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIycGlsJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFsYXRlbnRfdXBzYW1wbGVyJTIwJTNEJTIwTFRYMkxhdGVudFVwc2FtcGxlck1vZGVsLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJMaWdodHJpY2tzJTJGTFRYLTIlMjIlMkMlMjBzdWJmb2xkZXIlM0QlMjJsYXRlbnRfdXBzYW1wbGVyJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEF1cHNhbXBsZV9waXBlJTIwJTNEJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUodmFlJTNEcGlwZS52YWUlMkMlMjBsYXRlbnRfdXBzYW1wbGVyJTNEbGF0ZW50X3Vwc2FtcGxlciklMEF1cHNhbXBsZV9waXBlLnZhZS5lbmFibGVfdGlsaW5nKCklMEF1cHNhbXBsZV9waXBlLnRvKGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQSUwQXZpZGVvJTIwJTNEJTIwdXBzYW1wbGVfcGlwZSglMEElMjAlMjAlMjAlMjB2aWRlbyUzRHZpZGVvJTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0Q3NjglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q1MTIlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMm5wJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklNUIwJTVEJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTIwJTIwJTIzJTIwc2hvdWxkJTIwYmUlMjAyNDAwMCUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIydmlkZW8ubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-meta">>>> </span>pipe = LTX2ImageToVideoPipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>image = load_image( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background."</span> | |
| <span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted"</span> | |
| <span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span> | |
| <span class="hljs-meta">>>> </span>video, audio = pipe( | |
| <span class="hljs-meta">... </span> image=image, | |
| <span class="hljs-meta">... </span> prompt=prompt, | |
| <span class="hljs-meta">... </span> negative_prompt=negative_prompt, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>, | |
| <span class="hljs-meta">... </span> frame_rate=frame_rate, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>, | |
| <span class="hljs-meta">... </span> output_type=<span class="hljs-string">"pil"</span>, | |
| <span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Lightricks/LTX-2"</span>, subfolder=<span class="hljs-string">"latent_upsampler"</span>, torch_dtype=torch.bfloat16 | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler) | |
| <span class="hljs-meta">>>> </span>upsample_pipe.vae.enable_tiling() | |
| <span class="hljs-meta">>>> </span>upsample_pipe.to(device=<span class="hljs-string">"cuda"</span>, dtype=torch.bfloat16) | |
| <span class="hljs-meta">>>> </span>video = upsample_pipe( | |
| <span class="hljs-meta">... </span> video=video, | |
| <span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>, | |
| <span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>, | |
| <span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>, | |
| <span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>encode_video( | |
| <span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>], | |
| <span class="hljs-meta">... </span> fps=frame_rate, | |
| <span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| <span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span> | |
| <span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>, | |
| <span class="hljs-meta">... </span>)`,lang:"py",wrap:!1}}),{c(){M=l("p"),M.textContent=I,y=a(),c(_.$$.fragment)},l(r){M=i(r,"P",{"data-svelte-h":!0}),p(M)!=="svelte-kvfsh7"&&(M.textContent=I),y=o(r),m(_.$$.fragment,r)},m(r,w){d(r,M,w),d(r,y,w),u(_,r,w),T=!0},p:Cn,i(r){T||(h(_.$$.fragment,r),T=!0)},o(r){g(_.$$.fragment,r),T=!1},d(r){r&&(t(M),t(y)),f(_,r)}}}function Za(V){let M,I,y,_,T,r,w,Zs='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',Lt,oe,Gs='<a href="https://hf.co/papers/2601.03233" rel="nofollow">LTX-2</a> is a DiT-based foundation model designed to generate synchronized video and audio within a single model. It brings together the core building blocks of modern video generation, with open weights and a focus on practical, local execution.',Ft,le,Is='You can find all the original LTX-Video checkpoints under the <a href="https://huggingface.co/Lightricks" rel="nofollow">Lightricks</a> organization.',Et,ie,Xs='The original codebase for LTX-2 can be found <a href="https://github.com/Lightricks/LTX-2" rel="nofollow">here</a>.',Qt,de,Yt,re,Bs="Recommended pipeline to achieve production quality generation, this pipeline is composed of two stages:",St,pe,Cs="<li>Stage 1: Generate a video at the target resolution using diffusion sampling with classifier-free guidance (CFG). This stage produces a coherent low-noise video sequence that respects the text/image conditioning.</li> <li>Stage 2: Upsample the Stage 1 output by 2 and refine details using a distilled LoRA model to improve fidelity and visual quality. Stage 2 may apply lighter CFG to preserve the structure from Stage 1 while enhancing texture and sharpness.</li>",At,ce,Ws="Sample usage of text-to-video two stages pipeline",zt,me,Ht,ue,Pt,he,xs="Fastest two-stages generation pipeline using a distilled checkpoint.",$t,ge,Dt,fe,qt,Me,Vs="You can use <code>LTX2ConditionPipeline</code> to specify image and/or video conditions at arbitrary latent indices. 
For example, we can specify both a first-frame and last-frame condition to perform first-last-frame-to-video (FLF2V) generation:",Kt,_e,Ot,Te,ks="You can use both image and video conditions:",en,ye,tn,Je,Rs="Because the conditioning is done via latent frames, the 8 data space frames corresponding to the specified latent frame for an image condition will tend to be static.",nn,we,sn,be,Ns="LTX-2.X pipelines support multimodal guidance. It is composed of three terms, all using a CFG-style update rule:",an,Ue,Ls='<li>Classifier-Free Guidance (CFG): standard <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">CFG</a> where the perturbed (“weaker”) output is generated using the negative prompt.</li> <li>Spatio-Temporal Guidance (STG): <a href="https://huggingface.co/papers/2411.18664" rel="nofollow">STG</a> moves away from a perturbed output created from short-cutting self-attention operations and substitutes in the attention values instead. The idea is that this creates sharper videos and better spatiotemporal consistency.</li> <li>Modality Isolation Guidance: moves away from a perturbed output created from disabling cross-modality (audio-to-video and video-to-audio) cross attention. This guidance is more specific to <a href="https://huggingface.co/papers/2601.03233" rel="nofollow">LTX-2.X</a> models, with the idea that this produces better consistency between the generated audio and video.</li>',on,je,Fs='These are controlled by the <code>guidance_scale</code>, <code>stg_scale</code>, and <code>modality_scale</code> arguments and can be set separately for video and audio. Additionally, for STG the transformer block indices where self-attention is skipped needs to be specified via the <code>spatio_temporal_guidance_blocks</code> argument. 
The LTX-2.X pipelines also support <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">guidance rescaling</a> to help reduce over-exposure, which can be a problem when the guidance scales are set to high values.',ln,ve,dn,Ze,rn,Ge,Es='The LTX-2.X models are sensitive to prompting style. Refer to the <a href="https://ltx.io/model/model-blog/prompting-guide-for-ltx-2" rel="nofollow">official prompting guide</a> for recommendations on how to write a good prompt. Using prompt enhancement, where the supplied prompts are enhanced using the pipeline’s text encoder (by default a <a href="https://huggingface.co/google/gemma-3-12b-it-qat-q4_0-unquantized" rel="nofollow">Gemma 3</a> model) given a system prompt, can also improve sample quality. The optional <code>processor</code> pipeline component needs to be present to use prompt enhancement. Enable prompt enhancement by supplying a <code>system_prompt</code> argument:',pn,Ie,cn,Xe,mn,G,Be,Wn,at,Qs="Pipeline for text-to-video generation.",xn,ot,Ys='Reference: <a href="https://github.com/Lightricks/LTX-Video" rel="nofollow">https://github.com/Lightricks/LTX-Video</a>',Vn,k,Ce,kn,lt,Ss="Function invoked when calling the pipeline for generation.",Rn,z,Nn,H,We,Ln,it,As="Encodes the prompt into text encoder hidden states.",Fn,P,xe,En,dt,zs=`Enhances the supplied <code>prompt</code> by generating a new prompt using the current text encoder (default is a | |
| <code>transformers.Gemma3ForConditionalGeneration</code> model) from it and a system prompt.`,un,Ve,hn,Z,ke,Qn,rt,Hs="Pipeline for image-to-video generation.",Yn,pt,Ps='Reference: <a href="https://github.com/Lightricks/LTX-Video" rel="nofollow">https://github.com/Lightricks/LTX-Video</a>',Sn,ct,$s="TODO",An,R,Re,zn,mt,Ds="Function invoked when calling the pipeline for generation.",Hn,$,Pn,D,Ne,$n,ut,qs="Encodes the prompt into text encoder hidden states.",Dn,q,Le,qn,ht,Ks=`Enhances the supplied <code>prompt</code> by generating a new prompt using the current text encoder (default is a | |
| <code>transformers.Gemma3ForConditionalGeneration</code> model) from it and a system prompt.`,gn,Fe,fn,J,Ee,Kn,gt,Os="Pipeline for video generation which allows image conditions to be inserted at arbitary parts of the video.",On,ft,ea='Reference: <a href="https://github.com/Lightricks/LTX-Video" rel="nofollow">https://github.com/Lightricks/LTX-Video</a>',es,Mt,ta="TODO",ts,N,Qe,ns,_t,na="Function invoked when calling the pipeline for generation.",ss,K,as,L,Ye,os,Tt,sa="Apply first-frame visual conditioning by overwriting tokens at the first-frame positions.",ls,yt,aa=`Only conditions with <code>latent_idx == 0</code> are applied here (matching <code>VideoConditionByLatentIndex</code> in the | |
| reference implementation). Conditions at non-zero latent indices are appended as separate keyframe tokens via | |
| <code>prepare_keyframe_extras</code> (matching <code>VideoConditionByKeyframeIndex</code>) and are skipped here.`,is,O,Se,ds,Jt,oa="Encodes the prompt into text encoder hidden states.",rs,B,Ae,ps,wt,la="Prepare noisy video latents, applying frame conditions.",cs,bt,ia=`First-frame conditions (<code>latent_idx == 0</code>) are applied by overwriting tokens at the first-frame positions | |
| (<code>VideoConditionByLatentIndex</code> semantics). Non-first-frame conditions (<code>latent_idx > 0</code>) are concatenated onto | |
| the main latent sequence with per-token <code>conditioning_mask = strength</code> (<code>VideoConditionByKeyframeIndex</code> | |
| semantics) — the denoising loop’s existing timestep formula <code>t * (1 - conditioning_mask)</code> and post-process | |
| blend <code>denoised * (1 - conditioning_mask) + clean * conditioning_mask</code> then drive them across steps.`,ms,Ut,da="Returns a 4-tuple:",us,jt,ra=`<li><code>latents</code>: packed noisy latents (base tokens + any keyframe tokens cat’d onto the sequence dim).</li> <li><code>conditioning_mask</code>: packed conditioning mask with values in <code>[0, 1]</code> — <code>1</code> at first-frame positions, | |
| <code>strength</code> at keyframe positions, <code>0</code> elsewhere.</li> <li><code>clean_latents</code>: clean condition values at conditioned positions (zeros elsewhere); same shape as | |
| <code>latents</code>.</li> <li><code>keyframe_coords</code>: <code>[B, 3, num_keyframe_patches, 2]</code> positional coordinates to append to <code>video_coords</code>, | |
| or <code>None</code> if there are no non-first-frame conditions.</li>`,hs,ee,ze,gs,vt,pa="Preprocesses the condition images/videos to torch tensors.",fs,te,He,Ms,Zt,ca="Trim a conditioning sequence to the allowed number of frames.",Mn,Pe,_n,W,$e,_s,F,De,Ts,Gt,ma="Function invoked when calling the pipeline for generation.",ys,ne,Js,se,qe,ws,It,ua=`Applies Adaptive Instance Normalization (AdaIN) to a latent tensor based on statistics from a reference latent | |
| tensor.`,bs,E,Ke,Us,Xt,ha=`Applies a non-linear tone-mapping function to latent values to reduce their dynamic range in a perceptually | |
| smooth way using a sigmoid-based compression.`,js,Bt,ga=`This is useful for regularizing high-variance latents or for conditioning outputs during generation, especially | |
| when controlling dynamic behavior with a <code>compression</code> factor.`,Tn,Oe,yn,S,et,vs,Ct,fa="Output class for LTX pipelines.",Jn,tt,wn,Nt,bn;return T=new x({props:{title:"LTX-2",local:"ltx-2",headingTag:"h1"}}),de=new x({props:{title:"Two-stages Generation",local:"two-stages-generation",headingTag:"h2"}}),me=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmxvd01hdGNoRXVsZXJEaXNjcmV0ZVNjaGVkdWxlciUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIlMjBpbXBvcnQlMjBMVFgyUGlwZWxpbmUlMkMlMjBMVFgyTGF0ZW50VXBzYW1wbGVQaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIubGF0ZW50X3Vwc2FtcGxlciUyMGltcG9ydCUyMExUWDJMYXRlbnRVcHNhbXBsZXJNb2RlbCUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIudXRpbHMlMjBpbXBvcnQlMjBTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZW5jb2RlX3ZpZGVvJTBBJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUzQTAlMjIlMEF3aWR0aCUyMCUzRCUyMDc2OCUwQWhlaWdodCUyMCUzRCUyMDUxMiUwQSUwQXBpcGUlMjAlM0QlMjBMVFgyUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMkxpZ2h0cmlja3MlMkZMVFgtMiUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZS5lbmFibGVfc2VxdWVudGlhbF9jcHVfb2ZmbG9hZChkZXZpY2UlM0RkZXZpY2UpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyQSUyMGJlYXV0aWZ1bCUyMHN1bnNldCUyMG92ZXIlMjB0aGUlMjBvY2VhbiUyMiUwQW5lZ2F0aXZlX3Byb21wdCUyMCUzRCUyMCUyMnNoYWt5JTJDJTIwZ2xpdGNoeSUyQyUyMGxvdyUyMHF1YWxpdHklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjBkZWZvcm1lZCUyQyUyMGRpc3RvcnRlZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjBtb3Rpb24lMjBzbWVhciUyQyUyMG1vdGlvbiUyMGFydGlmYWN0cyUyQyUyMGZ1c2VkJTIwZmluZ2VycyUyQyUyMGJhZCUyMGFuYXRvbXklMkMlMjB3ZWlyZCUyMGhhbmQlMkMlMjB1Z2x5JTJDJTIwdHJhbnNpdGlvbiUyQyUyMHN0YXRpYy4lMjIlMEElMEElMjMlMjBTdGFnZSUyMDElMjBkZWZhdWx0JTIwKG5vbi1kaXN0aWxsZWQpJTIwaW5mZXJlbmNlJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlb19sYXRlbnQlMkMlMjBhdWRpb19sYXRlbnQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMj
AlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENDAlMkMlMEElMjAlMjAlMjAlMjBzaWdtYXMlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q0LjAlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBbGF0ZW50X3Vwc2FtcGxlciUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZXJNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyTGlnaHRyaWNrcyUyRkxUWC0yJTIyJTJDJTBBJTIwJTIwJTIwJTIwc3ViZm9sZGVyJTNEJTIybGF0ZW50X3Vwc2FtcGxlciUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpJTBBdXBzYW1wbGVfcGlwZSUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZVBpcGVsaW5lKHZhZSUzRHBpcGUudmFlJTJDJTIwbGF0ZW50X3Vwc2FtcGxlciUzRGxhdGVudF91cHNhbXBsZXIpJTBBdXBzYW1wbGVfcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXVwc2NhbGVkX3ZpZGVvX2xhdGVudCUyMCUzRCUyMHVwc2FtcGxlX3BpcGUoJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUzRHZpZGVvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybGF0ZW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklNUIwJTVEJTBBJTBBJTIzJTIwTG9hZCUyMFN0YWdlJTIwMiUyMGRpc3RpbGxlZCUyMExvUkElMEFwaXBlLmxvYWRfbG9yYV93ZWlnaHRzKCUwQSUyMCUyMCUyMCUyMCUyMkxpZ2h0cmlja3MlMkZMVFgtMiUyMiUyQyUyMGFkYXB0ZXJfbmFtZSUzRCUyMnN0YWdlXzJfZGlzdGlsbGVkJTIyJTJDJTIwd2VpZ2h0X25hbWUlM0QlMjJsdHgtMi0xOWItZGlzdGlsbGVkLWxvcmEtMzg0LnNhZmV0ZW5zb3JzJTIyJTBBKSUwQXBpcGUuc2V0X2FkYXB0ZXJzKCUyMnN0YWdlXzJfZGlzdGlsbGVkJTIyJTJDJTIwMS4wKSUwQSUyMyUyMFZBRSUyMHRpbGluZyUyMGlzJTIwdXN1YWxseSUyMG5lY2Vzc2FyeSUyMHRvJTIwYXZvaWQlMjBPT00lMjBlcnJvciUyMHdoZW4lMjBWQUUlMjBkZWNvZGluZyUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQSUyMyUyMENoYW5nZSUyMHNjaGVkdWxlciUyMHRvJTIwdXNlJTIwU3RhZ2UlMjAyJTIwZGlzdGlsbGVkJTIwc2lnbWFzJTIwYXMlMjBpcyUwQW5ld19zY2hlZHVsZXIlMjAlM0QlMjBGbG93TWF0Y2hFdWxlckRpc2NyZXRlU2NoZWR1bGVyLmZyb21fY29uZmlnKCUwQSUyMCUyMCUyMCUyMHBpcGUuc2NoZWR1bGVyLmNvbmZpZyUyQyUyMHVzZV9keW5hbWljX3NoaWZ0aW5nJTNERmFsc2UlMkMlMjBzaGlmdF90ZXJtaW5hbC
UzRE5vbmUlMEEpJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjBuZXdfc2NoZWR1bGVyJTBBJTIzJTIwU3RhZ2UlMjAyJTIwaW5mZXJlbmNlJTIwd2l0aCUyMGRpc3RpbGxlZCUyMExvUkElMjBhbmQlMjBzaWdtYXMlMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBsYXRlbnRzJTNEdXBzY2FsZWRfdmlkZW9fbGF0ZW50JTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fbGF0ZW50cyUzRGF1ZGlvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzJTJDJTBBJTIwJTIwJTIwJTIwbm9pc2Vfc2NhbGUlM0RTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlNUIwJTVEJTJDJTIwJTIzJTIwcmVub2lzZSUyMHdpdGglMjBmaXJzdCUyMHNpZ21hJTIwdmFsdWUlMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZMaWdodHJpY2tzJTJGTFRYLTIlMkZibG9iJTJGbWFpbiUyRnBhY2thZ2VzJTJGbHR4LXBpcGVsaW5lcyUyRnNyYyUyRmx0eF9waXBlbGluZXMlMkZ0aTJ2aWRfdHdvX3N0YWdlcy5weSUyM0wyMTglMEElMjAlMjAlMjAlMjBzaWdtYXMlM0RTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybnAlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIybHR4Ml9sb3JhX2Rpc3RpbGxlZF9zYW1wbGUubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlowMatchEulerDiscreteScheduler | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2 <span class="hljs-keyword">import</span> LTX2Pipeline, LTX2LatentUpsamplePipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> STAGE_2_DISTILLED_SIGMA_VALUES | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| device = <span class="hljs-string">"cuda:0"</span> | |
| width = <span class="hljs-number">768</span> | |
| height = <span class="hljs-number">512</span> | |
| pipe = LTX2Pipeline.from_pretrained( | |
| <span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_sequential_cpu_offload(device=device) | |
| prompt = <span class="hljs-string">"A beautiful sunset over the ocean"</span> | |
| negative_prompt = <span class="hljs-string">"shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static."</span> | |
| <span class="hljs-comment"># Stage 1 default (non-distilled) inference</span> | |
| frame_rate = <span class="hljs-number">24.0</span> | |
| video_latent, audio_latent = pipe( | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| width=width, | |
| height=height, | |
| num_frames=<span class="hljs-number">121</span>, | |
| frame_rate=frame_rate, | |
| num_inference_steps=<span class="hljs-number">40</span>, | |
| sigmas=<span class="hljs-literal">None</span>, | |
| guidance_scale=<span class="hljs-number">4.0</span>, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained( | |
| <span class="hljs-string">"Lightricks/LTX-2"</span>, | |
| subfolder=<span class="hljs-string">"latent_upsampler"</span>, | |
| torch_dtype=torch.bfloat16, | |
| ) | |
| upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler) | |
| upsample_pipe.enable_model_cpu_offload(device=device) | |
| upscaled_video_latent = upsample_pipe( | |
| latents=video_latent, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| )[<span class="hljs-number">0</span>] | |
| <span class="hljs-comment"># Load Stage 2 distilled LoRA</span> | |
| pipe.load_lora_weights( | |
| <span class="hljs-string">"Lightricks/LTX-2"</span>, adapter_name=<span class="hljs-string">"stage_2_distilled"</span>, weight_name=<span class="hljs-string">"ltx-2-19b-distilled-lora-384.safetensors"</span> | |
| ) | |
| pipe.set_adapters(<span class="hljs-string">"stage_2_distilled"</span>, <span class="hljs-number">1.0</span>) | |
| <span class="hljs-comment"># VAE tiling is usually necessary to avoid OOM error when VAE decoding</span> | |
| pipe.vae.enable_tiling() | |
| <span class="hljs-comment"># Change scheduler to use Stage 2 distilled sigmas as is</span> | |
| new_scheduler = FlowMatchEulerDiscreteScheduler.from_config( | |
| pipe.scheduler.config, use_dynamic_shifting=<span class="hljs-literal">False</span>, shift_terminal=<span class="hljs-literal">None</span> | |
| ) | |
| pipe.scheduler = new_scheduler | |
| <span class="hljs-comment"># Stage 2 inference with distilled LoRA and sigmas</span> | |
| video, audio = pipe( | |
| latents=upscaled_video_latent, | |
| audio_latents=audio_latent, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| num_inference_steps=<span class="hljs-number">3</span>, | |
| noise_scale=STAGE_2_DISTILLED_SIGMA_VALUES[<span class="hljs-number">0</span>], <span class="hljs-comment"># renoise with first sigma value https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-pipelines/src/ltx_pipelines/ti2vid_two_stages.py#L218</span> | |
| sigmas=STAGE_2_DISTILLED_SIGMA_VALUES, | |
| guidance_scale=<span class="hljs-number">1.0</span>, | |
| output_type=<span class="hljs-string">"np"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| encode_video( | |
| video[<span class="hljs-number">0</span>], | |
| fps=frame_rate, | |
| audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| audio_sample_rate=pipe.vocoder.config.output_sampling_rate, | |
| output_path=<span class="hljs-string">"ltx2_lora_distilled_sample.mp4"</span>, | |
| )`,lang:"py",wrap:!1}}),ue=new x({props:{title:"Distilled checkpoint generation",local:"distilled-checkpoint-generation",headingTag:"h2"}}),ge=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyJTIwaW1wb3J0JTIwTFRYMlBpcGVsaW5lJTJDJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmxhdGVudF91cHNhbXBsZXIlMjBpbXBvcnQlMjBMVFgyTGF0ZW50VXBzYW1wbGVyTW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnV0aWxzJTIwaW1wb3J0JTIwRElTVElMTEVEX1NJR01BX1ZBTFVFUyUyQyUyMFNUQUdFXzJfRElTVElMTEVEX1NJR01BX1ZBTFVFUyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEElMEFkZXZpY2UlMjAlM0QlMjAlMjJjdWRhJTIyJTBBd2lkdGglMjAlM0QlMjA3NjglMEFoZWlnaHQlMjAlM0QlMjA1MTIlMEFyYW5kb21fc2VlZCUyMCUzRCUyMDQyJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSkubWFudWFsX3NlZWQocmFuZG9tX3NlZWQpJTBBbW9kZWxfcGF0aCUyMCUzRCUyMCUyMnJvb3RvbmNoYWlyJTJGTFRYLTItMTliLWRpc3RpbGxlZCUyMiUwQSUwQXBpcGUlMjAlM0QlMjBMVFgyUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX3BhdGglMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXBpcGUuZW5hYmxlX3NlcXVlbnRpYWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBiZWF1dGlmdWwlMjBzdW5zZXQlMjBvdmVyJTIwdGhlJTIwb2NlYW4lMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJzaGFreSUyQyUyMGdsaXRjaHklMkMlMjBsb3clMjBxdWFsaXR5JTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwZGVmb3JtZWQlMkMlMjBkaXN0b3J0ZWQlMkMlMjBkaXNmaWd1cmVkJTJDJTIwbW90aW9uJTIwc21lYXIlMkMlMjBtb3Rpb24lMjBhcnRpZmFjdHMlMkMlMjBmdXNlZCUyMGZpbmdlcnMlMkMlMjBiYWQlMjBhbmF0b215JTJDJTIwd2VpcmQlMjBoYW5kJTJDJTIwdWdseSUyQyUyMHRyYW5zaXRpb24lMkMlMjBzdGF0aWMuJTIyJTBBJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlb19sYXRlbnQlMkMlMjBhdWRpb19sYXRlbnQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZ
yYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEOCUyQyUwQSUyMCUyMCUyMCUyMHNpZ21hcyUzRERJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybGF0ZW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFsYXRlbnRfdXBzYW1wbGVyJTIwJTNEJTIwTFRYMkxhdGVudFVwc2FtcGxlck1vZGVsLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9wYXRoJTJDJTBBJTIwJTIwJTIwJTIwc3ViZm9sZGVyJTNEJTIybGF0ZW50X3Vwc2FtcGxlciUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpJTBBdXBzYW1wbGVfcGlwZSUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZVBpcGVsaW5lKHZhZSUzRHBpcGUudmFlJTJDJTIwbGF0ZW50X3Vwc2FtcGxlciUzRGxhdGVudF91cHNhbXBsZXIpJTBBdXBzYW1wbGVfcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXVwc2NhbGVkX3ZpZGVvX2xhdGVudCUyMCUzRCUyMHVwc2FtcGxlX3BpcGUoJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUzRHZpZGVvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybGF0ZW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklNUIwJTVEJTBBJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUzRHVwc2NhbGVkX3ZpZGVvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2xhdGVudHMlM0RhdWRpb19sYXRlbnQlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMyUyQyUwQSUyMCUyMCUyMCUyMG5vaXNlX3NjYWxlJTNEU1RBR0VfMl9ESVNUSUxMRURfU0lHTUFfVkFMVUVTJTVCMCU1RCUyQyUyMCUyMyUyMHJlbm9pc2UlMjB3aXRoJTIwZmlyc3QlMjBzaWdtYSUyMHZhbHVlJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGTGlnaHRyaWNrcyUyRkxUWC0yJTJGYmxvYiUyRm1haW4lMkZwYWNrYWdlcyUyRmx0eC1waXBlbGluZXMlMkZzcmMlMkZsdHhfcGlwZWxpbmVzJTJGZGlzdGlsbGVkLnB5JTIzTDE3OCUwQSUyMCUyMCUyMCUyMHNpZ21hcyUzRFNUQUdFXzJfRElTVElMTEVEX1NJR01BX1ZBTFVFUyUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNEMS4wJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCU
yMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3BhdGglM0QlMjJsdHgyX2Rpc3RpbGxlZF9zYW1wbGUubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2 <span class="hljs-keyword">import</span> LTX2Pipeline, LTX2LatentUpsamplePipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| device = <span class="hljs-string">"cuda"</span> | |
| width = <span class="hljs-number">768</span> | |
| height = <span class="hljs-number">512</span> | |
| random_seed = <span class="hljs-number">42</span> | |
| generator = torch.Generator(device).manual_seed(random_seed) | |
| model_path = <span class="hljs-string">"rootonchair/LTX-2-19b-distilled"</span> | |
| pipe = LTX2Pipeline.from_pretrained( | |
| model_path, torch_dtype=torch.bfloat16 | |
| ) | |
| pipe.enable_sequential_cpu_offload(device=device) | |
| prompt = <span class="hljs-string">"A beautiful sunset over the ocean"</span> | |
| negative_prompt = <span class="hljs-string">"shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static."</span> | |
| frame_rate = <span class="hljs-number">24.0</span> | |
| video_latent, audio_latent = pipe( | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| width=width, | |
| height=height, | |
| num_frames=<span class="hljs-number">121</span>, | |
| frame_rate=frame_rate, | |
| num_inference_steps=<span class="hljs-number">8</span>, | |
| sigmas=DISTILLED_SIGMA_VALUES, | |
| guidance_scale=<span class="hljs-number">1.0</span>, | |
| generator=generator, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained( | |
| model_path, | |
| subfolder=<span class="hljs-string">"latent_upsampler"</span>, | |
| torch_dtype=torch.bfloat16, | |
| ) | |
| upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler) | |
| upsample_pipe.enable_model_cpu_offload(device=device) | |
| upscaled_video_latent = upsample_pipe( | |
| latents=video_latent, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| )[<span class="hljs-number">0</span>] | |
| video, audio = pipe( | |
| latents=upscaled_video_latent, | |
| audio_latents=audio_latent, | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| num_inference_steps=<span class="hljs-number">3</span>, | |
| noise_scale=STAGE_2_DISTILLED_SIGMA_VALUES[<span class="hljs-number">0</span>], <span class="hljs-comment"># renoise with first sigma value https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-pipelines/src/ltx_pipelines/distilled.py#L178</span> | |
| sigmas=STAGE_2_DISTILLED_SIGMA_VALUES, | |
| generator=generator, | |
| guidance_scale=<span class="hljs-number">1.0</span>, | |
| output_type=<span class="hljs-string">"np"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| encode_video( | |
| video[<span class="hljs-number">0</span>], | |
| fps=frame_rate, | |
| audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| audio_sample_rate=pipe.vocoder.config.output_sampling_rate, | |
| output_path=<span class="hljs-string">"ltx2_distilled_sample.mp4"</span>, | |
| )`,lang:"py",wrap:!1}}),fe=new x({props:{title:"Condition Pipeline Generation",local:"condition-pipeline-generation",headingTag:"h2"}}),_e=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lJTJDJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmxhdGVudF91cHNhbXBsZXIlMjBpbXBvcnQlMjBMVFgyTGF0ZW50VXBzYW1wbGVyTW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnBpcGVsaW5lX2x0eDJfY29uZGl0aW9uJTIwaW1wb3J0JTIwTFRYMlZpZGVvQ29uZGl0aW9uJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi51dGlscyUyMGltcG9ydCUyMERJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMjBTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZW5jb2RlX3ZpZGVvJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMEElMEFkZXZpY2UlMjAlM0QlMjAlMjJjdWRhJTIyJTBBd2lkdGglMjAlM0QlMjA3NjglMEFoZWlnaHQlMjAlM0QlMjA1MTIlMEFyYW5kb21fc2VlZCUyMCUzRCUyMDQyJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSkubWFudWFsX3NlZWQocmFuZG9tX3NlZWQpJTBBbW9kZWxfcGF0aCUyMCUzRCUyMCUyMnJvb3RvbmNoYWlyJTJGTFRYLTItMTliLWRpc3RpbGxlZCUyMiUwQSUwQXBpcGUlMjAlM0QlMjBMVFgyQ29uZGl0aW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX3BhdGglMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUuZW5hYmxlX3NlcXVlbnRpYWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQSUwQXByb21wdCUyMCUzRCUyMCglMEElMjAlMjAlMjAlMjAlMjJDRyUyMGFuaW1hdGlvbiUyMHN0eWxlJTJDJTIwYSUyMHNtYWxsJTIwYmx1ZSUyMGJpcmQlMjB0YWtlcyUyMG9mZiUyMGZyb20lMjB0aGUlMjBncm91bmQlMkMlMjBmbGFwcGluZyUyMGl0cyUyMHdpbmdzLiUyMFRoZSUyMGJpcmQncyUyMGZlYXRoZXJzJTIwYXJlJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyZGVsaWNhdGUlMkMlMjB3aXRoJTIwYSUyMHVuaXF1ZSUyMHBhdHRlcm4lMjBvbiUyMGl0cyUyMGNoZXN0LiUyMFRoZSUyMGJhY2tncm91bmQlMjBzaG93cyUyMGElMjBibHVlJTIwc2t5JTIwd2l0aCUyMHdoaXRlJTIwY2xvdWRzJTIwdW5kZXIlMjBicmlnaHQlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJzdW5zaGluZS4lMjBUaGUlMjBjYW1lcmElMjBmb2xsb3dzJTIwdGhlJTIwYmlyZCUyMHVwd2FyZCUyQyUyMGNhcHR1cmluZyUyMGl0cyUyMGZsaWdodCUyMGFuZCUyMHRoZSUyMHZhc3RuZXN
zJTIwb2YlMjB0aGUlMjBza3klMjBmcm9tJTIwYSUyMGNsb3NlLXVwJTJDJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIybG93LWFuZ2xlJTIwcGVyc3BlY3RpdmUuJTIyJTBBKSUwQSUwQWZpcnN0X2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZmbGYydl9pbnB1dF9maXJzdF9mcmFtZS5wbmclMjIlMkMlMEEpJTBBbGFzdF9pbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGZmxmMnZfaW5wdXRfbGFzdF9mcmFtZS5wbmclMjIlMkMlMEEpJTBBZmlyc3RfY29uZCUyMCUzRCUyMExUWDJWaWRlb0NvbmRpdGlvbihmcmFtZXMlM0RmaXJzdF9pbWFnZSUyQyUyMGluZGV4JTNEMCUyQyUyMHN0cmVuZ3RoJTNEMS4wKSUwQWxhc3RfY29uZCUyMCUzRCUyMExUWDJWaWRlb0NvbmRpdGlvbihmcmFtZXMlM0RsYXN0X2ltYWdlJTJDJTIwaW5kZXglM0QtMSUyQyUyMHN0cmVuZ3RoJTNEMS4wKSUwQWNvbmRpdGlvbnMlMjAlM0QlMjAlNUJmaXJzdF9jb25kJTJDJTIwbGFzdF9jb25kJTVEJTBBJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlb19sYXRlbnQlMkMlMjBhdWRpb19sYXRlbnQlMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGNvbmRpdGlvbnMlM0Rjb25kaXRpb25zJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0R3aWR0aCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0QxMjElMkMlMEElMjAlMjAlMjAlMjBmcmFtZV9yYXRlJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q4JTJDJTBBJTIwJTIwJTIwJTIwc2lnbWFzJTNERElTVElMTEVEX1NJR01BX1ZBTFVFUyUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNEMS4wJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJsYXRlbnQlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWxhdGVudF91cHNhbXBsZXIlMjAlM0QlMjBMVFgyTGF0ZW50VXBzYW1wbGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX3BhdGglMkMlMEElMjAlMjAlMjAlMjBzdWJmb2xkZXIlM0QlMjJsYXRlbnRfdXBzYW1wbGVyJTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSklMEF1cHNhbXBsZV9waXBlJTI
wJTNEJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUodmFlJTNEcGlwZS52YWUlMkMlMjBsYXRlbnRfdXBzYW1wbGVyJTNEbGF0ZW50X3Vwc2FtcGxlciklMEF1cHNhbXBsZV9waXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZChkZXZpY2UlM0RkZXZpY2UpJTBBdXBzY2FsZWRfdmlkZW9fbGF0ZW50JTIwJTNEJTIwdXBzYW1wbGVfcGlwZSglMEElMjAlMjAlMjAlMjBsYXRlbnRzJTNEdmlkZW9fbGF0ZW50JTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJsYXRlbnQlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSU1QjAlNUQlMEElMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBsYXRlbnRzJTNEdXBzY2FsZWRfdmlkZW9fbGF0ZW50JTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fbGF0ZW50cyUzRGF1ZGlvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMjAqJTIwMiUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyMColMjAyJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDMlMkMlMEElMjAlMjAlMjAlMjBzaWdtYXMlM0RTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0RnZW5lcmF0b3IlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybnAlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIybHR4Ml9kaXN0aWxsZWRfZmxmMnYubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ConditionPipeline, LTX2LatentUpsamplePipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.pipeline_ltx2_condition <span class="hljs-keyword">import</span> LTX2VideoCondition | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| device = <span class="hljs-string">"cuda"</span> | |
| width = <span class="hljs-number">768</span> | |
| height = <span class="hljs-number">512</span> | |
| random_seed = <span class="hljs-number">42</span> | |
| generator = torch.Generator(device).manual_seed(random_seed) | |
| model_path = <span class="hljs-string">"rootonchair/LTX-2-19b-distilled"</span> | |
| pipe = LTX2ConditionPipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16) | |
| pipe.enable_sequential_cpu_offload(device=device) | |
| pipe.vae.enable_tiling() | |
| prompt = ( | |
| <span class="hljs-string">"CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are "</span> | |
| <span class="hljs-string">"delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright "</span> | |
| <span class="hljs-string">"sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, "</span> | |
| <span class="hljs-string">"low-angle perspective."</span> | |
| ) | |
| first_image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png"</span>, | |
| ) | |
| last_image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png"</span>, | |
| ) | |
| first_cond = LTX2VideoCondition(frames=first_image, index=<span class="hljs-number">0</span>, strength=<span class="hljs-number">1.0</span>) | |
| last_cond = LTX2VideoCondition(frames=last_image, index=-<span class="hljs-number">1</span>, strength=<span class="hljs-number">1.0</span>) | |
| conditions = [first_cond, last_cond] | |
| frame_rate = <span class="hljs-number">24.0</span> | |
| video_latent, audio_latent = pipe( | |
| conditions=conditions, | |
| prompt=prompt, | |
| width=width, | |
| height=height, | |
| num_frames=<span class="hljs-number">121</span>, | |
| frame_rate=frame_rate, | |
| num_inference_steps=<span class="hljs-number">8</span>, | |
| sigmas=DISTILLED_SIGMA_VALUES, | |
| guidance_scale=<span class="hljs-number">1.0</span>, | |
| generator=generator, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained( | |
| model_path, | |
| subfolder=<span class="hljs-string">"latent_upsampler"</span>, | |
| torch_dtype=torch.bfloat16, | |
| ) | |
| upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler) | |
| upsample_pipe.enable_model_cpu_offload(device=device) | |
| upscaled_video_latent = upsample_pipe( | |
| latents=video_latent, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| )[<span class="hljs-number">0</span>] | |
| video, audio = pipe( | |
| latents=upscaled_video_latent, | |
| audio_latents=audio_latent, | |
| prompt=prompt, | |
| width=width * <span class="hljs-number">2</span>, | |
| height=height * <span class="hljs-number">2</span>, | |
| num_inference_steps=<span class="hljs-number">3</span>, | |
| sigmas=STAGE_2_DISTILLED_SIGMA_VALUES, | |
| generator=generator, | |
| guidance_scale=<span class="hljs-number">1.0</span>, | |
| output_type=<span class="hljs-string">"np"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| encode_video( | |
| video[<span class="hljs-number">0</span>], | |
| fps=frame_rate, | |
| audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| audio_sample_rate=pipe.vocoder.config.output_sampling_rate, | |
| output_path=<span class="hljs-string">"ltx2_distilled_flf2v.mp4"</span>, | |
| )`,lang:"py",wrap:!1}}),ye=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5waXBlbGluZV9sdHgyX2NvbmRpdGlvbiUyMGltcG9ydCUyMExUWDJWaWRlb0NvbmRpdGlvbiUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnV0aWxzJTIwaW1wb3J0JTIwREVGQVVMVF9ORUdBVElWRV9QUk9NUFQlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwbG9hZF9pbWFnZSUyQyUyMGxvYWRfdmlkZW8lMEElMEFkZXZpY2UlMjAlM0QlMjAlMjJjdWRhJTIyJTBBd2lkdGglMjAlM0QlMjA3NjglMEFoZWlnaHQlMjAlM0QlMjA1MTIlMEFyYW5kb21fc2VlZCUyMCUzRCUyMDQyJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSkubWFudWFsX3NlZWQocmFuZG9tX3NlZWQpJTBBbW9kZWxfcGF0aCUyMCUzRCUyMCUyMnJvb3RvbmNoYWlyJTJGTFRYLTItMTliLWRpc3RpbGxlZCUyMiUwQSUwQXBpcGUlMjAlM0QlMjBMVFgyQ29uZGl0aW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX3BhdGglMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUuZW5hYmxlX3NlcXVlbnRpYWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQSUwQXByb21wdCUyMCUzRCUyMCglMEElMjAlMjAlMjAlMjAlMjJUaGUlMjB2aWRlbyUyMGRlcGljdHMlMjBhJTIwbG9uZyUyQyUyMHN0cmFpZ2h0JTIwaGlnaHdheSUyMHN0cmV0Y2hpbmclMjBpbnRvJTIwdGhlJTIwZGlzdGFuY2UlMkMlMjBmbGFua2VkJTIwYnklMjBtZXRhbCUyMGd1YXJkcmFpbHMuJTIwVGhlJTIwcm9hZCUyMGlzJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyZGl2aWRlZCUyMGludG8lMjBtdWx0aXBsZSUyMGxhbmVzJTJDJTIwd2l0aCUyMGElMjBmZXclMjB2ZWhpY2xlcyUyMHZpc2libGUlMjBpbiUyMHRoZSUyMGZhciUyMGRpc3RhbmNlLiUyMFRoZSUyMHN1cnJvdW5kaW5nJTIwbGFuZHNjYXBlJTIwZmVhdHVyZXMlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJkcnklMkMlMjBncmFzc3klMjBmaWVsZHMlMjBvbiUyMG9uZSUyMHNpZGUlMjBhbmQlMjByb2xsaW5nJTIwaGlsbHMlMjBvbiUyMHRoZSUyMG90aGVyLiUyMFRoZSUyMHNreSUyMGlzJTIwbW9zdGx5JTIwY2xlYXIlMjB3aXRoJTIwYSUyMGZldyUyMHNjYXR0ZXJlZCUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmNsb3VkcyUyQyUyMHN1Z2dlc3RpbmclMjBhJTIwYnJpZ2h0JTJDJTIwc3VubnklMjBkYXkuJTIwQW5kJTIwdGhlbiUyMHRoZSUyMGNhbWVyYSUyMHN3aXRjaCUyMHRvJTIwYSUyMHdpbmRpbmclMjBtb3VudGFpbiUyMHJvYWQlMjBjb3ZlcmVkJTIwaW4lMjBzbm93JTJDJTI
wJTIyJTBBJTIwJTIwJTIwJTIwJTIyd2l0aCUyMGElMjBzaW5nbGUlMjB2ZWhpY2xlJTIwdHJhdmVsaW5nJTIwYWxvbmclMjBpdC4lMjBUaGUlMjByb2FkJTIwaXMlMjBmbGFua2VkJTIwYnklMjBzdGVlcCUyQyUyMHJvY2t5JTIwY2xpZmZzJTIwYW5kJTIwc3BhcnNlJTIwdmVnZXRhdGlvbi4lMjBUaGUlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJsYW5kc2NhcGUlMjBpcyUyMGNoYXJhY3Rlcml6ZWQlMjBieSUyMHJ1Z2dlZCUyMHRlcnJhaW4lMjBhbmQlMjBhJTIwcml2ZXIlMjB2aXNpYmxlJTIwaW4lMjB0aGUlMjBkaXN0YW5jZS4lMjBUaGUlMjBzY2VuZSUyMGNhcHR1cmVzJTIwdGhlJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyc29saXR1ZGUlMjBhbmQlMjBiZWF1dHklMjBvZiUyMGElMjB3aW50ZXIlMjBkcml2ZSUyMHRocm91Z2glMjBhJTIwbW91bnRhaW5vdXMlMjByZWdpb24uJTIyJTBBKSUwQSUwQWNvbmRfdmlkZW8lMjAlM0QlMjBsb2FkX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmNvc21vcyUyRmNvc21vcy12aWRlbzJ3b3JsZC1pbnB1dC12aWQubXA0JTIyJTBBKSUwQWNvbmRfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmNvc21vcyUyRmNvc21vcy12aWRlbzJ3b3JsZC1pbnB1dC5qcGclMjIlMEEpJTBBdmlkZW9fY29uZCUyMCUzRCUyMExUWDJWaWRlb0NvbmRpdGlvbihmcmFtZXMlM0Rjb25kX3ZpZGVvJTJDJTIwaW5kZXglM0QwJTJDJTIwc3RyZW5ndGglM0QxLjApJTBBaW1hZ2VfY29uZCUyMCUzRCUyMExUWDJWaWRlb0NvbmRpdGlvbihmcmFtZXMlM0Rjb25kX2ltYWdlJTJDJTIwaW5kZXglM0Q4JTJDJTIwc3RyZW5ndGglM0QxLjApJTBBY29uZGl0aW9ucyUyMCUzRCUyMCU1QnZpZGVvX2NvbmQlMkMlMjBpbWFnZV9jb25kJTVEJTBBJTBBZnJhbWVfcmF0ZSUyMCUzRCUyMDI0LjAlMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBjb25kaXRpb25zJTNEY29uZGl0aW9ucyUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRERFRkFVTFRfTkVHQVRJVkVfUFJPTVBUJTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0R3aWR0aCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0QxMjElMkMlMEElMjAlMjAlMjAlMjBmcmFtZV9yYXRlJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q0MCUyQyUwQSUyMCUyMCUyMCUyMGd
1aWRhbmNlX3NjYWxlJTNENC4wJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3BhdGglM0QlMjJsdHgyX2NvbmRfdmlkZW8ubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ConditionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.pipeline_ltx2_condition <span class="hljs-keyword">import</span> LTX2VideoCondition | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DEFAULT_NEGATIVE_PROMPT | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, load_video | |
| device = <span class="hljs-string">"cuda"</span> | |
| width = <span class="hljs-number">768</span> | |
| height = <span class="hljs-number">512</span> | |
| random_seed = <span class="hljs-number">42</span> | |
| generator = torch.Generator(device).manual_seed(random_seed) | |
| model_path = <span class="hljs-string">"rootonchair/LTX-2-19b-distilled"</span> | |
| pipe = LTX2ConditionPipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16) | |
| pipe.enable_sequential_cpu_offload(device=device) | |
| pipe.vae.enable_tiling() | |
| prompt = ( | |
| <span class="hljs-string">"The video depicts a long, straight highway stretching into the distance, flanked by metal guardrails. The road is "</span> | |
| <span class="hljs-string">"divided into multiple lanes, with a few vehicles visible in the far distance. The surrounding landscape features "</span> | |
| <span class="hljs-string">"dry, grassy fields on one side and rolling hills on the other. The sky is mostly clear with a few scattered "</span> | |
| <span class="hljs-string">"clouds, suggesting a bright, sunny day. And then the camera switch to a winding mountain road covered in snow, "</span> | |
| <span class="hljs-string">"with a single vehicle traveling along it. The road is flanked by steep, rocky cliffs and sparse vegetation. The "</span> | |
| <span class="hljs-string">"landscape is characterized by rugged terrain and a river visible in the distance. The scene captures the "</span> | |
| <span class="hljs-string">"solitude and beauty of a winter drive through a mountainous region."</span> | |
| ) | |
| cond_video = load_video( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input-vid.mp4"</span> | |
| ) | |
| cond_image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input.jpg"</span> | |
| ) | |
| video_cond = LTX2VideoCondition(frames=cond_video, index=<span class="hljs-number">0</span>, strength=<span class="hljs-number">1.0</span>) | |
| image_cond = LTX2VideoCondition(frames=cond_image, index=<span class="hljs-number">8</span>, strength=<span class="hljs-number">1.0</span>) | |
| conditions = [video_cond, image_cond] | |
| frame_rate = <span class="hljs-number">24.0</span> | |
| video, audio = pipe( | |
| conditions=conditions, | |
| prompt=prompt, | |
| negative_prompt=DEFAULT_NEGATIVE_PROMPT, | |
| width=width, | |
| height=height, | |
| num_frames=<span class="hljs-number">121</span>, | |
| frame_rate=frame_rate, | |
| num_inference_steps=<span class="hljs-number">40</span>, | |
| guidance_scale=<span class="hljs-number">4.0</span>, | |
| generator=generator, | |
| output_type=<span class="hljs-string">"np"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| encode_video( | |
| video[<span class="hljs-number">0</span>], | |
| fps=frame_rate, | |
| audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| audio_sample_rate=pipe.vocoder.config.output_sampling_rate, | |
| output_path=<span class="hljs-string">"ltx2_cond_video.mp4"</span>, | |
| )`,lang:"py",wrap:!1}}),we=new x({props:{title:"Multimodal Guidance",local:"multimodal-guidance",headingTag:"h2"}}),ve=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIudXRpbHMlMjBpbXBvcnQlMjBERUZBVUxUX05FR0FUSVZFX1BST01QVCUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBsb2FkX2ltYWdlJTBBJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUwQXdpZHRoJTIwJTNEJTIwNzY4JTBBaGVpZ2h0JTIwJTNEJTIwNTEyJTBBcmFuZG9tX3NlZWQlMjAlM0QlMjA0MiUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSkubWFudWFsX3NlZWQocmFuZG9tX3NlZWQpJTBBbW9kZWxfcGF0aCUyMCUzRCUyMCUyMmRpZmZ1c2VycyUyRkxUWC0yLjMtRGlmZnVzZXJzJTIyJTBBJTBBcGlwZSUyMCUzRCUyMExUWDJJbWFnZVRvVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfcGF0aCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfc2VxdWVudGlhbF9jcHVfb2ZmbG9hZChkZXZpY2UlM0RkZXZpY2UpJTBBcGlwZS52YWUuZW5hYmxlX3RpbGluZygpJTBBJTBBcHJvbXB0JTIwJTNEJTIwKCUwQSUyMCUyMCUyMCUyMCUyMkFuJTIwYXN0cm9uYXV0JTIwaGF0Y2hlcyUyMGZyb20lMjBhJTIwZnJhZ2lsZSUyMGVnZyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwTW9vbiUyQyUyMHRoZSUyMHNoZWxsJTIwY3JhY2tpbmclMjBhbmQlMjBwZWVsaW5nJTIwYXBhcnQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmdlbnRsZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uLiUyMEZpbmUlMjBsdW5hciUyMGR1c3QlMjBsaWZ0cyUyMGFuZCUyMGRyaWZ0cyUyMG91dHdhcmQlMjB3aXRoJTIwZWFjaCUyMG1vdmVtZW50JTJDJTIwZmxvYXRpbmclMjBpbiUyMHNsb3clMjBhcmNzJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYmVmb3JlJTIwc2V0dGxpbmclMjBiYWNrJTIwb250byUyMHRoZSUyMGdyb3VuZC4lMjBUaGUlMjBhc3Ryb25hdXQlMjBwdXNoZXMlMjBmcmVlJTIwaW4lMjBhJTIwZGVsaWJlcmF0ZSUyQyUyMHdlaWdodGxlc3MlMjBtb3Rpb24lMkMlMjBzbWFsbCUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmZyYWdtZW50cyUyMG9mJTIwdGhlJTIwZWdnJTIwdHVtYmxpbmclMjBhbmQlMjBzcGlubmluZyUyMHRocm91Z2glMjB0aGUlMjBhaXIuJTIwSW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwdGhlJTIwZGVlcCUyMGRhcmtuZXNzJTIwb2YlMjBzcGFjZSUyMHN1YnRseSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnNoaWZ
0cyUyMGFzJTIwc3RhcnMlMjBnbGlkZSUyMHdpdGglMjB0aGUlMjBjYW1lcmEncyUyMG1vdmVtZW50JTJDJTIwZW1waGFzaXppbmclMjB2YXN0JTIwZGVwdGglMjBhbmQlMjBzY2FsZS4lMjBUaGUlMjBjYW1lcmElMjBwZXJmb3JtcyUyMGElMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJzbW9vdGglMkMlMjBjaW5lbWF0aWMlMjBzbG93JTIwcHVzaC1pbiUyQyUyMHdpdGglMjBuYXR1cmFsJTIwcGFyYWxsYXglMjBiZXR3ZWVuJTIwdGhlJTIwZm9yZWdyb3VuZCUyMGR1c3QlMkMlMjB0aGUlMjBhc3Ryb25hdXQlMkMlMjBhbmQlMjB0aGUlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJkaXN0YW50JTIwc3RhcmZpZWxkLiUyMFVsdHJhLXJlYWxpc3RpYyUyMGRldGFpbCUyQyUyMHBoeXNpY2FsbHklMjBhY2N1cmF0ZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uJTJDJTIwY2luZW1hdGljJTIwbGlnaHRpbmclMkMlMjBhbmQlMjBhJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYnJlYXRoLXRha2luZyUyQyUyMG1vdmllLWxpa2UlMjBzaG90LiUyMiUwQSklMEElMEFpbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGYXN0cm9uYXV0LmpwZyUyMiUyQyUwQSklMEElMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBpbWFnZSUzRGltYWdlJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEREVGQVVMVF9ORUdBVElWRV9QUk9NUFQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRHdpZHRoJTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNEaGVpZ2h0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDEyMSUyQyUwQSUyMCUyMCUyMCUyMGZyYW1lX3JhdGUlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDMwJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0QzLjAlMkMlMjAlMjAlMjMlMjBSZWNvbW1lbmRlZCUyMExUWC0yLjMlMjBndWlkYW5jZSUyMHBhcmFtZXRlcnMlMEElMjAlMjAlMjAlMjBzdGdfc2NhbGUlM0QxLjAlMkMlMjAlMjAlMjMlMjBOb3RlJTIwdGhhdCUyMDAuMCUyMChub3QlMjAxLjApJTIwbWVhbnMlMjB0aGF0JTIwU1RHJTIwaXMlMjBkaXNhYmxlZCUyMChhbGwlMjBvdGhlciUyMGd1aWRhbmNlJTIwaXMlMjBkaXNhYmxlZCUyMGF0JTIwMS4wKSUwQSUyMCUyMCUyMCUyMG1vZGFsaXR5X3NjYWxlJTNEMy4wJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2VfcmVzY2FsZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2d1aWRhbmNlX3NjYWxlJTNENy4wJTJDJTIwJTIwJTIzJTIwTm90ZSUyMHRoYXQlMjBhJTIwaGlnaGVyJTIwQ0ZHJTIwZ3VpZGFuY2UlMjBzY2FsZSUyMGlzJTI
wcmVjb21tZW5kZWQlMjBmb3IlMjBhdWRpbyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3N0Z19zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX21vZGFsaXR5X3NjYWxlJTNEMy4wJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fZ3VpZGFuY2VfcmVzY2FsZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMHNwYXRpb190ZW1wb3JhbF9ndWlkYW5jZV9ibG9ja3MlM0QlNUIyOCU1RCUyQyUwQSUyMCUyMCUyMCUyMHVzZV9jcm9zc190aW1lc3RlcCUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0RnZW5lcmF0b3IlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMm5wJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFlbmNvZGVfdmlkZW8oJTBBJTIwJTIwJTIwJTIwdmlkZW8lNUIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwZnBzJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvJTNEYXVkaW8lNUIwJTVELmZsb2F0KCkuY3B1KCklMkMlMEElMjAlMjAlMjAlMjBhdWRpb19zYW1wbGVfcmF0ZSUzRHBpcGUudm9jb2Rlci5jb25maWcub3V0cHV0X3NhbXBsaW5nX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMmx0eDJfM19pMnZfc3RhZ2VfMS5tcDQlMjIlMkMlMEEp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ImageToVideoPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DEFAULT_NEGATIVE_PROMPT | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| device = <span class="hljs-string">"cuda"</span> | |
| width = <span class="hljs-number">768</span> | |
| height = <span class="hljs-number">512</span> | |
| random_seed = <span class="hljs-number">42</span> | |
| frame_rate = <span class="hljs-number">24.0</span> | |
| generator = torch.Generator(device).manual_seed(random_seed) | |
| model_path = <span class="hljs-string">"diffusers/LTX-2.3-Diffusers"</span> | |
| pipe = LTX2ImageToVideoPipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16) | |
| pipe.enable_sequential_cpu_offload(device=device) | |
| pipe.vae.enable_tiling() | |
| prompt = ( | |
| <span class="hljs-string">"An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in "</span> | |
| <span class="hljs-string">"gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs "</span> | |
| <span class="hljs-string">"before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small "</span> | |
| <span class="hljs-string">"fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly "</span> | |
| <span class="hljs-string">"shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a "</span> | |
| <span class="hljs-string">"smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the "</span> | |
| <span class="hljs-string">"distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a "</span> | |
| <span class="hljs-string">"breath-taking, movie-like shot."</span> | |
| ) | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span>, | |
| ) | |
| video, audio = pipe( | |
| image=image, | |
| prompt=prompt, | |
| negative_prompt=DEFAULT_NEGATIVE_PROMPT, | |
| width=width, | |
| height=height, | |
| num_frames=<span class="hljs-number">121</span>, | |
| frame_rate=frame_rate, | |
| num_inference_steps=<span class="hljs-number">30</span>, | |
| guidance_scale=<span class="hljs-number">3.0</span>, <span class="hljs-comment"># Recommended LTX-2.3 guidance parameters</span> | |
| stg_scale=<span class="hljs-number">1.0</span>, <span class="hljs-comment"># Note that 0.0 (not 1.0) means that STG is disabled (all other guidance is disabled at 1.0)</span> | |
| modality_scale=<span class="hljs-number">3.0</span>, | |
| guidance_rescale=<span class="hljs-number">0.7</span>, | |
| audio_guidance_scale=<span class="hljs-number">7.0</span>, <span class="hljs-comment"># Note that a higher CFG guidance scale is recommended for audio</span> | |
| audio_stg_scale=<span class="hljs-number">1.0</span>, | |
| audio_modality_scale=<span class="hljs-number">3.0</span>, | |
| audio_guidance_rescale=<span class="hljs-number">0.7</span>, | |
| spatio_temporal_guidance_blocks=[<span class="hljs-number">28</span>], | |
| use_cross_timestep=<span class="hljs-literal">True</span>, | |
| generator=generator, | |
| output_type=<span class="hljs-string">"np"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| encode_video( | |
| video[<span class="hljs-number">0</span>], | |
| fps=frame_rate, | |
| audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| audio_sample_rate=pipe.vocoder.config.output_sampling_rate, | |
| output_path=<span class="hljs-string">"ltx2_3_i2v_stage_1.mp4"</span>, | |
| )`,lang:"py",wrap:!1}}),Ze=new x({props:{title:"Prompt Enhancement",local:"prompt-enhancement",headingTag:"h2"}}),Ie=new Y({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwR2VtbWEzUHJvY2Vzc29yJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMExUWDJQaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnV0aWxzJTIwaW1wb3J0JTIwREVGQVVMVF9ORUdBVElWRV9QUk9NUFQlMkMlMjBUMlZfREVGQVVMVF9TWVNURU1fUFJPTVBUJTBBJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUwQXdpZHRoJTIwJTNEJTIwNzY4JTBBaGVpZ2h0JTIwJTNEJTIwNTEyJTBBcmFuZG9tX3NlZWQlMjAlM0QlMjA0MiUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSkubWFudWFsX3NlZWQocmFuZG9tX3NlZWQpJTBBbW9kZWxfcGF0aCUyMCUzRCUyMCUyMmRpZmZ1c2VycyUyRkxUWC0yLjMtRGlmZnVzZXJzJTIyJTBBJTBBcGlwZSUyMCUzRCUyMExUWDJQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfcGF0aCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQWlmJTIwZ2V0YXR0cihwaXBlJTJDJTIwJTIycHJvY2Vzc29yJTIyJTJDJTIwTm9uZSklMjBpcyUyME5vbmUlM0ElMEElMjAlMjAlMjAlMjBwcm9jZXNzb3IlMjAlM0QlMjBHZW1tYTNQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZSUyRmdlbW1hLTMtMTJiLWl0LXFhdC1xNF8wLXVucXVhbnRpemVkJTIyKSUwQSUyMCUyMCUyMCUyMHBpcGUucHJvY2Vzc29yJTIwJTNEJTIwcHJvY2Vzc29yJTBBJTBBcHJvbXB0JTIwJTNEJTIwKCUwQSUyMCUyMCUyMCUyMCUyMkFuJTIwYXN0cm9uYXV0JTIwaGF0Y2hlcyUyMGZyb20lMjBhJTIwZnJhZ2lsZSUyMGVnZyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwTW9vbiUyQyUyMHRoZSUyMHNoZWxsJTIwY3JhY2tpbmclMjBhbmQlMjBwZWVsaW5nJTIwYXBhcnQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmdlbnRsZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uLiUyMEZpbmUlMjBsdW5hciUyMGR1c3QlMjBsaWZ0cyUyMGFuZCUyMGRyaWZ0cyUyMG91dHdhcmQlMjB3aXRoJTIwZWFjaCUyMG1vdmVtZW50JTJDJTIwZmxvYXRpbmclMjBpbiUyMHNsb3clMjBhcmNzJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYmVmb3JlJTIwc2V0dGxpbmclMjBiYWNrJTIwb250byUyMHRoZSUyMGdyb3VuZC4lMjBUaGUlMjBhc3Ryb25hdXQlMjBwdXNoZXMlMjBmcmVlJTIwaW4lMjBhJTIwZGVsaWJlcmF0ZSUyQyUyM
HdlaWdodGxlc3MlMjBtb3Rpb24lMkMlMjBzbWFsbCUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmZyYWdtZW50cyUyMG9mJTIwdGhlJTIwZWdnJTIwdHVtYmxpbmclMjBhbmQlMjBzcGlubmluZyUyMHRocm91Z2glMjB0aGUlMjBhaXIuJTIwSW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwdGhlJTIwZGVlcCUyMGRhcmtuZXNzJTIwb2YlMjBzcGFjZSUyMHN1YnRseSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnNoaWZ0cyUyMGFzJTIwc3RhcnMlMjBnbGlkZSUyMHdpdGglMjB0aGUlMjBjYW1lcmEncyUyMG1vdmVtZW50JTJDJTIwZW1waGFzaXppbmclMjB2YXN0JTIwZGVwdGglMjBhbmQlMjBzY2FsZS4lMjBUaGUlMjBjYW1lcmElMjBwZXJmb3JtcyUyMGElMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJzbW9vdGglMkMlMjBjaW5lbWF0aWMlMjBzbG93JTIwcHVzaC1pbiUyQyUyMHdpdGglMjBuYXR1cmFsJTIwcGFyYWxsYXglMjBiZXR3ZWVuJTIwdGhlJTIwZm9yZWdyb3VuZCUyMGR1c3QlMkMlMjB0aGUlMjBhc3Ryb25hdXQlMkMlMjBhbmQlMjB0aGUlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJkaXN0YW50JTIwc3RhcmZpZWxkLiUyMFVsdHJhLXJlYWxpc3RpYyUyMGRldGFpbCUyQyUyMHBoeXNpY2FsbHklMjBhY2N1cmF0ZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uJTJDJTIwY2luZW1hdGljJTIwbGlnaHRpbmclMkMlMjBhbmQlMjBhJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYnJlYXRoLXRha2luZyUyQyUyMG1vdmllLWxpa2UlMjBzaG90LiUyMiUwQSklMEElMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RERUZBVUxUX05FR0FUSVZFX1BST01QVCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMzAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDMuMCUyQyUwQSUyMCUyMCUyMCUyMHN0Z19zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMG1vZGFsaXR5X3NjYWxlJTNEMy4wJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2VfcmVzY2FsZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2d1aWRhbmNlX3NjYWxlJTNENy4wJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc3RnX3NjYWxlJTNEMS4wJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fbW9kYWxpdHlfc2NhbGUlM0QzLjAlMkMlMEElMjAlMjAlMjAlMjBhdWRpb19ndWlkYW5jZV9yZXNjYWxlJTNEMC43JTJDJTBBJTIwJTIwJTIwJTIwc3BhdGlvX3RlbXBvcmFsX2d1aWRhbmNlX2Jsb2NrcyUzRCU1QjI4JTVEJTJDJTBBJTIwJTIwJTIwJTIwdXNlX2Nyb3NzX3RpbWVzdGVwJTNEVHJ1ZSUyQyUwQSUyM
CUyMCUyMCUyMHN5c3RlbV9wcm9tcHQlM0RUMlZfREVGQVVMVF9TWVNURU1fUFJPTVBUJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3BhdGglM0QlMjJsdHgyXzNfdDJ2X3N0YWdlXzEubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Gemma3Processor | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2Pipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> encode_video | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DEFAULT_NEGATIVE_PROMPT, T2V_DEFAULT_SYSTEM_PROMPT | |
| device = <span class="hljs-string">"cuda"</span> | |
| width = <span class="hljs-number">768</span> | |
| height = <span class="hljs-number">512</span> | |
| random_seed = <span class="hljs-number">42</span> | |
| frame_rate = <span class="hljs-number">24.0</span> | |
| generator = torch.Generator(device).manual_seed(random_seed) | |
| model_path = <span class="hljs-string">"diffusers/LTX-2.3-Diffusers"</span> | |
| pipe = LTX2Pipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16) | |
| pipe.enable_model_cpu_offload(device=device) | |
| pipe.vae.enable_tiling() | |
| <span class="hljs-keyword">if</span> <span class="hljs-built_in">getattr</span>(pipe, <span class="hljs-string">"processor"</span>, <span class="hljs-literal">None</span>) <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>: | |
| processor = Gemma3Processor.from_pretrained(<span class="hljs-string">"google/gemma-3-12b-it-qat-q4_0-unquantized"</span>) | |
| pipe.processor = processor | |
| prompt = ( | |
| <span class="hljs-string">"An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in "</span> | |
| <span class="hljs-string">"gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs "</span> | |
| <span class="hljs-string">"before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small "</span> | |
| <span class="hljs-string">"fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly "</span> | |
| <span class="hljs-string">"shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a "</span> | |
| <span class="hljs-string">"smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the "</span> | |
| <span class="hljs-string">"distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a "</span> | |
| <span class="hljs-string">"breath-taking, movie-like shot."</span> | |
| ) | |
| video, audio = pipe( | |
| prompt=prompt, | |
| negative_prompt=DEFAULT_NEGATIVE_PROMPT, | |
| width=width, | |
| height=height, | |
| num_frames=<span class="hljs-number">121</span>, | |
| frame_rate=frame_rate, | |
| num_inference_steps=<span class="hljs-number">30</span>, | |
| guidance_scale=<span class="hljs-number">3.0</span>, | |
| stg_scale=<span class="hljs-number">1.0</span>, | |
| modality_scale=<span class="hljs-number">3.0</span>, | |
| guidance_rescale=<span class="hljs-number">0.7</span>, | |
| audio_guidance_scale=<span class="hljs-number">7.0</span>, | |
| audio_stg_scale=<span class="hljs-number">1.0</span>, | |
| audio_modality_scale=<span class="hljs-number">3.0</span>, | |
| audio_guidance_rescale=<span class="hljs-number">0.7</span>, | |
| spatio_temporal_guidance_blocks=[<span class="hljs-number">28</span>], | |
| use_cross_timestep=<span class="hljs-literal">True</span>, | |
| system_prompt=T2V_DEFAULT_SYSTEM_PROMPT, | |
| generator=generator, | |
| output_type=<span class="hljs-string">"np"</span>, | |
| return_dict=<span class="hljs-literal">False</span>, | |
| ) | |
| encode_video( | |
| video[<span class="hljs-number">0</span>], | |
| fps=frame_rate, | |
| audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(), | |
| audio_sample_rate=pipe.vocoder.config.output_sampling_rate, | |
| output_path=<span class="hljs-string">"ltx2_3_t2v_stage_1.mp4"</span>, | |
| )`,lang:"py",wrap:!1}}),Xe=new x({props:{title:"LTX2Pipeline",local:"diffusers.LTX2Pipeline",headingTag:"h2"}}),Be=new v({props:{name:"class diffusers.LTX2Pipeline",anchor:"diffusers.LTX2Pipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"audio_vae",val:": AutoencoderKLLTX2Audio"},{name:"text_encoder",val:": Gemma3ForConditionalGeneration"},{name:"tokenizer",val:": GemmaTokenizer"},{name:"connectors",val:": LTX2TextConnectors"},{name:"transformer",val:": LTX2VideoTransformer3DModel"},{name:"vocoder",val:": diffusers.pipelines.ltx2.vocoder.LTX2Vocoder | diffusers.pipelines.ltx2.vocoder.LTX2VocoderWithBWE"},{name:"processor",val:": transformers.models.gemma3.processing_gemma3.Gemma3Processor | None = None"}],parametersDescription:[{anchor:"diffusers.LTX2Pipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_13893/en/api/models/ltx_video_transformer3d#diffusers.LTXVideoTransformer3DModel">LTXVideoTransformer3DModel</a>) — | |
| Conditional Transformer architecture to denoise the encoded video latents.`,name:"transformer"},{anchor:"diffusers.LTX2Pipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_13893/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) — | |
| A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.LTX2Pipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_13893/en/api/models/autoencoderkl_ltx_video#diffusers.AutoencoderKLLTXVideo">AutoencoderKLLTXVideo</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.LTX2Pipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>T5EncoderModel</code>) — | |
| <a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel" rel="nofollow">T5</a>, specifically | |
| the <a href="https://huggingface.co/google/t5-v1_1-xxl" rel="nofollow">google/t5-v1_1-xxl</a> variant.`,name:"text_encoder"},{anchor:"diffusers.LTX2Pipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.LTX2Pipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>T5TokenizerFast</code>) — | |
| Second Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast" rel="nofollow">T5TokenizerFast</a>.`,name:"tokenizer"},{anchor:"diffusers.LTX2Pipeline.connectors",description:`<strong>connectors</strong> (<code>LTX2TextConnectors</code>) — | |
| Text connector stack used to adapt text encoder hidden states for the video and audio branches.`,name:"connectors"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L185"}}),Ce=new v({props:{name:"__call__",anchor:"diffusers.LTX2Pipeline.__call__",parameters:[{name:"prompt",val:": str | list[str] = None"},{name:"negative_prompt",val:": str | list[str] | None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"num_inference_steps",val:": int = 40"},{name:"sigmas",val:": list[float] | None = None"},{name:"timesteps",val:": list = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"stg_scale",val:": float = 0.0"},{name:"modality_scale",val:": float = 1.0"},{name:"guidance_rescale",val:": float = 0.0"},{name:"audio_guidance_scale",val:": float | None = None"},{name:"audio_stg_scale",val:": float | None = None"},{name:"audio_modality_scale",val:": float | None = None"},{name:"audio_guidance_rescale",val:": float | None = None"},{name:"spatio_temporal_guidance_blocks",val:": list[int] | None = None"},{name:"noise_scale",val:": float = 0.0"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"generator",val:": torch._C.Generator | list[torch._C.Generator] | None = None"},{name:"latents",val:": torch.Tensor | None = None"},{name:"audio_latents",val:": torch.Tensor | None = None"},{name:"prompt_embeds",val:": torch.Tensor | None = None"},{name:"prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor | None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"decode_timestep",val:": float | list[float] = 0.0"},{name:"decode_noise_scale",val:": float | list[float] | None = None"},{name:"use_cross_timestep",val:": bool = False"},{name:"system_prompt",val:": str | None = 
None"},{name:"prompt_max_new_tokens",val:": int = 512"},{name:"prompt_enhancement_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"prompt_enhancement_seed",val:": int = 10"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 1024"}],parametersDescription:[{anchor:"diffusers.LTX2Pipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The height in pixels of the generated image. This is set to 512 by default for the best results.`,name:"height"},{anchor:"diffusers.LTX2Pipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) — | |
| The width in pixels of the generated image. This is set to 768 by default for the best results.`,name:"width"},{anchor:"diffusers.LTX2Pipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) — | |
| The number of video frames to generate`,name:"num_frames"},{anchor:"diffusers.LTX2Pipeline.__call__.frame_rate",description:`<strong>frame_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>24.0</code>) — | |
| The frames per second (FPS) of the generated video.`,name:"frame_rate"},{anchor:"diffusers.LTX2Pipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 40) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.LTX2Pipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.LTX2Pipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>list[int]</code>, <em>optional</em>) — | |
| Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument | |
| in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is | |
| passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.LTX2Pipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>4.0</code>) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2 | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality. Used for the video modality (there is | |
| a separate value <code>audio_guidance_scale</code> for the audio modality).`,name:"guidance_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.stg_scale",description:`<strong>stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| Video guidance scale for Spatio-Temporal Guidance (STG), proposed in <a href="https://arxiv.org/abs/2411.18664" rel="nofollow">Spatiotemporal Skip Guidance for | |
| Enhanced Video Diffusion Sampling</a>. STG uses a CFG-like estimate | |
| where we move the sample away from a weak sample from a perturbed version of the denoising model. | |
| Enabling STG will result in an additional denoising model forward pass; the default value of <code>0.0</code> | |
| means that STG is disabled.`,name:"stg_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.modality_scale",description:`<strong>modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) — | |
| Video guidance scale for LTX-2.X modality isolation guidance, where we move the sample away from a | |
| weaker sample generated by the denoising model with cross-modality (audio-to-video and video-to-audio) | |
| cross attention disabled using a CFG-like estimate. Enabling modality guidance will result in an | |
| additional denoising model forward pass; the default value of <code>1.0</code> means that modality guidance is | |
| disabled.`,name:"modality_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. <code>guidance_rescale</code> is defined as <code>φ</code> in equation 16 of | |
| <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. Guidance rescale factor should fix overexposure when | |
| using zero terminal SNR. Used for the video modality.`,name:"guidance_rescale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_guidance_scale",description:`<strong>audio_guidance_scale</strong> (<code>float</code>, <em>optional</em> defaults to <code>None</code>) — | |
| Audio guidance scale for CFG with respect to the negative prompt. The CFG update rule is the same for | |
| video and audio, but they can use different values for the guidance scale. The LTX-2.X authors suggest | |
| that the <code>audio_guidance_scale</code> should be higher relative to the video <code>guidance_scale</code> (e.g. for | |
| LTX-2.3 they suggest 3.0 for video and 7.0 for audio). If <code>None</code>, defaults to the video value | |
| <code>guidance_scale</code>.`,name:"audio_guidance_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_stg_scale",description:`<strong>audio_stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Audio guidance scale for STG. As with CFG, the STG update rule is otherwise the same for video and | |
| audio. For LTX-2.3, a value of 1.0 is suggested for both video and audio. If <code>None</code>, defaults to the | |
| video value <code>stg_scale</code>.`,name:"audio_stg_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_modality_scale",description:`<strong>audio_modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Audio guidance scale for LTX-2.X modality isolation guidance. As with CFG, the modality guidance rule | |
| is otherwise the same for video and audio. For LTX-2.3, a value of 3.0 is suggested for both video and | |
| audio. If <code>None</code>, defaults to the video value <code>modality_scale</code>.`,name:"audio_modality_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_guidance_rescale",description:`<strong>audio_guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| A separate guidance rescale factor for the audio modality. If <code>None</code>, defaults to the video value | |
| <code>guidance_rescale</code>.`,name:"audio_guidance_rescale"},{anchor:"diffusers.LTX2Pipeline.__call__.spatio_temporal_guidance_blocks",description:`<strong>spatio_temporal_guidance_blocks</strong> (<code>list[int]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The zero-indexed transformer block indices at which to apply STG. Must be supplied if STG is used | |
| (<code>stg_scale</code> or <code>audio_stg_scale</code> is greater than <code>0</code>). A value of <code>[29]</code> is recommended for LTX-2.0 | |
| and <code>[28]</code> is recommended for LTX-2.3.`,name:"spatio_temporal_guidance_blocks"},{anchor:"diffusers.LTX2Pipeline.__call__.noise_scale",description:`<strong>noise_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| The interpolation factor between random noise and denoised latents at each timestep. Applying noise to | |
| the <code>latents</code> and <code>audio_latents</code> before continuing denoising.`,name:"noise_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2Pipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_latents",description:`<strong>audio_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for audio | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"audio_latents"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for text embeddings.`,name:"prompt_attention_mask"},{anchor:"diffusers.LTX2Pipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.LTX2Pipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) — | |
| The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2Pipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) — | |
| The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.use_cross_timestep",description:`<strong>use_cross_timestep</strong> (<code>bool</code> <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to use the cross modality (audio is the cross modality of video, and vice versa) sigma when | |
| calculating the cross attention modulation parameters. <code>True</code> is the newer (e.g. LTX-2.3) behavior; | |
| <code>False</code> is the legacy LTX-2.0 behavior.`,name:"use_cross_timestep"},{anchor:"diffusers.LTX2Pipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Optional system prompt to use for prompt enhancement. The system prompt will be used by the current | |
| text encoder (by default, a <code>Gemma3ForConditionalGeneration</code> model) to generate an enhanced prompt from | |
| the original <code>prompt</code> to condition generation. If not supplied, prompt enhancement will not be | |
| performed.`,name:"system_prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_max_new_tokens",description:`<strong>prompt_max_new_tokens</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The maximum number of new tokens to generate when performing prompt enhancement.`,name:"prompt_max_new_tokens"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_enhancement_kwargs",description:`<strong>prompt_enhancement_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Keyword arguments for <code>self.text_encoder.generate</code>. If not supplied, default arguments of | |
| <code>do_sample=True</code> and <code>temperature=0.7</code> will be used. See | |
| <a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate" rel="nofollow">https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate</a> | |
| for more details.`,name:"prompt_enhancement_kwargs"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_enhancement_seed",description:`<strong>prompt_enhancement_seed</strong> (<code>int</code>, <em>optional</em>, default to <code>10</code>) — | |
| Random seed for any random operations during prompt enhancement.`,name:"prompt_enhancement_seed"},{anchor:"diffusers.LTX2Pipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generated image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2Pipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.ltx.LTX2PipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.LTX2Pipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.LTX2Pipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that is called at the end of each denoising step during inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LTX2Pipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>, defaults to <code>["latents"]</code>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.LTX2Pipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) — | |
| Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L808",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTX2PipelineOutput</code> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.ltx.LTX2PipelineOutput</code> or <code>tuple</code></p> | |
| `}}),z=new Bn({props:{anchor:"diffusers.LTX2Pipeline.__call__.example",$$slots:{default:[ba]},$$scope:{ctx:V}}}),We=new v({props:{name:"encode_prompt",anchor:"diffusers.LTX2Pipeline.encode_prompt",parameters:[{name:"prompt",val:": str | list[str]"},{name:"negative_prompt",val:": str | list[str] | None = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": torch.Tensor | None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor | None = None"},{name:"prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"max_sequence_length",val:": int = 1024"},{name:"scale_factor",val:": int = 8"},{name:"device",val:": torch.device | None = None"},{name:"dtype",val:": torch.dtype | None = None"}],parametersDescription:[{anchor:"diffusers.LTX2Pipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device`,name:"device"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>): | |
| torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L337"}}),xe=new v({props:{name:"enhance_prompt",anchor:"diffusers.LTX2Pipeline.enhance_prompt",parameters:[{name:"prompt",val:": str"},{name:"system_prompt",val:": str"},{name:"max_new_tokens",val:": int = 512"},{name:"seed",val:": int = 10"},{name:"generator",val:": torch._C.Generator | None = None"},{name:"generation_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"device",val:": str | torch.device | None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L423"}}),Ve=new x({props:{title:"LTX2ImageToVideoPipeline",local:"diffusers.LTX2ImageToVideoPipeline",headingTag:"h2"}}),ke=new v({props:{name:"class diffusers.LTX2ImageToVideoPipeline",anchor:"diffusers.LTX2ImageToVideoPipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"audio_vae",val:": AutoencoderKLLTX2Audio"},{name:"text_encoder",val:": Gemma3ForConditionalGeneration"},{name:"tokenizer",val:": GemmaTokenizer"},{name:"connectors",val:": LTX2TextConnectors"},{name:"transformer",val:": LTX2VideoTransformer3DModel"},{name:"vocoder",val:": diffusers.pipelines.ltx2.vocoder.LTX2Vocoder | diffusers.pipelines.ltx2.vocoder.LTX2VocoderWithBWE"},{name:"processor",val:": transformers.models.gemma3.processing_gemma3.Gemma3Processor | None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L205"}}),Re=new v({props:{name:"__call__",anchor:"diffusers.LTX2ImageToVideoPipeline.__call__",parameters:[{name:"image",val:": PIL.Image.Image | numpy.ndarray | torch.Tensor | list[PIL.Image.Image] | list[numpy.ndarray] | list[torch.Tensor] = None"},{name:"prompt",val:": str | list[str] = None"},{name:"negative_prompt",val:": str | list[str] | None = 
None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"num_inference_steps",val:": int = 40"},{name:"sigmas",val:": list[float] | None = None"},{name:"timesteps",val:": list[int] | None = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"stg_scale",val:": float = 0.0"},{name:"modality_scale",val:": float = 1.0"},{name:"guidance_rescale",val:": float = 0.0"},{name:"audio_guidance_scale",val:": float | None = None"},{name:"audio_stg_scale",val:": float | None = None"},{name:"audio_modality_scale",val:": float | None = None"},{name:"audio_guidance_rescale",val:": float | None = None"},{name:"spatio_temporal_guidance_blocks",val:": list[int] | None = None"},{name:"noise_scale",val:": float = 0.0"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"generator",val:": torch._C.Generator | list[torch._C.Generator] | None = None"},{name:"latents",val:": torch.Tensor | None = None"},{name:"audio_latents",val:": torch.Tensor | None = None"},{name:"prompt_embeds",val:": torch.Tensor | None = None"},{name:"prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor | None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"decode_timestep",val:": float | list[float] = 0.0"},{name:"decode_noise_scale",val:": float | list[float] | None = None"},{name:"use_cross_timestep",val:": bool = False"},{name:"system_prompt",val:": str | None = None"},{name:"prompt_max_new_tokens",val:": int = 512"},{name:"prompt_enhancement_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"prompt_enhancement_seed",val:": int = 10"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = 
None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 1024"}],parametersDescription:[{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.image",description:`<strong>image</strong> (<code>PipelineImageInput</code>) — | |
| The input image to condition the generation on. Must be an image, a list of images or a <code>torch.Tensor</code>.`,name:"image"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The height in pixels of the generated image. This is set to 512 by default for the best results.`,name:"height"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) — | |
| The width in pixels of the generated image. This is set to 768 by default for the best results.`,name:"width"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) — | |
| The number of video frames to generate`,name:"num_frames"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.frame_rate",description:`<strong>frame_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>24.0</code>) — | |
| The frames per second (FPS) of the generated video.`,name:"frame_rate"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 40) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument | |
| in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is | |
| passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>4.0</code>) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2 | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality. Used for the video modality (there is | |
| a separate value <code>audio_guidance_scale</code> for the audio modality).`,name:"guidance_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.stg_scale",description:`<strong>stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| Video guidance scale for Spatio-Temporal Guidance (STG), proposed in <a href="https://arxiv.org/abs/2411.18664" rel="nofollow">Spatiotemporal Skip Guidance for | |
| Enhanced Video Diffusion Sampling</a>. STG uses a CFG-like estimate | |
| where we move the sample away from a weak sample from a perturbed version of the denoising model. | |
| Enabling STG will result in an additional denoising model forward pass; the default value of <code>0.0</code> | |
| means that STG is disabled.`,name:"stg_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.modality_scale",description:`<strong>modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) — | |
| Video guidance scale for LTX-2.X modality isolation guidance, where we move the sample away from a | |
| weaker sample generated by the denoising model with cross-modality (audio-to-video and video-to-audio) | |
| cross attention disabled using a CFG-like estimate. Enabling modality guidance will result in an | |
| additional denoising model forward pass; the default value of <code>1.0</code> means that modality guidance is | |
| disabled.`,name:"modality_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. <code>guidance_rescale</code> is defined as <code>φ</code> in equation 16 of | |
| <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. Guidance rescale factor should fix overexposure when | |
| using zero terminal SNR. Used for the video modality.`,name:"guidance_rescale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_guidance_scale",description:`<strong>audio_guidance_scale</strong> (<code>float</code>, <em>optional</em> defaults to <code>None</code>) — | |
| Audio guidance scale for CFG with respect to the negative prompt. The CFG update rule is the same for | |
| video and audio, but they can use different values for the guidance scale. The LTX-2.X authors suggest | |
| that the <code>audio_guidance_scale</code> should be higher relative to the video <code>guidance_scale</code> (e.g. for | |
| LTX-2.3 they suggest 3.0 for video and 7.0 for audio). If <code>None</code>, defaults to the video value | |
| <code>guidance_scale</code>.`,name:"audio_guidance_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_stg_scale",description:`<strong>audio_stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Audio guidance scale for STG. As with CFG, the STG update rule is otherwise the same for video and | |
| audio. For LTX-2.3, a value of 1.0 is suggested for both video and audio. If <code>None</code>, defaults to the | |
| video value <code>stg_scale</code>.`,name:"audio_stg_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_modality_scale",description:`<strong>audio_modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Audio guidance scale for LTX-2.X modality isolation guidance. As with CFG, the modality guidance rule | |
| is otherwise the same for video and audio. For LTX-2.3, a value of 3.0 is suggested for both video and | |
| audio. If <code>None</code>, defaults to the video value <code>modality_scale</code>.`,name:"audio_modality_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_guidance_rescale",description:`<strong>audio_guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| A separate guidance rescale factor for the audio modality. If <code>None</code>, defaults to the video value | |
| <code>guidance_rescale</code>.`,name:"audio_guidance_rescale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.spatio_temporal_guidance_blocks",description:`<strong>spatio_temporal_guidance_blocks</strong> (<code>list[int]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The zero-indexed transformer block indices at which to apply STG. Must be supplied if STG is used | |
| (<code>stg_scale</code> or <code>audio_stg_scale</code> is greater than <code>0</code>). A value of <code>[29]</code> is recommended for LTX-2.0 | |
| and <code>[28]</code> is recommended for LTX-2.3.`,name:"spatio_temporal_guidance_blocks"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.noise_scale",description:`<strong>noise_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| The interpolation factor between random noise and denoised latents at each timestep. Noise is applied to | |
| the <code>latents</code> and <code>audio_latents</code> before denoising continues.`,name:"noise_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_latents",description:`<strong>audio_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for audio | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"audio_latents"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for text embeddings.`,name:"prompt_attention_mask"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) — | |
| The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) — | |
| The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.use_cross_timestep",description:`<strong>use_cross_timestep</strong> (<code>bool</code> <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to use the cross modality (audio is the cross modality of video, and vice versa) sigma when | |
| calculating the cross attention modulation parameters. <code>True</code> is the newer (e.g. LTX-2.3) behavior; | |
| <code>False</code> is the legacy LTX-2.0 behavior.`,name:"use_cross_timestep"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Optional system prompt to use for prompt enhancement. The system prompt will be used by the current | |
| text encoder (by default, a <code>Gemma3ForConditionalGeneration</code> model) to generate an enhanced prompt from | |
| the original <code>prompt</code> to condition generation. If not supplied, prompt enhancement will not be | |
| performed.`,name:"system_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_max_new_tokens",description:`<strong>prompt_max_new_tokens</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The maximum number of new tokens to generate when performing prompt enhancement.`,name:"prompt_max_new_tokens"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_enhancement_kwargs",description:`<strong>prompt_enhancement_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Keyword arguments for <code>self.text_encoder.generate</code>. If not supplied, default arguments of | |
| <code>do_sample=True</code> and <code>temperature=0.7</code> will be used. See | |
| <a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate" rel="nofollow">https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate</a> | |
| for more details.`,name:"prompt_enhancement_kwargs"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_enhancement_seed",description:`<strong>prompt_enhancement_seed</strong> (<code>int</code>, <em>optional</em>, default to <code>10</code>) — | |
| Random seed for any random operations during prompt enhancement.`,name:"prompt_enhancement_seed"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generated image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.ltx.LTX2PipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that is called at the end of each denoising step during inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) — | |
| Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L868",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTX2PipelineOutput</code> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.ltx.LTX2PipelineOutput</code> or <code>tuple</code></p> | |
| `}}),$=new Bn({props:{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.example",$$slots:{default:[Ua]},$$scope:{ctx:V}}}),Ne=new v({props:{name:"encode_prompt",anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt",parameters:[{name:"prompt",val:": str | list[str]"},{name:"negative_prompt",val:": str | list[str] | None = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": torch.Tensor | None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor | None = None"},{name:"prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"max_sequence_length",val:": int = 1024"},{name:"scale_factor",val:": int = 8"},{name:"device",val:": torch.device | None = None"},{name:"dtype",val:": torch.dtype | None = None"}],parametersDescription:[{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device`,name:"device"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>): | |
| torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L342"}}),Le=new v({props:{name:"enhance_prompt",anchor:"diffusers.LTX2ImageToVideoPipeline.enhance_prompt",parameters:[{name:"image",val:": PIL.Image.Image | numpy.ndarray | torch.Tensor | list[PIL.Image.Image] | list[numpy.ndarray] | list[torch.Tensor]"},{name:"prompt",val:": str"},{name:"system_prompt",val:": str"},{name:"max_new_tokens",val:": int = 512"},{name:"seed",val:": int = 10"},{name:"generator",val:": torch._C.Generator | None = None"},{name:"generation_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"device",val:": str | torch.device | None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L428"}}),Fe=new x({props:{title:"LTX2ConditionPipeline",local:"diffusers.LTX2ConditionPipeline",headingTag:"h2"}}),Ee=new v({props:{name:"class diffusers.LTX2ConditionPipeline",anchor:"diffusers.LTX2ConditionPipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"audio_vae",val:": AutoencoderKLLTX2Audio"},{name:"text_encoder",val:": Gemma3ForConditionalGeneration"},{name:"tokenizer",val:": GemmaTokenizer"},{name:"connectors",val:": LTX2TextConnectors"},{name:"transformer",val:": LTX2VideoTransformer3DModel"},{name:"vocoder",val:": diffusers.pipelines.ltx2.vocoder.LTX2Vocoder | diffusers.pipelines.ltx2.vocoder.LTX2VocoderWithBWE"},{name:"audio_scheduler",val:": diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler | None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L236"}}),Qe=new v({props:{name:"__call__",anchor:"diffusers.LTX2ConditionPipeline.__call__",parameters:[{name:"conditions",val:": 
diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition | list[diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition] | None = None"},{name:"prompt",val:": str | list[str] = None"},{name:"negative_prompt",val:": str | list[str] | None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"num_inference_steps",val:": int = 40"},{name:"sigmas",val:": list[float] | None = None"},{name:"timesteps",val:": list[float] | None = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"stg_scale",val:": float = 0.0"},{name:"modality_scale",val:": float = 1.0"},{name:"guidance_rescale",val:": float = 0.0"},{name:"audio_guidance_scale",val:": float | None = None"},{name:"audio_stg_scale",val:": float | None = None"},{name:"audio_modality_scale",val:": float | None = None"},{name:"audio_guidance_rescale",val:": float | None = None"},{name:"spatio_temporal_guidance_blocks",val:": list[int] | None = None"},{name:"noise_scale",val:": float | None = None"},{name:"num_videos_per_prompt",val:": int | None = 1"},{name:"generator",val:": torch._C.Generator | list[torch._C.Generator] | None = None"},{name:"latents",val:": torch.Tensor | None = None"},{name:"audio_latents",val:": torch.Tensor | None = None"},{name:"prompt_embeds",val:": torch.Tensor | None = None"},{name:"prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor | None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"decode_timestep",val:": float | list[float] = 0.0"},{name:"decode_noise_scale",val:": float | list[float] | None = None"},{name:"use_cross_timestep",val:": bool = False"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"callback_on_step_end",val:": 
typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 1024"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.__call__.conditions",description:`<strong>conditions</strong> (<code>List[LTXVideoCondition], *optional*</code>) — | |
| The list of frame-conditioning items for the video generation.`,name:"conditions"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code> | |
| instead.`,name:"prompt"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The height in pixels of the generated image. This is set to 512 by default for the best results.`,name:"height"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) — | |
| The width in pixels of the generated image. This is set to 768 by default for the best results.`,name:"width"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) — | |
| The number of video frames to generate`,name:"num_frames"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.frame_rate",description:`<strong>frame_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>24.0</code>) — | |
| The frames per second (FPS) of the generated video.`,name:"frame_rate"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 40) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) — | |
| Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in | |
| their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed | |
| will be used.`,name:"sigmas"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument | |
| in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is | |
| passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>4.0</code>) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2. | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality. Used for the video modality (there is | |
| a separate value <code>audio_guidance_scale</code> for the audio modality).`,name:"guidance_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.stg_scale",description:`<strong>stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| Video guidance scale for Spatio-Temporal Guidance (STG), proposed in <a href="https://arxiv.org/abs/2411.18664" rel="nofollow">Spatiotemporal Skip Guidance for | |
| Enhanced Video Diffusion Sampling</a>. STG uses a CFG-like estimate | |
| where we move the sample away from a weak sample from a perturbed version of the denoising model. | |
| Enabling STG will result in an additional denoising model forward pass; the default value of <code>0.0</code> | |
| means that STG is disabled.`,name:"stg_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.modality_scale",description:`<strong>modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) — | |
| Video guidance scale for LTX-2.X modality isolation guidance, where we move the sample away from a | |
| weaker sample generated by the denoising model with cross-modality (audio-to-video and video-to-audio) | |
| cross attention disabled using a CFG-like estimate. Enabling modality guidance will result in an | |
| additional denoising model forward pass; the default value of <code>1.0</code> means that modality guidance is | |
| disabled.`,name:"modality_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. <code>guidance_rescale</code> is defined as <code>φ</code> in equation 16 of | |
| <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. Guidance rescale factor should fix overexposure when | |
| using zero terminal SNR. Used for the video modality.`,name:"guidance_rescale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_guidance_scale",description:`<strong>audio_guidance_scale</strong> (<code>float</code>, <em>optional</em> defaults to <code>None</code>) — | |
| Audio guidance scale for CFG with respect to the negative prompt. The CFG update rule is the same for | |
| video and audio, but they can use different values for the guidance scale. The LTX-2.X authors suggest | |
| that the <code>audio_guidance_scale</code> should be higher relative to the video <code>guidance_scale</code> (e.g. for | |
| LTX-2.3 they suggest 3.0 for video and 7.0 for audio). If <code>None</code>, defaults to the video value | |
| <code>guidance_scale</code>.`,name:"audio_guidance_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_stg_scale",description:`<strong>audio_stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Audio guidance scale for STG. As with CFG, the STG update rule is otherwise the same for video and | |
| audio. For LTX-2.3, a value of 1.0 is suggested for both video and audio. If <code>None</code>, defaults to the | |
| video value <code>stg_scale</code>.`,name:"audio_stg_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_modality_scale",description:`<strong>audio_modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Audio guidance scale for LTX-2.X modality isolation guidance. As with CFG, the modality guidance rule | |
| is otherwise the same for video and audio. For LTX-2.3, a value of 3.0 is suggested for both video and | |
| audio. If <code>None</code>, defaults to the video value <code>modality_scale</code>.`,name:"audio_modality_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_guidance_rescale",description:`<strong>audio_guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| A separate guidance rescale factor for the audio modality. If <code>None</code>, defaults to the video value | |
| <code>guidance_rescale</code>.`,name:"audio_guidance_rescale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.spatio_temporal_guidance_blocks",description:`<strong>spatio_temporal_guidance_blocks</strong> (<code>list[int]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The zero-indexed transformer block indices at which to apply STG. Must be supplied if STG is used | |
| (<code>stg_scale</code> or <code>audio_stg_scale</code> is greater than <code>0</code>). A value of <code>[29]</code> is recommended for LTX-2.0 | |
| and <code>[28]</code> is recommended for LTX-2.3.`,name:"spatio_temporal_guidance_blocks"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.noise_scale",description:`<strong>noise_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The interpolation factor between random noise and denoised latents at each timestep. Noise is applied to | |
| the <code>latents</code> and <code>audio_latents</code> before denoising continues. If not set, it will be inferred from the | |
| sigma schedule.`,name:"noise_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_latents",description:`<strong>audio_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for audio | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"audio_latents"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for text embeddings.`,name:"prompt_attention_mask"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not | |
| provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) — | |
| Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) — | |
| The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) — | |
| The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.use_cross_timestep",description:`<strong>use_cross_timestep</strong> (<code>bool</code> <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to use the cross modality (audio is the cross modality of video, and vice versa) sigma when | |
| calculating the cross attention modulation parameters. <code>True</code> is the newer (e.g. LTX-2.3) behavior; | |
| <code>False</code> is the legacy LTX-2.0 behavior.`,name:"use_cross_timestep"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.ltx.LTX2PipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls at the end of each denoising steps during the inference. The function is called | |
| with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by | |
| <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) — | |
| The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list | |
| will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the | |
| <code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) — | |
| Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L1174",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTX2PipelineOutput</code> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is a list with the generated images.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.ltx.LTX2PipelineOutput</code> or <code>tuple</code></p> | |
| `}}),K=new Bn({props:{anchor:"diffusers.LTX2ConditionPipeline.__call__.example",$$slots:{default:[ja]},$$scope:{ctx:V}}}),Ye=new v({props:{name:"apply_first_frame_conditioning",anchor:"diffusers.LTX2ConditionPipeline.apply_first_frame_conditioning",parameters:[{name:"latents",val:": Tensor"},{name:"conditioning_mask",val:": Tensor"},{name:"condition_latents",val:": list"},{name:"condition_strengths",val:": list"},{name:"condition_indices",val:": list"},{name:"latent_height",val:": int"},{name:"latent_width",val:": int"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.apply_first_frame_conditioning.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>) — | |
| Initial packed (patchified) latents of shape [batch_size, patch_seq_len, hidden_dim].`,name:"latents"},{anchor:"diffusers.LTX2ConditionPipeline.apply_first_frame_conditioning.conditioning_mask",description:`<strong>conditioning_mask</strong> (<code>torch.Tensor</code>) — | |
| Initial packed (patchified) conditioning mask of shape [batch_size, patch_seq_len, 1] with values in | |
| [0, 1] where 0 means the denoising model output will be fully used and 1 means the condition will be | |
| fully used.`,name:"conditioning_mask"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L793",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Returns a 3-tuple of tensors where:</p> | |
| <ol> | |
| <li>The packed video latents with first-frame conditions applied.</li> | |
| <li>The packed conditioning mask with first-frame strengths applied.</li> | |
| <li>The clean conditioning latents at first-frame positions (zeros elsewhere).</li> | |
| </ol> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Tuple[torch.Tensor, torch.Tensor, torch.Tensor]</code></p> | |
| `}}),Se=new v({props:{name:"encode_prompt",anchor:"diffusers.LTX2ConditionPipeline.encode_prompt",parameters:[{name:"prompt",val:": str | list[str]"},{name:"negative_prompt",val:": str | list[str] | None = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": torch.Tensor | None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor | None = None"},{name:"prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor | None = None"},{name:"max_sequence_length",val:": int = 1024"},{name:"scale_factor",val:": int = 8"},{name:"device",val:": torch.device | None = None"},{name:"dtype",val:": torch.dtype | None = None"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>): | |
| torch device`,name:"device"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>): | |
| torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L381"}}),Ae=new v({props:{name:"prepare_latents",anchor:"diffusers.LTX2ConditionPipeline.prepare_latents",parameters:[{name:"conditions",val:": diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition | list[diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition] | None = None"},{name:"batch_size",val:": int = 1"},{name:"num_channels_latents",val:": int = 128"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"noise_scale",val:": float = 1.0"},{name:"dtype",val:": torch.dtype | None = None"},{name:"device",val:": torch.device | None = None"},{name:"generator",val:": torch._C.Generator | None = None"},{name:"latents",val:": torch.Tensor | None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L900"}}),ze=new v({props:{name:"preprocess_conditions",anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions",parameters:[{name:"conditions",val:": diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition | list[diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition] | None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"device",val:": torch.device | None = None"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.conditions",description:`<strong>conditions</strong> (<code>LTX2VideoCondition</code> or <code>List[LTX2VideoCondition]</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| A list of image/video condition instances.`,name:"conditions"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The desired height in pixels.`,name:"height"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) — | |
| The desired width in pixels.`,name:"width"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) — | |
| The desired number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.device",description:`<strong>device</strong> (<code>torch.device</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| The device on which to put the preprocessed image/video tensors.`,name:"device"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L686",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Returns a 4-tuple of lists of length <code>len(conditions)</code> as follows:</p> | |
| <ol> | |
| <li>The first list is a list of preprocessed video tensors of shape [batch_size=1, num_channels, | |
| num_frames, height, width].</li> | |
| <li>The second list is a list of conditioning strengths.</li> | |
| <li>The third list is a list of latent-space indices for each condition.</li> | |
| <li>The fourth list is a list of (trimmed) pixel-space frame counts per condition. This is needed | |
| for keyframe coord semantics (single-pixel-frame keyframes have a clamped temporal extent).</li> | |
| </ol> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Tuple[List[torch.Tensor], List[float], List[int], List[int]]</code></p> | |
| `}}),He=new v({props:{name:"trim_conditioning_sequence",anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence",parameters:[{name:"start_frame",val:": int"},{name:"sequence_num_frames",val:": int"},{name:"target_num_frames",val:": int"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence.start_frame",description:"<strong>start_frame</strong> (int) — The target frame number of the first frame in the sequence.",name:"start_frame"},{anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence.sequence_num_frames",description:"<strong>sequence_num_frames</strong> (int) — The number of frames in the sequence.",name:"sequence_num_frames"},{anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence.target_num_frames",description:"<strong>target_num_frames</strong> (int) — The target number of frames in the generated video.",name:"target_num_frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L669",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>updated sequence length</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>int</p> | |
| `}}),Pe=new x({props:{title:"LTX2LatentUpsamplePipeline",local:"diffusers.LTX2LatentUpsamplePipeline",headingTag:"h2"}}),$e=new v({props:{name:"class diffusers.LTX2LatentUpsamplePipeline",anchor:"diffusers.LTX2LatentUpsamplePipeline",parameters:[{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"latent_upsampler",val:": LTX2LatentUpsamplerModel"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L104"}}),De=new v({props:{name:"__call__",anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__",parameters:[{name:"video",val:": list[PIL.Image.Image | numpy.ndarray | torch.Tensor | list[PIL.Image.Image] | list[numpy.ndarray] | list[torch.Tensor]] | None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"spatial_patch_size",val:": int = 1"},{name:"temporal_patch_size",val:": int = 1"},{name:"latents",val:": torch.Tensor | None = None"},{name:"latents_normalized",val:": bool = False"},{name:"decode_timestep",val:": float | list[float] = 0.0"},{name:"decode_noise_scale",val:": float | list[float] | None = None"},{name:"adain_factor",val:": float = 0.0"},{name:"tone_map_compression_ratio",val:": float = 0.0"},{name:"generator",val:": torch._C.Generator | list[torch._C.Generator] | None = None"},{name:"output_type",val:": str | None = 'pil'"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.video",description:`<strong>video</strong> (<code>list[PipelineImageInput]</code>, <em>optional</em>) — | |
| The video to be upsampled (such as a LTX 2.0 first stage output). If not supplied, <code>latents</code> should be | |
| supplied.`,name:"video"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) — | |
| The height in pixels of the input video (not the generated video, which will have a larger resolution).`,name:"height"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) — | |
| The width in pixels of the input video (not the generated video, which will have a larger resolution).`,name:"width"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) — | |
| The number of frames in the input video.`,name:"num_frames"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.spatial_patch_size",description:`<strong>spatial_patch_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) — | |
| The spatial patch size of the video latents. Used when <code>latents</code> is supplied if unpacking is necessary.`,name:"spatial_patch_size"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.temporal_patch_size",description:`<strong>temporal_patch_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) — | |
| The temporal patch size of the video latents. Used when <code>latents</code> is supplied if unpacking is | |
| necessary.`,name:"temporal_patch_size"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated video latents. This can be supplied in place of the <code>video</code> argument. Can either be a | |
| patch sequence of shape <code>(batch_size, seq_len, hidden_dim)</code> or a video latent of shape <code>(batch_size, latent_channels, latent_frames, latent_height, latent_width)</code>.`,name:"latents"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.latents_normalized",description:`<strong>latents_normalized</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| If <code>latents</code> are supplied, whether the <code>latents</code> are normalized using the VAE latent mean and std. If | |
| <code>True</code>, the <code>latents</code> will be denormalized before being supplied to the latent upsampler.`,name:"latents_normalized"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) — | |
| The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) — | |
| The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.adain_factor",description:`<strong>adain_factor</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| Adaptive Instance Normalization (AdaIN) blending factor between the upsampled and original latents. | |
| Should be in [-10.0, 10.0]; supplying 0.0 (the default) means that AdaIN is not performed.`,name:"adain_factor"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.tone_map_compression_ratio",description:`<strong>tone_map_compression_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) — | |
| The compression strength for tone mapping, which will reduce the dynamic range of the latent values. | |
| This is useful for regularizing high-variance latents or for conditioning outputs during generation. | |
| Should be in [0, 1], where 0.0 (the default) means tone mapping is not applied and 1.0 corresponds to | |
| the full compression effect.`,name:"tone_map_compression_ratio"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.ltx.LTXPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L264",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTXPipelineOutput</code> is returned, otherwise a <code>tuple</code> is | |
| returned where the first element is the upsampled video.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.ltx.LTXPipelineOutput</code> or <code>tuple</code></p> | |
| `}}),ne=new Bn({props:{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.example",$$slots:{default:[va]},$$scope:{ctx:V}}}),qe=new v({props:{name:"adain_filter_latent",anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent",parameters:[{name:"latents",val:": Tensor"},{name:"reference_latents",val:": Tensor"},{name:"factor",val:": float = 1.0"}],parametersDescription:[{anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent.latent",description:`<strong>latent</strong> (<code>torch.Tensor</code>) — | |
| Input latents to normalize`,name:"latent"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent.reference_latents",description:`<strong>reference_latents</strong> (<code>torch.Tensor</code>) — | |
| The reference latents providing style statistics.`,name:"reference_latents"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent.factor",description:`<strong>factor</strong> (<code>float</code>) — | |
| Blending factor between original and transformed latent. Range: -10.0 to 10.0, Default: 1.0`,name:"factor"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L168",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The transformed latent tensor</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>torch.Tensor</p> | |
| `}}),Ke=new v({props:{name:"tone_map_latents",anchor:"diffusers.LTX2LatentUpsamplePipeline.tone_map_latents",parameters:[{name:"latents",val:": Tensor"},{name:"compression",val:": float"}],parametersDescription:[{anchor:"diffusers.LTX2LatentUpsamplePipeline.tone_map_latents.latents",description:`<strong>latents</strong> — torch.Tensor | |
| Input latent tensor with arbitrary shape. Expected to be roughly in [-1, 1] or [0, 1] range.`,name:"latents"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.tone_map_latents.compression",description:`<strong>compression</strong> — float | |
| Compression strength in the range [0, 1]. | |
| <ul> | |
| <li>0.0: No tone-mapping (identity transform)</li> | |
| <li>1.0: Full compression effect</li> | |
| </ul>`,name:"compression"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L196",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>torch.Tensor | |
| The tone-mapped latent tensor of the same shape as input.</p> | |
| `}}),Oe=new x({props:{title:"LTX2PipelineOutput",local:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput",headingTag:"h2"}}),et=new v({props:{name:"class diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput",anchor:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput",parameters:[{name:"frames",val:": Tensor"},{name:"audio",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or list[list[PIL.Image.Image]]) — | |
| List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing | |
| denoised PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape | |
| <code>(batch_size, num_frames, channels, height, width)</code>.`,name:"frames"},{anchor:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput.audio",description:`<strong>audio</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>) — | |
| TODO`,name:"audio"}],source:"https://github.com/huggingface/diffusers/blob/vr_13893/src/diffusers/pipelines/ltx2/pipeline_output.py#L9"}}),tt=new wa({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/ltx2.md"}}),{c(){M=l("meta"),I=a(),y=l("p"),_=a(),c(T.$$.fragment),r=a(),w=l("div"),w.innerHTML=Zs,Lt=a(),oe=l("p"),oe.innerHTML=Gs,Ft=a(),le=l("p"),le.innerHTML=Is,Et=a(),ie=l("p"),ie.innerHTML=Xs,Qt=a(),c(de.$$.fragment),Yt=a(),re=l("p"),re.textContent=Bs,St=a(),pe=l("ul"),pe.innerHTML=Cs,At=a(),ce=l("p"),ce.textContent=Ws,zt=a(),c(me.$$.fragment),Ht=a(),c(ue.$$.fragment),Pt=a(),he=l("p"),he.textContent=xs,$t=a(),c(ge.$$.fragment),Dt=a(),c(fe.$$.fragment),qt=a(),Me=l("p"),Me.innerHTML=Vs,Kt=a(),c(_e.$$.fragment),Ot=a(),Te=l("p"),Te.textContent=ks,en=a(),c(ye.$$.fragment),tn=a(),Je=l("p"),Je.textContent=Rs,nn=a(),c(we.$$.fragment),sn=a(),be=l("p"),be.textContent=Ns,an=a(),Ue=l("ol"),Ue.innerHTML=Ls,on=a(),je=l("p"),je.innerHTML=Fs,ln=a(),c(ve.$$.fragment),dn=a(),c(Ze.$$.fragment),rn=a(),Ge=l("p"),Ge.innerHTML=Es,pn=a(),c(Ie.$$.fragment),cn=a(),c(Xe.$$.fragment),mn=a(),G=l("div"),c(Be.$$.fragment),Wn=a(),at=l("p"),at.textContent=Qs,xn=a(),ot=l("p"),ot.innerHTML=Ys,Vn=a(),k=l("div"),c(Ce.$$.fragment),kn=a(),lt=l("p"),lt.textContent=Ss,Rn=a(),c(z.$$.fragment),Nn=a(),H=l("div"),c(We.$$.fragment),Ln=a(),it=l("p"),it.textContent=As,Fn=a(),P=l("div"),c(xe.$$.fragment),En=a(),dt=l("p"),dt.innerHTML=zs,un=a(),c(Ve.$$.fragment),hn=a(),Z=l("div"),c(ke.$$.fragment),Qn=a(),rt=l("p"),rt.textContent=Hs,Yn=a(),pt=l("p"),pt.innerHTML=Ps,Sn=a(),ct=l("p"),ct.textContent=$s,An=a(),R=l("div"),c(Re.$$.fragment),zn=a(),mt=l("p"),mt.textContent=Ds,Hn=a(),c($.$$.fragment),Pn=a(),D=l("div"),c(Ne.$$.fragment),$n=a(),ut=l("p"),ut.textContent=qs,Dn=a(),q=l("div"),c(Le.$$.fragment),qn=a(),ht=l("p"),ht.innerHTML=Ks,gn=a(),c(Fe.$$.fragment),fn=a(),J=l("div"),c(Ee.$$.fragment),Kn=a(),gt=l("p"),gt.textContent=Os,On=a(),ft=l("p"),ft.innerHTML=ea,es=a(),Mt=l
("p"),Mt.textContent=ta,ts=a(),N=l("div"),c(Qe.$$.fragment),ns=a(),_t=l("p"),_t.textContent=na,ss=a(),c(K.$$.fragment),as=a(),L=l("div"),c(Ye.$$.fragment),os=a(),Tt=l("p"),Tt.textContent=sa,ls=a(),yt=l("p"),yt.innerHTML=aa,is=a(),O=l("div"),c(Se.$$.fragment),ds=a(),Jt=l("p"),Jt.textContent=oa,rs=a(),B=l("div"),c(Ae.$$.fragment),ps=a(),wt=l("p"),wt.textContent=la,cs=a(),bt=l("p"),bt.innerHTML=ia,ms=a(),Ut=l("p"),Ut.textContent=da,us=a(),jt=l("ul"),jt.innerHTML=ra,hs=a(),ee=l("div"),c(ze.$$.fragment),gs=a(),vt=l("p"),vt.textContent=pa,fs=a(),te=l("div"),c(He.$$.fragment),Ms=a(),Zt=l("p"),Zt.textContent=ca,Mn=a(),c(Pe.$$.fragment),_n=a(),W=l("div"),c($e.$$.fragment),_s=a(),F=l("div"),c(De.$$.fragment),Ts=a(),Gt=l("p"),Gt.textContent=ma,ys=a(),c(ne.$$.fragment),Js=a(),se=l("div"),c(qe.$$.fragment),ws=a(),It=l("p"),It.textContent=ua,bs=a(),E=l("div"),c(Ke.$$.fragment),Us=a(),Xt=l("p"),Xt.textContent=ha,js=a(),Bt=l("p"),Bt.innerHTML=ga,Tn=a(),c(Oe.$$.fragment),yn=a(),S=l("div"),c(et.$$.fragment),vs=a(),Ct=l("p"),Ct.textContent=fa,Jn=a(),c(tt.$$.fragment),wn=a(),Nt=l("p"),this.h()},l(e){const 
s=Ja("svelte-u9bgzb",document.head);M=i(s,"META",{name:!0,content:!0}),s.forEach(t),I=o(e),y=i(e,"P",{}),U(y).forEach(t),_=o(e),m(T.$$.fragment,e),r=o(e),w=i(e,"DIV",{class:!0,"data-svelte-h":!0}),p(w)!=="svelte-si9ct8"&&(w.innerHTML=Zs),Lt=o(e),oe=i(e,"P",{"data-svelte-h":!0}),p(oe)!=="svelte-3vo67t"&&(oe.innerHTML=Gs),Ft=o(e),le=i(e,"P",{"data-svelte-h":!0}),p(le)!=="svelte-1134kk7"&&(le.innerHTML=Is),Et=o(e),ie=i(e,"P",{"data-svelte-h":!0}),p(ie)!=="svelte-qw2r3a"&&(ie.innerHTML=Xs),Qt=o(e),m(de.$$.fragment,e),Yt=o(e),re=i(e,"P",{"data-svelte-h":!0}),p(re)!=="svelte-7k8pa1"&&(re.textContent=Bs),St=o(e),pe=i(e,"UL",{"data-svelte-h":!0}),p(pe)!=="svelte-12js9fh"&&(pe.innerHTML=Cs),At=o(e),ce=i(e,"P",{"data-svelte-h":!0}),p(ce)!=="svelte-v5oo8i"&&(ce.textContent=Ws),zt=o(e),m(me.$$.fragment,e),Ht=o(e),m(ue.$$.fragment,e),Pt=o(e),he=i(e,"P",{"data-svelte-h":!0}),p(he)!=="svelte-71u96j"&&(he.textContent=xs),$t=o(e),m(ge.$$.fragment,e),Dt=o(e),m(fe.$$.fragment,e),qt=o(e),Me=i(e,"P",{"data-svelte-h":!0}),p(Me)!=="svelte-rabflj"&&(Me.innerHTML=Vs),Kt=o(e),m(_e.$$.fragment,e),Ot=o(e),Te=i(e,"P",{"data-svelte-h":!0}),p(Te)!=="svelte-1nr79xi"&&(Te.textContent=ks),en=o(e),m(ye.$$.fragment,e),tn=o(e),Je=i(e,"P",{"data-svelte-h":!0}),p(Je)!=="svelte-q9sktm"&&(Je.textContent=Rs),nn=o(e),m(we.$$.fragment,e),sn=o(e),be=i(e,"P",{"data-svelte-h":!0}),p(be)!=="svelte-mni5wh"&&(be.textContent=Ns),an=o(e),Ue=i(e,"OL",{"data-svelte-h":!0}),p(Ue)!=="svelte-azzl8m"&&(Ue.innerHTML=Ls),on=o(e),je=i(e,"P",{"data-svelte-h":!0}),p(je)!=="svelte-1k5mdn5"&&(je.innerHTML=Fs),ln=o(e),m(ve.$$.fragment,e),dn=o(e),m(Ze.$$.fragment,e),rn=o(e),Ge=i(e,"P",{"data-svelte-h":!0}),p(Ge)!=="svelte-iec67l"&&(Ge.innerHTML=Es),pn=o(e),m(Ie.$$.fragment,e),cn=o(e),m(Xe.$$.fragment,e),mn=o(e),G=i(e,"DIV",{class:!0});var 
X=U(G);m(Be.$$.fragment,X),Wn=o(X),at=i(X,"P",{"data-svelte-h":!0}),p(at)!=="svelte-19ipoo4"&&(at.textContent=Qs),xn=o(X),ot=i(X,"P",{"data-svelte-h":!0}),p(ot)!=="svelte-1sr6eg8"&&(ot.innerHTML=Ys),Vn=o(X),k=i(X,"DIV",{class:!0});var A=U(k);m(Ce.$$.fragment,A),kn=o(A),lt=i(A,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-v78lg8"&&(lt.textContent=Ss),Rn=o(A),m(z.$$.fragment,A),A.forEach(t),Nn=o(X),H=i(X,"DIV",{class:!0});var nt=U(H);m(We.$$.fragment,nt),Ln=o(nt),it=i(nt,"P",{"data-svelte-h":!0}),p(it)!=="svelte-16q0ax1"&&(it.textContent=As),nt.forEach(t),Fn=o(X),P=i(X,"DIV",{class:!0});var st=U(P);m(xe.$$.fragment,st),En=o(st),dt=i(st,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-ycu0kg"&&(dt.innerHTML=zs),st.forEach(t),X.forEach(t),un=o(e),m(Ve.$$.fragment,e),hn=o(e),Z=i(e,"DIV",{class:!0});var C=U(Z);m(ke.$$.fragment,C),Qn=o(C),rt=i(C,"P",{"data-svelte-h":!0}),p(rt)!=="svelte-10tczlw"&&(rt.textContent=Hs),Yn=o(C),pt=i(C,"P",{"data-svelte-h":!0}),p(pt)!=="svelte-1sr6eg8"&&(pt.innerHTML=Ps),Sn=o(C),ct=i(C,"P",{"data-svelte-h":!0}),p(ct)!=="svelte-1yyqpus"&&(ct.textContent=$s),An=o(C),R=i(C,"DIV",{class:!0});var Wt=U(R);m(Re.$$.fragment,Wt),zn=o(Wt),mt=i(Wt,"P",{"data-svelte-h":!0}),p(mt)!=="svelte-v78lg8"&&(mt.textContent=Ds),Hn=o(Wt),m($.$$.fragment,Wt),Wt.forEach(t),Pn=o(C),D=i(C,"DIV",{class:!0});var Un=U(D);m(Ne.$$.fragment,Un),$n=o(Un),ut=i(Un,"P",{"data-svelte-h":!0}),p(ut)!=="svelte-16q0ax1"&&(ut.textContent=qs),Un.forEach(t),Dn=o(C),q=i(C,"DIV",{class:!0});var jn=U(q);m(Le.$$.fragment,jn),qn=o(jn),ht=i(jn,"P",{"data-svelte-h":!0}),p(ht)!=="svelte-ycu0kg"&&(ht.innerHTML=Ks),jn.forEach(t),C.forEach(t),gn=o(e),m(Fe.$$.fragment,e),fn=o(e),J=i(e,"DIV",{class:!0});var 
j=U(J);m(Ee.$$.fragment,j),Kn=o(j),gt=i(j,"P",{"data-svelte-h":!0}),p(gt)!=="svelte-8bvrgw"&&(gt.textContent=Os),On=o(j),ft=i(j,"P",{"data-svelte-h":!0}),p(ft)!=="svelte-1sr6eg8"&&(ft.innerHTML=ea),es=o(j),Mt=i(j,"P",{"data-svelte-h":!0}),p(Mt)!=="svelte-1yyqpus"&&(Mt.textContent=ta),ts=o(j),N=i(j,"DIV",{class:!0});var xt=U(N);m(Qe.$$.fragment,xt),ns=o(xt),_t=i(xt,"P",{"data-svelte-h":!0}),p(_t)!=="svelte-v78lg8"&&(_t.textContent=na),ss=o(xt),m(K.$$.fragment,xt),xt.forEach(t),as=o(j),L=i(j,"DIV",{class:!0});var Vt=U(L);m(Ye.$$.fragment,Vt),os=o(Vt),Tt=i(Vt,"P",{"data-svelte-h":!0}),p(Tt)!=="svelte-1ykv0p4"&&(Tt.textContent=sa),ls=o(Vt),yt=i(Vt,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-gkp5jx"&&(yt.innerHTML=aa),Vt.forEach(t),is=o(j),O=i(j,"DIV",{class:!0});var vn=U(O);m(Se.$$.fragment,vn),ds=o(vn),Jt=i(vn,"P",{"data-svelte-h":!0}),p(Jt)!=="svelte-16q0ax1"&&(Jt.textContent=oa),vn.forEach(t),rs=o(j),B=i(j,"DIV",{class:!0});var Q=U(B);m(Ae.$$.fragment,Q),ps=o(Q),wt=i(Q,"P",{"data-svelte-h":!0}),p(wt)!=="svelte-14h95ik"&&(wt.textContent=la),cs=o(Q),bt=i(Q,"P",{"data-svelte-h":!0}),p(bt)!=="svelte-4xxbdz"&&(bt.innerHTML=ia),ms=o(Q),Ut=i(Q,"P",{"data-svelte-h":!0}),p(Ut)!=="svelte-5nbc7d"&&(Ut.textContent=da),us=o(Q),jt=i(Q,"UL",{"data-svelte-h":!0}),p(jt)!=="svelte-14hshfo"&&(jt.innerHTML=ra),Q.forEach(t),hs=o(j),ee=i(j,"DIV",{class:!0});var Zn=U(ee);m(ze.$$.fragment,Zn),gs=o(Zn),vt=i(Zn,"P",{"data-svelte-h":!0}),p(vt)!=="svelte-176er7i"&&(vt.textContent=pa),Zn.forEach(t),fs=o(j),te=i(j,"DIV",{class:!0});var Gn=U(te);m(He.$$.fragment,Gn),Ms=o(Gn),Zt=i(Gn,"P",{"data-svelte-h":!0}),p(Zt)!=="svelte-1eod455"&&(Zt.textContent=ca),Gn.forEach(t),j.forEach(t),Mn=o(e),m(Pe.$$.fragment,e),_n=o(e),W=i(e,"DIV",{class:!0});var ae=U(W);m($e.$$.fragment,ae),_s=o(ae),F=i(ae,"DIV",{class:!0});var 
kt=U(F);m(De.$$.fragment,kt),Ts=o(kt),Gt=i(kt,"P",{"data-svelte-h":!0}),p(Gt)!=="svelte-v78lg8"&&(Gt.textContent=ma),ys=o(kt),m(ne.$$.fragment,kt),kt.forEach(t),Js=o(ae),se=i(ae,"DIV",{class:!0});var In=U(se);m(qe.$$.fragment,In),ws=o(In),It=i(In,"P",{"data-svelte-h":!0}),p(It)!=="svelte-tr32vd"&&(It.textContent=ua),In.forEach(t),bs=o(ae),E=i(ae,"DIV",{class:!0});var Rt=U(E);m(Ke.$$.fragment,Rt),Us=o(Rt),Xt=i(Rt,"P",{"data-svelte-h":!0}),p(Xt)!=="svelte-1p9wfz7"&&(Xt.textContent=ha),js=o(Rt),Bt=i(Rt,"P",{"data-svelte-h":!0}),p(Bt)!=="svelte-7cxa61"&&(Bt.innerHTML=ga),Rt.forEach(t),ae.forEach(t),Tn=o(e),m(Oe.$$.fragment,e),yn=o(e),S=i(e,"DIV",{class:!0});var Xn=U(S);m(et.$$.fragment,Xn),vs=o(Xn),Ct=i(Xn,"P",{"data-svelte-h":!0}),p(Ct)!=="svelte-ia4jjd"&&(Ct.textContent=fa),Xn.forEach(t),Jn=o(e),m(tt.$$.fragment,e),wn=o(e),Nt=i(e,"P",{}),U(Nt).forEach(t),this.h()},h(){b(M,"name","hf:doc:metadata"),b(M,"content",Ga),b(w,"class","flex flex-wrap space-x-1"),b(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(O,"class","docstring border-l-2 
border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(te,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(se,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,s){n(document.head,M),d(e,I,s),d(e,y,s),d(e,_,s),u(T,e,s),d(e,r,s),d(e,w,s),d(e,Lt,s),d(e,oe,s),d(e,Ft,s),d(e,le,s),d(e,Et,s),d(e,ie,s),d(e,Qt,s),u(de,e,s),d(e,Yt,s),d(e,re,s),d(e,St,s),d(e,pe,s),d(e,At,s),d(e,ce,s),d(e,zt,s),u(me,e,s),d(e,Ht,s),u(ue,e,s),d(e,Pt,s),d(e,he,s),d(e,$t,s),u(ge,e,s),d(e,Dt,s),u(fe,e,s),d(e,qt,s),d(e,Me,s),d(e,Kt,s),u(_e,e,s),d(e,Ot,s),d(e,Te,s),d(e,en,s),u(ye,e,s),d(e,tn,s),d(e,Je,s),d(e,nn,s),u(we,e,s),d(e,sn,s),d(e,be,s),d(e,an,s),d(e,Ue,s),d(e,on,s),d(e,je,s),d(e,ln,s),u(ve,e,s),d(e,dn,s),u(Ze,e,s),d(e,rn,s),d(e,Ge,s),d(e,pn,s),u(Ie,e,s),d(e,cn,s),u(Xe,e,s),d(e,mn,s),d(e,G,s),u(Be,G,null),n(G,Wn),n(G,at),n(G,xn),n(G,ot),n(G,Vn),n(G,k),u(Ce,k,null),n(k,kn),n(k,lt),n(k,Rn),u(z,k,null),n(G,Nn),n(G,H),u(We,H,null),n(H,Ln),n(H,it),n(G,Fn),n(G,P),u(xe,P,null),n(P,En),n(P,dt),d(e,un,s),u(Ve,e,s),d(e,hn,s),d(e,Z,s),u(ke,Z,null),n(Z,Qn),n(Z,rt),n(Z,Yn),n(Z,pt),n(Z,Sn),n(Z,ct),n(Z,An),n(Z,R),u(Re,R,null),n(R,zn),n(R,mt),n(R,Hn),u($,R,null),n(Z,Pn),n(Z,D),u(Ne,D,null),n(D,$n),n(D,ut),n(Z,Dn),n(Z,q),u
(Le,q,null),n(q,qn),n(q,ht),d(e,gn,s),u(Fe,e,s),d(e,fn,s),d(e,J,s),u(Ee,J,null),n(J,Kn),n(J,gt),n(J,On),n(J,ft),n(J,es),n(J,Mt),n(J,ts),n(J,N),u(Qe,N,null),n(N,ns),n(N,_t),n(N,ss),u(K,N,null),n(J,as),n(J,L),u(Ye,L,null),n(L,os),n(L,Tt),n(L,ls),n(L,yt),n(J,is),n(J,O),u(Se,O,null),n(O,ds),n(O,Jt),n(J,rs),n(J,B),u(Ae,B,null),n(B,ps),n(B,wt),n(B,cs),n(B,bt),n(B,ms),n(B,Ut),n(B,us),n(B,jt),n(J,hs),n(J,ee),u(ze,ee,null),n(ee,gs),n(ee,vt),n(J,fs),n(J,te),u(He,te,null),n(te,Ms),n(te,Zt),d(e,Mn,s),u(Pe,e,s),d(e,_n,s),d(e,W,s),u($e,W,null),n(W,_s),n(W,F),u(De,F,null),n(F,Ts),n(F,Gt),n(F,ys),u(ne,F,null),n(W,Js),n(W,se),u(qe,se,null),n(se,ws),n(se,It),n(W,bs),n(W,E),u(Ke,E,null),n(E,Us),n(E,Xt),n(E,js),n(E,Bt),d(e,Tn,s),u(Oe,e,s),d(e,yn,s),d(e,S,s),u(et,S,null),n(S,vs),n(S,Ct),d(e,Jn,s),u(tt,e,s),d(e,wn,s),d(e,Nt,s),bn=!0},p(e,[s]){const X={};s&2&&(X.$$scope={dirty:s,ctx:e}),z.$set(X);const A={};s&2&&(A.$$scope={dirty:s,ctx:e}),$.$set(A);const nt={};s&2&&(nt.$$scope={dirty:s,ctx:e}),K.$set(nt);const 
st={};s&2&&(st.$$scope={dirty:s,ctx:e}),ne.$set(st)},i(e){bn||(h(T.$$.fragment,e),h(de.$$.fragment,e),h(me.$$.fragment,e),h(ue.$$.fragment,e),h(ge.$$.fragment,e),h(fe.$$.fragment,e),h(_e.$$.fragment,e),h(ye.$$.fragment,e),h(we.$$.fragment,e),h(ve.$$.fragment,e),h(Ze.$$.fragment,e),h(Ie.$$.fragment,e),h(Xe.$$.fragment,e),h(Be.$$.fragment,e),h(Ce.$$.fragment,e),h(z.$$.fragment,e),h(We.$$.fragment,e),h(xe.$$.fragment,e),h(Ve.$$.fragment,e),h(ke.$$.fragment,e),h(Re.$$.fragment,e),h($.$$.fragment,e),h(Ne.$$.fragment,e),h(Le.$$.fragment,e),h(Fe.$$.fragment,e),h(Ee.$$.fragment,e),h(Qe.$$.fragment,e),h(K.$$.fragment,e),h(Ye.$$.fragment,e),h(Se.$$.fragment,e),h(Ae.$$.fragment,e),h(ze.$$.fragment,e),h(He.$$.fragment,e),h(Pe.$$.fragment,e),h($e.$$.fragment,e),h(De.$$.fragment,e),h(ne.$$.fragment,e),h(qe.$$.fragment,e),h(Ke.$$.fragment,e),h(Oe.$$.fragment,e),h(et.$$.fragment,e),h(tt.$$.fragment,e),bn=!0)},o(e){g(T.$$.fragment,e),g(de.$$.fragment,e),g(me.$$.fragment,e),g(ue.$$.fragment,e),g(ge.$$.fragment,e),g(fe.$$.fragment,e),g(_e.$$.fragment,e),g(ye.$$.fragment,e),g(we.$$.fragment,e),g(ve.$$.fragment,e),g(Ze.$$.fragment,e),g(Ie.$$.fragment,e),g(Xe.$$.fragment,e),g(Be.$$.fragment,e),g(Ce.$$.fragment,e),g(z.$$.fragment,e),g(We.$$.fragment,e),g(xe.$$.fragment,e),g(Ve.$$.fragment,e),g(ke.$$.fragment,e),g(Re.$$.fragment,e),g($.$$.fragment,e),g(Ne.$$.fragment,e),g(Le.$$.fragment,e),g(Fe.$$.fragment,e),g(Ee.$$.fragment,e),g(Qe.$$.fragment,e),g(K.$$.fragment,e),g(Ye.$$.fragment,e),g(Se.$$.fragment,e),g(Ae.$$.fragment,e),g(ze.$$.fragment,e),g(He.$$.fragment,e),g(Pe.$$.fragment,e),g($e.$$.fragment,e),g(De.$$.fragment,e),g(ne.$$.fragment,e),g(qe.$$.fragment,e),g(Ke.$$.fragment,e),g(Oe.$$.fragment,e),g(et.$$.fragment,e),g(tt.$$.fragment,e),bn=!1},d(e){e&&(t(I),t(y),t(_),t(r),t(w),t(Lt),t(oe),t(Ft),t(le),t(Et),t(ie),t(Qt),t(Yt),t(re),t(St),t(pe),t(At),t(ce),t(zt),t(Ht),t(Pt),t(he),t($t),t(Dt),t(qt),t(Me),t(Kt),t(Ot),t(Te),t(en),t(tn),t(Je),t(nn),t(sn),t(be),t(an),t(Ue),t(on),t(je),t(ln),t
(dn),t(rn),t(Ge),t(pn),t(cn),t(mn),t(G),t(un),t(hn),t(Z),t(gn),t(fn),t(J),t(Mn),t(_n),t(W),t(Tn),t(yn),t(S),t(Jn),t(wn),t(Nt)),t(M),f(T,e),f(de,e),f(me,e),f(ue,e),f(ge,e),f(fe,e),f(_e,e),f(ye,e),f(we,e),f(ve,e),f(Ze,e),f(Ie,e),f(Xe,e),f(Be),f(Ce),f(z),f(We),f(xe),f(Ve,e),f(ke),f(Re),f($),f(Ne),f(Le),f(Fe,e),f(Ee),f(Qe),f(K),f(Ye),f(Se),f(Ae),f(ze),f(He),f(Pe,e),f($e),f(De),f(ne),f(qe),f(Ke),f(Oe,e),f(et),f(tt,e)}}}/*
 * Ga: JSON text describing this documentation page's outline. The top-level
 * object carries "title", "local", "depth" and a "sections" array whose
 * entries have the same four keys. The fragment's h() step passes this string
 * as the "content" value for the META element it names "hf:doc:metadata".
 * It is kept as a string here; nothing in this chunk parses it.
 */const Ga='{"title":"LTX-2","local":"ltx-2","sections":[{"title":"Two-stages Generation","local":"two-stages-generation","sections":[],"depth":2},{"title":"Distilled checkpoint generation","local":"distilled-checkpoint-generation","sections":[],"depth":2},{"title":"Condition Pipeline Generation","local":"condition-pipeline-generation","sections":[],"depth":2},{"title":"Multimodal Guidance","local":"multimodal-guidance","sections":[],"depth":2},{"title":"Prompt Enhancement","local":"prompt-enhancement","sections":[],"depth":2},{"title":"LTX2Pipeline","local":"diffusers.LTX2Pipeline","sections":[],"depth":2},{"title":"LTX2ImageToVideoPipeline","local":"diffusers.LTX2ImageToVideoPipeline","sections":[],"depth":2},{"title":"LTX2ConditionPipeline","local":"diffusers.LTX2ConditionPipeline","sections":[],"depth":2},{"title":"LTX2LatentUpsamplePipeline","local":"diffusers.LTX2LatentUpsamplePipeline","sections":[],"depth":2},{"title":"LTX2PipelineOutput","local":"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput","sections":[],"depth":2}],"depth":1}';/*
 * Ia: instance function handed to ya() by the ka constructor below.
 * It hands a callback to _a (the `o` export of the scheduler chunk;
 * presumably Svelte's onMount - confirm against that chunk) and then, via the
 * comma expression, returns an empty array.
 * The callback reads the "fw" query parameter from window.location.search and
 * discards the value, so it has no visible effect in this chunk.
 * NOTE(review): looks like a leftover from a framework-selector feature - confirm.
 * The parameter V is never read.
 */function Ia(V){return _a(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/*
 * ka: the component class this module exports under the name `component`.
 * It extends Ta (the `S` export of the index chunk; presumably
 * SvelteComponent - confirm). The constructor forwards its options argument M
 * to ya(this, M, Ia, Za, Ma, {}): Ia is the instance function above, Za is the
 * fragment factory defined earlier in this file, Ma is the `s` export of the
 * scheduler chunk, and the trailing {} is an empty object literal.
 */class ka extends Ta{constructor(M){super(),ya(this,M,Ia,Za,Ma,{})}}export{ka as component}; | |
Xet Storage Details
- Size:
- 202 kB
- Xet hash:
- 7a9c6d87332c6ff6e1224f5c831f451ea0b93acfe7a8b3805457088a915dea9a
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.