Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / diffusers /pr_13370 /en /_app /immutable /nodes /169.6d4d5dfd.js

rtrm's picture

3 months ago

197 kB

	import{s as Ks,o as Os,n as Un}from"../chunks/scheduler.53228c21.js";import{S as ea,i as ta,e as i,s,c as p,h as na,a as d,d as t,b as a,f as U,g as c,j as g,k as b,l as o,m as l,n as m,t as u,o as h,p as M}from"../chunks/index.100fac89.js";import{D as v}from"../chunks/Docstring.b1c56063.js";import{C as E}from"../chunks/CodeBlock.d30a6509.js";import{E as bn}from"../chunks/ExampleCodeBlock.fa72d7c3.js";import{H as W,E as sa}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.78c624f7.js";function aa(C){let f,I="Examples:",y,_,T;return _=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMlBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5leHBvcnRfdXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEElMEFwaXBlJTIwJTNEJTIwTFRYMlBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJMaWdodHJpY2tzJTJGTFRYLTIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwd29tYW4lMjB3aXRoJTIwbG9uZyUyMGJyb3duJTIwaGFpciUyMGFuZCUyMGxpZ2h0JTIwc2tpbiUyMHNtaWxlcyUyMGF0JTIwYW5vdGhlciUyMHdvbWFuJTIwd2l0aCUyMGxvbmclMjBibG9uZGUlMjBoYWlyLiUyMFRoZSUyMHdvbWFuJTIwd2l0aCUyMGJyb3duJTIwaGFpciUyMHdlYXJzJTIwYSUyMGJsYWNrJTIwamFja2V0JTIwYW5kJTIwaGFzJTIwYSUyMHNtYWxsJTJDJTIwYmFyZWx5JTIwbm90aWNlYWJsZSUyMG1vbGUlMjBvbiUyMGhlciUyMHJpZ2h0JTIwY2hlZWsuJTIwVGhlJTIwY2FtZXJhJTIwYW5nbGUlMjBpcyUyMGElMjBjbG9zZS11cCUyQyUyMGZvY3VzZWQlMjBvbiUyMHRoZSUyMHdvbWFuJTIwd2l0aCUyMGJyb3duJTIwaGFpcidzJTIwZmFjZS4lMjBUaGUlMjBsaWdodGluZyUyMGlzJTIwd2FybSUyMGFuZCUyMG5hdHVyYWwlMkMlMjBsaWtlbHklMjBmcm9tJTIwdGhlJTIwc2V0dGluZyUyMHN1biUyQyUyMGNhc3RpbmclMjBhJTIwc29mdCUyMGdsb3clMjBvbiUyMHRoZSUyMHNjZW5lLiUyMFRoZSUyMHNjZW5lJTIwYXBwZWFycyUyMHRvJTIwYmUlMjByZWFsLWxpZmUlMjBmb290YWdlJTIyJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyd29yc3QlMjBxdWFsaXR5JTJDJTIwaW5jb25zaXN0ZW50JTIwbW90aW9uJTJDJTIwYmx1cnJ5JTJDJTIwaml0dGVyeSUyQyUyMGRpc3RvcnRlZCUyMiUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0Q3NjglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q1MTIlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENDAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDQuMCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybnAlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUyMCUyMCUyMyUyMHNob3VsZCUyMGJlJTIwMjQwMDAlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMnZpZGVvLm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2Pipeline
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video

	<span class="hljs-meta">>>> </span>pipe = LTX2Pipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16)
	<span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload()

	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"</span>
	<span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted"</span>

	<span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span>
	<span class="hljs-meta">>>> </span>video, audio = pipe(
	<span class="hljs-meta">... </span> prompt=prompt,
	<span class="hljs-meta">... </span> negative_prompt=negative_prompt,
	<span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>,
	<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
	<span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>,
	<span class="hljs-meta">... </span> frame_rate=frame_rate,
	<span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>,
	<span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>,
	<span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>,
	<span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>,
	<span class="hljs-meta">... </span>)

	<span class="hljs-meta">>>> </span>encode_video(
	<span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>],
	<span class="hljs-meta">... </span> fps=frame_rate,
	<span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	<span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span>
	<span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>,
	<span class="hljs-meta">... </span>)`,wrap:!1}}),{c(){f=i("p"),f.textContent=I,y=s(),p(_.$$.fragment)},l(r){f=d(r,"P",{"data-svelte-h":!0}),g(f)!=="svelte-kvfsh7"&&(f.textContent=I),y=a(r),c(_.$$.fragment,r)},m(r,w){l(r,f,w),l(r,y,w),m(_,r,w),T=!0},p:Un,i(r){T\|\|(u(_.$$.fragment,r),T=!0)},o(r){h(_.$$.fragment,r),T=!1},d(r){r&&(t(f),t(y)),M(_,r)}}}function oa(C){let f,I="Examples:",y,_,T;return _=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5leHBvcnRfdXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwbG9hZF9pbWFnZSUwQSUwQXBpcGUlMjAlM0QlMjBMVFgySW1hZ2VUb1ZpZGVvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMkxpZ2h0cmlja3MlMkZMVFgtMiUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQWltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZhLXItci1vLXclMkZ0aW55LW1lbWUtZGF0YXNldC1jYXB0aW9uZWQlMkZyZXNvbHZlJTJGbWFpbiUyRmltYWdlcyUyRjgucG5nJTIyJTBBKSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjB5b3VuZyUyMGdpcmwlMjBzdGFuZHMlMjBjYWxtbHklMjBpbiUyMHRoZSUyMGZvcmVncm91bmQlMkMlMjBsb29raW5nJTIwZGlyZWN0bHklMjBhdCUyMHRoZSUyMGNhbWVyYSUyQyUyMGFzJTIwYSUyMGhvdXNlJTIwZmlyZSUyMHJhZ2VzJTIwaW4lMjB0aGUlMjBiYWNrZ3JvdW5kLiUyMiUwQW5lZ2F0aXZlX3Byb21wdCUyMCUzRCUyMCUyMndvcnN0JTIwcXVhbGl0eSUyQyUyMGluY29uc2lzdGVudCUyMG1vdGlvbiUyQyUyMGJsdXJyeSUyQyUyMGppdHRlcnklMkMlMjBkaXN0b3J0ZWQlMjIlMEElMEFmcmFtZV9yYXRlJTIwJTNEJTIwMjQuMCUwQXZpZGVvJTJDJTIwYXVkaW8lMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGltYWdlJTNEaW1hZ2UlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRDc2OCUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9mcmFtZXMlM0QxMjElMkMlMEElMjAlMjAlMjAlMjBmcmFtZV9yYXRlJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q0MCUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNENC4wJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTIwJTIwJTIzJTIwc2hvdWxkJTIwYmUlMjAyNDAwMCUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIydmlkZW8ubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ImageToVideoPipeline
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image

	<span class="hljs-meta">>>> </span>pipe = LTX2ImageToVideoPipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16)
	<span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload()

	<span class="hljs-meta">>>> </span>image = load_image(
	<span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"</span>
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background."</span>
	<span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted"</span>

	<span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span>
	<span class="hljs-meta">>>> </span>video, audio = pipe(
	<span class="hljs-meta">... </span> image=image,
	<span class="hljs-meta">... </span> prompt=prompt,
	<span class="hljs-meta">... </span> negative_prompt=negative_prompt,
	<span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>,
	<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
	<span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>,
	<span class="hljs-meta">... </span> frame_rate=frame_rate,
	<span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>,
	<span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>,
	<span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>,
	<span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>,
	<span class="hljs-meta">... </span>)

	<span class="hljs-meta">>>> </span>encode_video(
	<span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>],
	<span class="hljs-meta">... </span> fps=frame_rate,
	<span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	<span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span>
	<span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>,
	<span class="hljs-meta">... </span>)`,wrap:!1}}),{c(){f=i("p"),f.textContent=I,y=s(),p(_.$$.fragment)},l(r){f=d(r,"P",{"data-svelte-h":!0}),g(f)!=="svelte-kvfsh7"&&(f.textContent=I),y=a(r),c(_.$$.fragment,r)},m(r,w){l(r,f,w),l(r,y,w),m(_,r,w),T=!0},p:Un,i(r){T\|\|(u(_.$$.fragment,r),T=!0)},o(r){h(_.$$.fragment,r),T=!1},d(r){r&&(t(f),t(y)),M(_,r)}}}function la(C){let f,I="Examples:",y,_,T;return _=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5leHBvcnRfdXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnBpcGVsaW5lX2x0eDJfY29uZGl0aW9uJTIwaW1wb3J0JTIwTFRYMlZpZGVvQ29uZGl0aW9uJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMEElMEFwaXBlJTIwJTNEJTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJMaWdodHJpY2tzJTJGTFRYLTIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFmaXJzdF9pbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGZmxmMnZfaW5wdXRfZmlyc3RfZnJhbWUucG5nJTIyJTBBKSUwQWxhc3RfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmZsZjJ2X2lucHV0X2xhc3RfZnJhbWUucG5nJTIyJTBBKSUwQWZpcnN0X2NvbmQlMjAlM0QlMjBMVFgyVmlkZW9Db25kaXRpb24oZnJhbWVzJTNEZmlyc3RfaW1hZ2UlMkMlMjBpbmRleCUzRDAlMkMlMjBzdHJlbmd0aCUzRDEuMCklMEFsYXN0X2NvbmQlMjAlM0QlMjBMVFgyVmlkZW9Db25kaXRpb24oZnJhbWVzJTNEbGFzdF9pbWFnZSUyQyUyMGluZGV4JTNELTElMkMlMjBzdHJlbmd0aCUzRDEuMCklMEFjb25kaXRpb25zJTIwJTNEJTIwJTVCZmlyc3RfY29uZCUyQyUyMGxhc3RfY29uZCU1RCUwQXByb21wdCUyMCUzRCUyMCUyMkNHJTIwYW5pbWF0aW9uJTIwc3R5bGUlMkMlMjBhJTIwc21hbGwlMjBibHVlJTIwYmlyZCUyMHRha2VzJTIwb2ZmJTIwZnJvbSUyMHRoZSUyMGdyb3VuZCUyQyUyMGZsYXBwaW5nJTIwaXRzJTIwd2luZ3MuJTIyJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyd29yc3QlMjBxdWFsaXR5JTJDJTIwaW5jb25zaXN0ZW50JTIwbW90aW9uJTJDJTIwYmx1cnJ5JTJDJTIwaml0dGVyeSUyQyUyMGRpc3RvcnRlZCUyQyUyMHN0YXRpYyUyMiUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW8lMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGNvbmRpdGlvbnMlM0Rjb25kaXRpb25zJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwd2lkdGglM0Q3NjglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0Q1MTIlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENDAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDQuMCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybnAlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQXZpZGVvJTIwJTNEJTIwKHZpZGVvJTIwKiUyMDI1NSkucm91bmQoKS5hc3R5cGUoJTIydWludDglMjIpJTBBdmlkZW8lMjAlM0QlMjB0b3JjaC5mcm9tX251bXB5KHZpZGVvKSUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUyMCUyMCUyMyUyMHNob3VsZCUyMGJlJTIwMjQwMDAlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMnZpZGVvLm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ConditionPipeline
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.pipeline_ltx2_condition <span class="hljs-keyword">import</span> LTX2VideoCondition
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image

	<span class="hljs-meta">>>> </span>pipe = LTX2ConditionPipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16)
	<span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload()

	<span class="hljs-meta">>>> </span>first_image = load_image(
	<span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png"</span>
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>last_image = load_image(
	<span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png"</span>
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>first_cond = LTX2VideoCondition(frames=first_image, index=<span class="hljs-number">0</span>, strength=<span class="hljs-number">1.0</span>)
	<span class="hljs-meta">>>> </span>last_cond = LTX2VideoCondition(frames=last_image, index=-<span class="hljs-number">1</span>, strength=<span class="hljs-number">1.0</span>)
	<span class="hljs-meta">>>> </span>conditions = [first_cond, last_cond]
	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"CG animation style, a small blue bird takes off from the ground, flapping its wings."</span>
	<span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted, static"</span>

	<span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span>
	<span class="hljs-meta">>>> </span>video = pipe(
	<span class="hljs-meta">... </span> conditions=conditions,
	<span class="hljs-meta">... </span> prompt=prompt,
	<span class="hljs-meta">... </span> negative_prompt=negative_prompt,
	<span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>,
	<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
	<span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>,
	<span class="hljs-meta">... </span> frame_rate=frame_rate,
	<span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>,
	<span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>,
	<span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>,
	<span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>,
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>video = (video * <span class="hljs-number">255</span>).<span class="hljs-built_in">round</span>().astype(<span class="hljs-string">"uint8"</span>)
	<span class="hljs-meta">>>> </span>video = torch.from_numpy(video)

	<span class="hljs-meta">>>> </span>encode_video(
	<span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>],
	<span class="hljs-meta">... </span> fps=frame_rate,
	<span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	<span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span>
	<span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>,
	<span class="hljs-meta">... </span>)`,wrap:!1}}),{c(){f=i("p"),f.textContent=I,y=s(),p(_.$$.fragment)},l(r){f=d(r,"P",{"data-svelte-h":!0}),g(f)!=="svelte-kvfsh7"&&(f.textContent=I),y=a(r),c(_.$$.fragment,r)},m(r,w){l(r,f,w),l(r,y,w),m(_,r,w),T=!0},p:Un,i(r){T\|\|(u(_.$$.fragment,r),T=!0)},o(r){h(_.$$.fragment,r),T=!1},d(r){r&&(t(f),t(y)),M(_,r)}}}function ia(C){let f,I="Examples:",y,_,T;return _=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lJTJDJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmV4cG9ydF91dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIubGF0ZW50X3Vwc2FtcGxlciUyMGltcG9ydCUyMExUWDJMYXRlbnRVcHNhbXBsZXJNb2RlbCUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBsb2FkX2ltYWdlJTBBJTBBcGlwZSUyMCUzRCUyMExUWDJJbWFnZVRvVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyTGlnaHRyaWNrcyUyRkxUWC0yJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmEtci1yLW8tdyUyRnRpbnktbWVtZS1kYXRhc2V0LWNhcHRpb25lZCUyRnJlc29sdmUlMkZtYWluJTJGaW1hZ2VzJTJGOC5wbmclMjIlMEEpJTBBcHJvbXB0JTIwJTNEJTIwJTIyQSUyMHlvdW5nJTIwZ2lybCUyMHN0YW5kcyUyMGNhbG1seSUyMGluJTIwdGhlJTIwZm9yZWdyb3VuZCUyQyUyMGxvb2tpbmclMjBkaXJlY3RseSUyMGF0JTIwdGhlJTIwY2FtZXJhJTJDJTIwYXMlMjBhJTIwaG91c2UlMjBmaXJlJTIwcmFnZXMlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQuJTIyJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyd29yc3QlMjBxdWFsaXR5JTJDJTIwaW5jb25zaXN0ZW50JTIwbW90aW9uJTJDJTIwYmx1cnJ5JTJDJTIwaml0dGVyeSUyQyUyMGRpc3RvcnRlZCUyMiUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwaW1hZ2UlM0RpbWFnZSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG5lZ2F0aXZlX3Byb21wdCUzRG5lZ2F0aXZlX3Byb21wdCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNENzY4JTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNENTEyJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDEyMSUyQyUwQSUyMCUyMCUyMCUyMGZyYW1lX3JhdGUlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDQwJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q0LjAlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMnBpbCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBbGF0ZW50X3Vwc2FtcGxlciUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZXJNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyTGlnaHRyaWNrcyUyRkxUWC0yJTIyJTJDJTIwc3ViZm9sZGVyJTNEJTIybGF0ZW50X3Vwc2FtcGxlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBdXBzYW1wbGVfcGlwZSUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZVBpcGVsaW5lKHZhZSUzRHBpcGUudmFlJTJDJTIwbGF0ZW50X3Vwc2FtcGxlciUzRGxhdGVudF91cHNhbXBsZXIpJTBBdXBzYW1wbGVfcGlwZS52YWUuZW5hYmxlX3RpbGluZygpJTBBdXBzYW1wbGVfcGlwZS50byhkZXZpY2UlM0QlMjJjdWRhJTIyJTJDJTIwZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEElMEF2aWRlbyUyMCUzRCUyMHVwc2FtcGxlX3BpcGUoJTBBJTIwJTIwJTIwJTIwdmlkZW8lM0R2aWRlbyUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNENzY4JTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNENTEyJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTVCMCU1RCUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUyMCUyMCUyMyUyMHNob3VsZCUyMGJlJTIwMjQwMDAlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMnZpZGVvLm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image

	<span class="hljs-meta">>>> </span>pipe = LTX2ImageToVideoPipeline.from_pretrained(<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16)
	<span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload()

	<span class="hljs-meta">>>> </span>image = load_image(
	<span class="hljs-meta">... </span> <span class="hljs-string">"https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"</span>
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background."</span>
	<span class="hljs-meta">>>> </span>negative_prompt = <span class="hljs-string">"worst quality, inconsistent motion, blurry, jittery, distorted"</span>

	<span class="hljs-meta">>>> </span>frame_rate = <span class="hljs-number">24.0</span>
	<span class="hljs-meta">>>> </span>video, audio = pipe(
	<span class="hljs-meta">... </span> image=image,
	<span class="hljs-meta">... </span> prompt=prompt,
	<span class="hljs-meta">... </span> negative_prompt=negative_prompt,
	<span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>,
	<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
	<span class="hljs-meta">... </span> num_frames=<span class="hljs-number">121</span>,
	<span class="hljs-meta">... </span> frame_rate=frame_rate,
	<span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">40</span>,
	<span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">4.0</span>,
	<span class="hljs-meta">... </span> output_type=<span class="hljs-string">"pil"</span>,
	<span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>,
	<span class="hljs-meta">... </span>)

	<span class="hljs-meta">>>> </span>latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained(
	<span class="hljs-meta">... </span> <span class="hljs-string">"Lightricks/LTX-2"</span>, subfolder=<span class="hljs-string">"latent_upsampler"</span>, torch_dtype=torch.bfloat16
	<span class="hljs-meta">... </span>)
	<span class="hljs-meta">>>> </span>upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler)
	<span class="hljs-meta">>>> </span>upsample_pipe.vae.enable_tiling()
	<span class="hljs-meta">>>> </span>upsample_pipe.to(device=<span class="hljs-string">"cuda"</span>, dtype=torch.bfloat16)

	<span class="hljs-meta">>>> </span>video = upsample_pipe(
	<span class="hljs-meta">... </span> video=video,
	<span class="hljs-meta">... </span> width=<span class="hljs-number">768</span>,
	<span class="hljs-meta">... </span> height=<span class="hljs-number">512</span>,
	<span class="hljs-meta">... </span> output_type=<span class="hljs-string">"np"</span>,
	<span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>,
	<span class="hljs-meta">... </span>)[<span class="hljs-number">0</span>]

	<span class="hljs-meta">>>> </span>encode_video(
	<span class="hljs-meta">... </span> video[<span class="hljs-number">0</span>],
	<span class="hljs-meta">... </span> fps=frame_rate,
	<span class="hljs-meta">... </span> audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	<span class="hljs-meta">... </span> audio_sample_rate=pipe.vocoder.config.output_sampling_rate, <span class="hljs-comment"># should be 24000</span>
	<span class="hljs-meta">... </span> output_path=<span class="hljs-string">"video.mp4"</span>,
	<span class="hljs-meta">... </span>)`,wrap:!1}}),{c(){f=i("p"),f.textContent=I,y=s(),p(_.$$.fragment)},l(r){f=d(r,"P",{"data-svelte-h":!0}),g(f)!=="svelte-kvfsh7"&&(f.textContent=I),y=a(r),c(_.$$.fragment,r)},m(r,w){l(r,f,w),l(r,y,w),m(_,r,w),T=!0},p:Un,i(r){T\|\|(u(_.$$.fragment,r),T=!0)},o(r){h(_.$$.fragment,r),T=!1},d(r){r&&(t(f),t(y)),M(_,r)}}}function da(C){let f,I,y,_,T,r,w,us='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',Xt,se,hs='<a href="https://hf.co/papers/2601.03233" rel="nofollow">LTX-2</a> is a DiT-based foundation model designed to generate synchronized video and audio within a single model. It brings together the core building blocks of modern video generation, with open weights and a focus on practical, local execution.',Bt,ae,Ms='You can find all the original LTX-Video checkpoints under the <a href="https://huggingface.co/Lightricks" rel="nofollow">Lightricks</a> organization.',Vt,oe,gs='The original codebase for LTX-2 can be found <a href="https://github.com/Lightricks/LTX-2" rel="nofollow">here</a>.',Wt,le,Ct,ie,fs="Recommended pipeline to achieve production quality generation, this pipeline is composed of two stages:",kt,de,_s="<li>Stage 1: Generate a video at the target resolution using diffusion sampling with classifier-free guidance (CFG). This stage produces a coherent low-noise video sequence that respects the text/image conditioning.</li> <li>Stage 2: Upsample the Stage 1 output by 2 and refine details using a distilled LoRA model to improve fidelity and visual quality. Stage 2 may apply lighter CFG to preserve the structure from Stage 1 while enhancing texture and sharpness.</li>",xt,re,Ts="Sample usage of text-to-video two stages pipeline",Rt,pe,Nt,ce,Lt,me,ys="Fastest two-stages generation pipeline using a distilled checkpoint.",Et,ue,Ft,he,Qt,Me,ws="You can use <code>LTX2ConditionPipeline</code> to specify image and/or video conditions at arbitrary latent indices. For example, we can specify both a first-frame and last-frame condition to perform first-last-frame-to-video (FLF2V) generation:",Yt,ge,St,fe,Js="You can use both image and video conditions:",At,_e,zt,Te,bs="Because the conditioning is done via latent frames, the 8 data space frames corresponding to the specified latent frame for an image condition will tend to be static.",Ht,ye,Pt,we,Us="LTX-2.X pipelines support multimodal guidance. It is composed of three terms, all using a CFG-style update rule:",$t,Je,js='<li>Classifier-Free Guidance (CFG): standard <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">CFG</a> where the perturbed (“weaker”) output is generated using the negative prompt.</li> <li>Spatio-Temporal Guidance (STG): <a href="https://huggingface.co/papers/2411.18664" rel="nofollow">STG</a> moves away from a perturbed output created from short-cutting self-attention operations and substitutes in the attention values instead. The idea is that this creates sharper videos and better spatiotemporal consistency.</li> <li>Modality Isolation Guidance: moves away from a perturbed output created from disabling cross-modality (audio-to-video and video-to-audio) cross attention. This guidance is more specific to <a href="https://huggingface.co/papers/2601.03233" rel="nofollow">LTX-2.X</a> models, with the idea that this produces better consistency between the generated audio and video.</li>',Dt,be,vs='These are controlled by the <code>guidance_scale</code>, <code>stg_scale</code>, and <code>modality_scale</code> arguments and can be set separately for video and audio. Additionally, for STG the transformer block indices where self-attention is skipped needs to be specified via the <code>spatio_temporal_guidance_blocks</code> argument. The LTX-2.X pipelines also support <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">guidance rescaling</a> to help reduce over-exposure, which can be a problem when the guidance scales are set to high values.',qt,Ue,Kt,je,Ot,ve,Zs='The LTX-2.X models are sensitive to prompting style. Refer to the <a href="https://ltx.io/model/model-blog/prompting-guide-for-ltx-2" rel="nofollow">official prompting guide</a> for recommendations on how to write a good prompt. Using prompt enhancement, where the supplied prompts are enhanced using the pipeline’s text encoder (by default a <a href="https://huggingface.co/google/gemma-3-12b-it-qat-q4_0-unquantized" rel="nofollow">Gemma 3</a> model) given a system prompt, can also improve sample quality. The optional <code>processor</code> pipeline component needs to be present to use prompt enhancement. Enable prompt enhancement by supplying a <code>system_prompt</code> argument:',en,Ze,tn,Ge,nn,G,Ie,jn,tt,Gs="Pipeline for text-to-video generation.",vn,nt,Is='Reference: <a href="https://github.com/Lightricks/LTX-Video" rel="nofollow">https://github.com/Lightricks/LTX-Video</a>',Zn,k,Xe,Gn,st,Xs="Function invoked when calling the pipeline for generation.",In,Y,Xn,S,Be,Bn,at,Bs="Encodes the prompt into text encoder hidden states.",Vn,A,Ve,Wn,ot,Vs=`Enhances the supplied <code>prompt</code> by generating a new prompt using the current text encoder (default is a
	<code>transformers.Gemma3ForConditionalGeneration</code> model) from it and a system prompt.`,sn,We,an,j,Ce,Cn,lt,Ws="Pipeline for image-to-video generation.",kn,it,Cs='Reference: <a href="https://github.com/Lightricks/LTX-Video" rel="nofollow">https://github.com/Lightricks/LTX-Video</a>',xn,dt,ks="TODO",Rn,x,ke,Nn,rt,xs="Function invoked when calling the pipeline for generation.",Ln,z,En,H,xe,Fn,pt,Rs="Encodes the prompt into text encoder hidden states.",Qn,P,Re,Yn,ct,Ns=`Enhances the supplied <code>prompt</code> by generating a new prompt using the current text encoder (default is a
	<code>transformers.Gemma3ForConditionalGeneration</code> model) from it and a system prompt.`,on,Ne,ln,J,Le,Sn,mt,Ls="Pipeline for video generation which allows image conditions to be inserted at arbitary parts of the video.",An,ut,Es='Reference: <a href="https://github.com/Lightricks/LTX-Video" rel="nofollow">https://github.com/Lightricks/LTX-Video</a>',zn,ht,Fs="TODO",Hn,R,Ee,Pn,Mt,Qs="Function invoked when calling the pipeline for generation.",$n,$,Dn,D,Fe,qn,gt,Ys="Applies visual conditioning frames to an initial latent.",Kn,q,Qe,On,ft,Ss="Encodes the prompt into text encoder hidden states.",es,K,Ye,ts,_t,As="Preprocesses the condition images/videos to torch tensors.",ns,O,Se,ss,Tt,zs="Trim a conditioning sequence to the allowed number of frames.",dn,Ae,rn,V,ze,as,N,He,os,yt,Hs="Function invoked when calling the pipeline for generation.",ls,ee,is,te,Pe,ds,wt,Ps=`Applies Adaptive Instance Normalization (AdaIN) to a latent tensor based on statistics from a reference latent
	tensor.`,rs,L,$e,ps,Jt,$s=`Applies a non-linear tone-mapping function to latent values to reduce their dynamic range in a perceptually
	smooth way using a sigmoid-based compression.`,cs,bt,Ds=`This is useful for regularizing high-variance latents or for conditioning outputs during generation, especially
	when controlling dynamic behavior with a <code>compression</code> factor.`,pn,De,cn,F,qe,ms,Ut,qs="Output class for LTX pipelines.",mn,Ke,un,It,hn;return T=new W({props:{title:"LTX-2",local:"ltx-2",headingTag:"h1"}}),le=new W({props:{title:"Two-stages Generation",local:"two-stages-generation",headingTag:"h2"}}),pe=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmxvd01hdGNoRXVsZXJEaXNjcmV0ZVNjaGVkdWxlciUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIlMjBpbXBvcnQlMjBMVFgyUGlwZWxpbmUlMkMlMjBMVFgyTGF0ZW50VXBzYW1wbGVQaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIubGF0ZW50X3Vwc2FtcGxlciUyMGltcG9ydCUyMExUWDJMYXRlbnRVcHNhbXBsZXJNb2RlbCUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIudXRpbHMlMjBpbXBvcnQlMjBTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmV4cG9ydF91dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQSUwQWRldmljZSUyMCUzRCUyMCUyMmN1ZGElM0EwJTIyJTBBd2lkdGglMjAlM0QlMjA3NjglMEFoZWlnaHQlMjAlM0QlMjA1MTIlMEElMEFwaXBlJTIwJTNEJTIwTFRYMlBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJMaWdodHJpY2tzJTJGTFRYLTIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXBpcGUuZW5hYmxlX3NlcXVlbnRpYWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBiZWF1dGlmdWwlMjBzdW5zZXQlMjBvdmVyJTIwdGhlJTIwb2NlYW4lMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjJzaGFreSUyQyUyMGdsaXRjaHklMkMlMjBsb3clMjBxdWFsaXR5JTJDJTIwd29yc3QlMjBxdWFsaXR5JTJDJTIwZGVmb3JtZWQlMkMlMjBkaXN0b3J0ZWQlMkMlMjBkaXNmaWd1cmVkJTJDJTIwbW90aW9uJTIwc21lYXIlMkMlMjBtb3Rpb24lMjBhcnRpZmFjdHMlMkMlMjBmdXNlZCUyMGZpbmdlcnMlMkMlMjBiYWQlMjBhbmF0b215JTJDJTIwd2VpcmQlMjBoYW5kJTJDJTIwdWdseSUyQyUyMHRyYW5zaXRpb24lMkMlMjBzdGF0aWMuJTIyJTBBJTBBJTIzJTIwU3RhZ2UlMjAxJTIwZGVmYXVsdCUyMChub24tZGlzdGlsbGVkKSUyMGluZmVyZW5jZSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW9fbGF0ZW50JTJDJTIwYXVkaW9fbGF0ZW50JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRHdpZHRoJTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNEaGVpZ2h0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDEyMSUyQyUwQSUyMCUyMCUyMCUyMGZyYW1lX3JhdGUlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDQwJTJDJTBBJTIwJTIwJTIwJTIwc2lnbWFzJTNETm9uZSUyQyUwQSUyMCUyMCUyMCUyMGd1aWRhbmNlX3NjYWxlJTNENC4wJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJsYXRlbnQlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWxhdGVudF91cHNhbXBsZXIlMjAlM0QlMjBMVFgyTGF0ZW50VXBzYW1wbGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMkxpZ2h0cmlja3MlMkZMVFgtMiUyMiUyQyUwQSUyMCUyMCUyMCUyMHN1YmZvbGRlciUzRCUyMmxhdGVudF91cHNhbXBsZXIlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQXVwc2FtcGxlX3BpcGUlMjAlM0QlMjBMVFgyTGF0ZW50VXBzYW1wbGVQaXBlbGluZSh2YWUlM0RwaXBlLnZhZSUyQyUyMGxhdGVudF91cHNhbXBsZXIlM0RsYXRlbnRfdXBzYW1wbGVyKSUwQXVwc2FtcGxlX3BpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKGRldmljZSUzRGRldmljZSklMEF1cHNjYWxlZF92aWRlb19sYXRlbnQlMjAlM0QlMjB1cHNhbXBsZV9waXBlKCUwQSUyMCUyMCUyMCUyMGxhdGVudHMlM0R2aWRlb19sYXRlbnQlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTVCMCU1RCUwQSUwQSUyMyUyMExvYWQlMjBTdGFnZSUyMDIlMjBkaXN0aWxsZWQlMjBMb1JBJTBBcGlwZS5sb2FkX2xvcmFfd2VpZ2h0cyglMEElMjAlMjAlMjAlMjAlMjJMaWdodHJpY2tzJTJGTFRYLTIlMjIlMkMlMjBhZGFwdGVyX25hbWUlM0QlMjJzdGFnZV8yX2Rpc3RpbGxlZCUyMiUyQyUyMHdlaWdodF9uYW1lJTNEJTIybHR4LTItMTliLWRpc3RpbGxlZC1sb3JhLTM4NC5zYWZldGVuc29ycyUyMiUwQSklMEFwaXBlLnNldF9hZGFwdGVycyglMjJzdGFnZV8yX2Rpc3RpbGxlZCUyMiUyQyUyMDEuMCklMEElMjMlMjBWQUUlMjB0aWxpbmclMjBpcyUyMHVzdWFsbHklMjBuZWNlc3NhcnklMjB0byUyMGF2b2lkJTIwT09NJTIwZXJyb3IlMjB3aGVuJTIwVkFFJTIwZGVjb2RpbmclMEFwaXBlLnZhZS5lbmFibGVfdGlsaW5nKCklMEElMjMlMjBDaGFuZ2UlMjBzY2hlZHVsZXIlMjB0byUyMHVzZSUyMFN0YWdlJTIwMiUyMGRpc3RpbGxlZCUyMHNpZ21hcyUyMGFzJTIwaXMlMEFuZXdfc2NoZWR1bGVyJTIwJTNEJTIwRmxvd01hdGNoRXVsZXJEaXNjcmV0ZVNjaGVkdWxlci5mcm9tX2NvbmZpZyglMEElMjAlMjAlMjAlMjBwaXBlLnNjaGVkdWxlci5jb25maWclMkMlMjB1c2VfZHluYW1pY19zaGlmdGluZyUzREZhbHNlJTJDJTIwc2hpZnRfdGVybWluYWwlM0ROb25lJTBBKSUwQXBpcGUuc2NoZWR1bGVyJTIwJTNEJTIwbmV3X3NjaGVkdWxlciUwQSUyMyUyMFN0YWdlJTIwMiUyMGluZmVyZW5jZSUyMHdpdGglMjBkaXN0aWxsZWQlMjBMb1JBJTIwYW5kJTIwc2lnbWFzJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUzRHVwc2NhbGVkX3ZpZGVvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2xhdGVudHMlM0RhdWRpb19sYXRlbnQlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMyUyQyUwQSUyMCUyMCUyMCUyMG5vaXNlX3NjYWxlJTNEU1RBR0VfMl9ESVNUSUxMRURfU0lHTUFfVkFMVUVTJTVCMCU1RCUyQyUyMCUyMyUyMHJlbm9pc2UlMjB3aXRoJTIwZmlyc3QlMjBzaWdtYSUyMHZhbHVlJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGTGlnaHRyaWNrcyUyRkxUWC0yJTJGYmxvYiUyRm1haW4lMkZwYWNrYWdlcyUyRmx0eC1waXBlbGluZXMlMkZzcmMlMkZsdHhfcGlwZWxpbmVzJTJGdGkydmlkX3R3b19zdGFnZXMucHklMjNMMjE4JTBBJTIwJTIwJTIwJTIwc2lnbWFzJTNEU1RBR0VfMl9ESVNUSUxMRURfU0lHTUFfVkFMVUVTJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0QxLjAlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMm5wJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFlbmNvZGVfdmlkZW8oJTBBJTIwJTIwJTIwJTIwdmlkZW8lNUIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwZnBzJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvJTNEYXVkaW8lNUIwJTVELmZsb2F0KCkuY3B1KCklMkMlMEElMjAlMjAlMjAlMjBhdWRpb19zYW1wbGVfcmF0ZSUzRHBpcGUudm9jb2Rlci5jb25maWcub3V0cHV0X3NhbXBsaW5nX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMmx0eDJfbG9yYV9kaXN0aWxsZWRfc2FtcGxlLm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FlowMatchEulerDiscreteScheduler
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2 <span class="hljs-keyword">import</span> LTX2Pipeline, LTX2LatentUpsamplePipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> STAGE_2_DISTILLED_SIGMA_VALUES
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video

	device = <span class="hljs-string">"cuda:0"</span>
	width = <span class="hljs-number">768</span>
	height = <span class="hljs-number">512</span>

	pipe = LTX2Pipeline.from_pretrained(
	<span class="hljs-string">"Lightricks/LTX-2"</span>, torch_dtype=torch.bfloat16
	)
	pipe.enable_sequential_cpu_offload(device=device)

	prompt = <span class="hljs-string">"A beautiful sunset over the ocean"</span>
	negative_prompt = <span class="hljs-string">"shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static."</span>

	<span class="hljs-comment"># Stage 1 default (non-distilled) inference</span>
	frame_rate = <span class="hljs-number">24.0</span>
	video_latent, audio_latent = pipe(
	prompt=prompt,
	negative_prompt=negative_prompt,
	width=width,
	height=height,
	num_frames=<span class="hljs-number">121</span>,
	frame_rate=frame_rate,
	num_inference_steps=<span class="hljs-number">40</span>,
	sigmas=<span class="hljs-literal">None</span>,
	guidance_scale=<span class="hljs-number">4.0</span>,
	output_type=<span class="hljs-string">"latent"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained(
	<span class="hljs-string">"Lightricks/LTX-2"</span>,
	subfolder=<span class="hljs-string">"latent_upsampler"</span>,
	torch_dtype=torch.bfloat16,
	)
	upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler)
	upsample_pipe.enable_model_cpu_offload(device=device)
	upscaled_video_latent = upsample_pipe(
	latents=video_latent,
	output_type=<span class="hljs-string">"latent"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)[<span class="hljs-number">0</span>]

	<span class="hljs-comment"># Load Stage 2 distilled LoRA</span>
	pipe.load_lora_weights(
	<span class="hljs-string">"Lightricks/LTX-2"</span>, adapter_name=<span class="hljs-string">"stage_2_distilled"</span>, weight_name=<span class="hljs-string">"ltx-2-19b-distilled-lora-384.safetensors"</span>
	)
	pipe.set_adapters(<span class="hljs-string">"stage_2_distilled"</span>, <span class="hljs-number">1.0</span>)
	<span class="hljs-comment"># VAE tiling is usually necessary to avoid OOM error when VAE decoding</span>
	pipe.vae.enable_tiling()
	<span class="hljs-comment"># Change scheduler to use Stage 2 distilled sigmas as is</span>
	new_scheduler = FlowMatchEulerDiscreteScheduler.from_config(
	pipe.scheduler.config, use_dynamic_shifting=<span class="hljs-literal">False</span>, shift_terminal=<span class="hljs-literal">None</span>
	)
	pipe.scheduler = new_scheduler
	<span class="hljs-comment"># Stage 2 inference with distilled LoRA and sigmas</span>
	video, audio = pipe(
	latents=upscaled_video_latent,
	audio_latents=audio_latent,
	prompt=prompt,
	negative_prompt=negative_prompt,
	num_inference_steps=<span class="hljs-number">3</span>,
	noise_scale=STAGE_2_DISTILLED_SIGMA_VALUES[<span class="hljs-number">0</span>], <span class="hljs-comment"># renoise with first sigma value https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-pipelines/src/ltx_pipelines/ti2vid_two_stages.py#L218</span>
	sigmas=STAGE_2_DISTILLED_SIGMA_VALUES,
	guidance_scale=<span class="hljs-number">1.0</span>,
	output_type=<span class="hljs-string">"np"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	encode_video(
	video[<span class="hljs-number">0</span>],
	fps=frame_rate,
	audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
	output_path=<span class="hljs-string">"ltx2_lora_distilled_sample.mp4"</span>,
	)`,wrap:!1}}),ce=new W({props:{title:"Distilled checkpoint generation",local:"distilled-checkpoint-generation",headingTag:"h2"}}),ue=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyJTIwaW1wb3J0JTIwTFRYMlBpcGVsaW5lJTJDJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmxhdGVudF91cHNhbXBsZXIlMjBpbXBvcnQlMjBMVFgyTGF0ZW50VXBzYW1wbGVyTW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnV0aWxzJTIwaW1wb3J0JTIwRElTVElMTEVEX1NJR01BX1ZBTFVFUyUyQyUyMFNUQUdFXzJfRElTVElMTEVEX1NJR01BX1ZBTFVFUyUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIuZXhwb3J0X3V0aWxzJTIwaW1wb3J0JTIwZW5jb2RlX3ZpZGVvJTBBJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUwQXdpZHRoJTIwJTNEJTIwNzY4JTBBaGVpZ2h0JTIwJTNEJTIwNTEyJTBBcmFuZG9tX3NlZWQlMjAlM0QlMjA0MiUwQWdlbmVyYXRvciUyMCUzRCUyMHRvcmNoLkdlbmVyYXRvcihkZXZpY2UpLm1hbnVhbF9zZWVkKHJhbmRvbV9zZWVkKSUwQW1vZGVsX3BhdGglMjAlM0QlMjAlMjJyb290b25jaGFpciUyRkxUWC0yLTE5Yi1kaXN0aWxsZWQlMjIlMEElMEFwaXBlJTIwJTNEJTIwTFRYMlBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9wYXRoJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUwQSklMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKGRldmljZSUzRGRldmljZSklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwYmVhdXRpZnVsJTIwc3Vuc2V0JTIwb3ZlciUyMHRoZSUyMG9jZWFuJTIyJTBBbmVnYXRpdmVfcHJvbXB0JTIwJTNEJTIwJTIyc2hha3klMkMlMjBnbGl0Y2h5JTJDJTIwbG93JTIwcXVhbGl0eSUyQyUyMHdvcnN0JTIwcXVhbGl0eSUyQyUyMGRlZm9ybWVkJTJDJTIwZGlzdG9ydGVkJTJDJTIwZGlzZmlndXJlZCUyQyUyMG1vdGlvbiUyMHNtZWFyJTJDJTIwbW90aW9uJTIwYXJ0aWZhY3RzJTJDJTIwZnVzZWQlMjBmaW5nZXJzJTJDJTIwYmFkJTIwYW5hdG9teSUyQyUyMHdlaXJkJTIwaGFuZCUyQyUyMHVnbHklMkMlMjB0cmFuc2l0aW9uJTJDJTIwc3RhdGljLiUyMiUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW9fbGF0ZW50JTJDJTIwYXVkaW9fbGF0ZW50JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRHdpZHRoJTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNEaGVpZ2h0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDEyMSUyQyUwQSUyMCUyMCUyMCUyMGZyYW1lX3JhdGUlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDglMkMlMEElMjAlMjAlMjAlMjBzaWdtYXMlM0RESVNUSUxMRURfU0lHTUFfVkFMVUVTJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0QxLjAlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0RnZW5lcmF0b3IlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBbGF0ZW50X3Vwc2FtcGxlciUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZXJNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwbW9kZWxfcGF0aCUyQyUwQSUyMCUyMCUyMCUyMHN1YmZvbGRlciUzRCUyMmxhdGVudF91cHNhbXBsZXIlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTBBKSUwQXVwc2FtcGxlX3BpcGUlMjAlM0QlMjBMVFgyTGF0ZW50VXBzYW1wbGVQaXBlbGluZSh2YWUlM0RwaXBlLnZhZSUyQyUyMGxhdGVudF91cHNhbXBsZXIlM0RsYXRlbnRfdXBzYW1wbGVyKSUwQXVwc2FtcGxlX3BpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKGRldmljZSUzRGRldmljZSklMEF1cHNjYWxlZF92aWRlb19sYXRlbnQlMjAlM0QlMjB1cHNhbXBsZV9waXBlKCUwQSUyMCUyMCUyMCUyMGxhdGVudHMlM0R2aWRlb19sYXRlbnQlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTVCMCU1RCUwQSUwQXZpZGVvJTJDJTIwYXVkaW8lMjAlM0QlMjBwaXBlKCUwQSUyMCUyMCUyMCUyMGxhdGVudHMlM0R1cHNjYWxlZF92aWRlb19sYXRlbnQlMkMlMEElMjAlMjAlMjAlMjBhdWRpb19sYXRlbnRzJTNEYXVkaW9fbGF0ZW50JTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDMlMkMlMEElMjAlMjAlMjAlMjBub2lzZV9zY2FsZSUzRFNUQUdFXzJfRElTVElMTEVEX1NJR01BX1ZBTFVFUyU1QjAlNUQlMkMlMjAlMjMlMjByZW5vaXNlJTIwd2l0aCUyMGZpcnN0JTIwc2lnbWElMjB2YWx1ZSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRkxpZ2h0cmlja3MlMkZMVFgtMiUyRmJsb2IlMkZtYWluJTJGcGFja2FnZXMlMkZsdHgtcGlwZWxpbmVzJTJGc3JjJTJGbHR4X3BpcGVsaW5lcyUyRmRpc3RpbGxlZC5weSUyM0wxNzglMEElMjAlMjAlMjAlMjBzaWdtYXMlM0RTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0RnZW5lcmF0b3IlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybnAlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIybHR4Ml9kaXN0aWxsZWRfc2FtcGxlLm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2 <span class="hljs-keyword">import</span> LTX2Pipeline, LTX2LatentUpsamplePipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video

	device = <span class="hljs-string">"cuda"</span>
	width = <span class="hljs-number">768</span>
	height = <span class="hljs-number">512</span>
	random_seed = <span class="hljs-number">42</span>
	generator = torch.Generator(device).manual_seed(random_seed)
	model_path = <span class="hljs-string">"rootonchair/LTX-2-19b-distilled"</span>

	pipe = LTX2Pipeline.from_pretrained(
	model_path, torch_dtype=torch.bfloat16
	)
	pipe.enable_sequential_cpu_offload(device=device)

	prompt = <span class="hljs-string">"A beautiful sunset over the ocean"</span>
	negative_prompt = <span class="hljs-string">"shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static."</span>

	frame_rate = <span class="hljs-number">24.0</span>
	video_latent, audio_latent = pipe(
	prompt=prompt,
	negative_prompt=negative_prompt,
	width=width,
	height=height,
	num_frames=<span class="hljs-number">121</span>,
	frame_rate=frame_rate,
	num_inference_steps=<span class="hljs-number">8</span>,
	sigmas=DISTILLED_SIGMA_VALUES,
	guidance_scale=<span class="hljs-number">1.0</span>,
	generator=generator,
	output_type=<span class="hljs-string">"latent"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained(
	model_path,
	subfolder=<span class="hljs-string">"latent_upsampler"</span>,
	torch_dtype=torch.bfloat16,
	)
	upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler)
	upsample_pipe.enable_model_cpu_offload(device=device)
	upscaled_video_latent = upsample_pipe(
	latents=video_latent,
	output_type=<span class="hljs-string">"latent"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)[<span class="hljs-number">0</span>]

	video, audio = pipe(
	latents=upscaled_video_latent,
	audio_latents=audio_latent,
	prompt=prompt,
	negative_prompt=negative_prompt,
	num_inference_steps=<span class="hljs-number">3</span>,
	noise_scale=STAGE_2_DISTILLED_SIGMA_VALUES[<span class="hljs-number">0</span>], <span class="hljs-comment"># renoise with first sigma value https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-pipelines/src/ltx_pipelines/distilled.py#L178</span>
	sigmas=STAGE_2_DISTILLED_SIGMA_VALUES,
	generator=generator,
	guidance_scale=<span class="hljs-number">1.0</span>,
	output_type=<span class="hljs-string">"np"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	encode_video(
	video[<span class="hljs-number">0</span>],
	fps=frame_rate,
	audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
	output_path=<span class="hljs-string">"ltx2_distilled_sample.mp4"</span>,
	)`,wrap:!1}}),he=new W({props:{title:"Condition Pipeline Generation",local:"condition-pipeline-generation",headingTag:"h2"}}),ge=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lJTJDJTIwTFRYMkxhdGVudFVwc2FtcGxlUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmxhdGVudF91cHNhbXBsZXIlMjBpbXBvcnQlMjBMVFgyTGF0ZW50VXBzYW1wbGVyTW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnBpcGVsaW5lX2x0eDJfY29uZGl0aW9uJTIwaW1wb3J0JTIwTFRYMlZpZGVvQ29uZGl0aW9uJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi51dGlscyUyMGltcG9ydCUyMERJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMjBTVEFHRV8yX0RJU1RJTExFRF9TSUdNQV9WQUxVRVMlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLmV4cG9ydF91dGlscyUyMGltcG9ydCUyMGVuY29kZV92aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBsb2FkX2ltYWdlJTBBJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUwQXdpZHRoJTIwJTNEJTIwNzY4JTBBaGVpZ2h0JTIwJTNEJTIwNTEyJTBBcmFuZG9tX3NlZWQlMjAlM0QlMjA0MiUwQWdlbmVyYXRvciUyMCUzRCUyMHRvcmNoLkdlbmVyYXRvcihkZXZpY2UpLm1hbnVhbF9zZWVkKHJhbmRvbV9zZWVkKSUwQW1vZGVsX3BhdGglMjAlM0QlMjAlMjJyb290b25jaGFpciUyRkxUWC0yLTE5Yi1kaXN0aWxsZWQlMjIlMEElMEFwaXBlJTIwJTNEJTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZChtb2RlbF9wYXRoJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKGRldmljZSUzRGRldmljZSklMEFwaXBlLnZhZS5lbmFibGVfdGlsaW5nKCklMEElMEFwcm9tcHQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIyQ0clMjBhbmltYXRpb24lMjBzdHlsZSUyQyUyMGElMjBzbWFsbCUyMGJsdWUlMjBiaXJkJTIwdGFrZXMlMjBvZmYlMjBmcm9tJTIwdGhlJTIwZ3JvdW5kJTJDJTIwZmxhcHBpbmclMjBpdHMlMjB3aW5ncy4lMjBUaGUlMjBiaXJkJ3MlMjBmZWF0aGVycyUyMGFyZSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmRlbGljYXRlJTJDJTIwd2l0aCUyMGElMjB1bmlxdWUlMjBwYXR0ZXJuJTIwb24lMjBpdHMlMjBjaGVzdC4lMjBUaGUlMjBiYWNrZ3JvdW5kJTIwc2hvd3MlMjBhJTIwYmx1ZSUyMHNreSUyMHdpdGglMjB3aGl0ZSUyMGNsb3VkcyUyMHVuZGVyJTIwYnJpZ2h0JTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyc3Vuc2hpbmUuJTIwVGhlJTIwY2FtZXJhJTIwZm9sbG93cyUyMHRoZSUyMGJpcmQlMjB1cHdhcmQlMkMlMjBjYXB0dXJpbmclMjBpdHMlMjBmbGlnaHQlMjBhbmQlMjB0aGUlMjB2YXN0bmVzcyUyMG9mJTIwdGhlJTIwc2t5JTIwZnJvbSUyMGElMjBjbG9zZS11cCUyQyUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmxvdy1hbmdsZSUyMHBlcnNwZWN0aXZlLiUyMiUwQSklMEElMEFmaXJzdF9pbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGZmxmMnZfaW5wdXRfZmlyc3RfZnJhbWUucG5nJTIyJTJDJTBBKSUwQWxhc3RfaW1hZ2UlMjAlM0QlMjBsb2FkX2ltYWdlKCUwQSUyMCUyMCUyMCUyMCUyMmh0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmh1Z2dpbmdmYWNlJTJGZG9jdW1lbnRhdGlvbi1pbWFnZXMlMkZyZXNvbHZlJTJGbWFpbiUyRmRpZmZ1c2VycyUyRmZsZjJ2X2lucHV0X2xhc3RfZnJhbWUucG5nJTIyJTJDJTBBKSUwQWZpcnN0X2NvbmQlMjAlM0QlMjBMVFgyVmlkZW9Db25kaXRpb24oZnJhbWVzJTNEZmlyc3RfaW1hZ2UlMkMlMjBpbmRleCUzRDAlMkMlMjBzdHJlbmd0aCUzRDEuMCklMEFsYXN0X2NvbmQlMjAlM0QlMjBMVFgyVmlkZW9Db25kaXRpb24oZnJhbWVzJTNEbGFzdF9pbWFnZSUyQyUyMGluZGV4JTNELTElMkMlMjBzdHJlbmd0aCUzRDEuMCklMEFjb25kaXRpb25zJTIwJTNEJTIwJTVCZmlyc3RfY29uZCUyQyUyMGxhc3RfY29uZCU1RCUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW9fbGF0ZW50JTJDJTIwYXVkaW9fbGF0ZW50JTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBjb25kaXRpb25zJTNEY29uZGl0aW9ucyUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEOCUyQyUwQSUyMCUyMCUyMCUyMHNpZ21hcyUzRERJU1RJTExFRF9TSUdNQV9WQUxVRVMlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybGF0ZW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFsYXRlbnRfdXBzYW1wbGVyJTIwJTNEJTIwTFRYMkxhdGVudFVwc2FtcGxlck1vZGVsLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9wYXRoJTJDJTBBJTIwJTIwJTIwJTIwc3ViZm9sZGVyJTNEJTIybGF0ZW50X3Vwc2FtcGxlciUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpJTBBdXBzYW1wbGVfcGlwZSUyMCUzRCUyMExUWDJMYXRlbnRVcHNhbXBsZVBpcGVsaW5lKHZhZSUzRHBpcGUudmFlJTJDJTIwbGF0ZW50X3Vwc2FtcGxlciUzRGxhdGVudF91cHNhbXBsZXIpJTBBdXBzYW1wbGVfcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXVwc2NhbGVkX3ZpZGVvX2xhdGVudCUyMCUzRCUyMHVwc2FtcGxlX3BpcGUoJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUzRHZpZGVvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybGF0ZW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklNUIwJTVEJTBBJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUzRHVwc2NhbGVkX3ZpZGVvX2xhdGVudCUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2xhdGVudHMlM0RhdWRpb19sYXRlbnQlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRHdpZHRoJTIwKiUyMDIlMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMjAqJTIwMiUyQyUwQSUyMCUyMCUyMCUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QzJTJDJTBBJTIwJTIwJTIwJTIwc2lnbWFzJTNEU1RBR0VfMl9ESVNUSUxMRURfU0lHTUFfVkFMVUVTJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0QxLjAlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMm5wJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFlbmNvZGVfdmlkZW8oJTBBJTIwJTIwJTIwJTIwdmlkZW8lNUIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwZnBzJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvJTNEYXVkaW8lNUIwJTVELmZsb2F0KCkuY3B1KCklMkMlMEElMjAlMjAlMjAlMjBhdWRpb19zYW1wbGVfcmF0ZSUzRHBpcGUudm9jb2Rlci5jb25maWcub3V0cHV0X3NhbXBsaW5nX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMmx0eDJfZGlzdGlsbGVkX2ZsZjJ2Lm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ConditionPipeline, LTX2LatentUpsamplePipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.latent_upsampler <span class="hljs-keyword">import</span> LTX2LatentUpsamplerModel
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.pipeline_ltx2_condition <span class="hljs-keyword">import</span> LTX2VideoCondition
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image

	device = <span class="hljs-string">"cuda"</span>
	width = <span class="hljs-number">768</span>
	height = <span class="hljs-number">512</span>
	random_seed = <span class="hljs-number">42</span>
	generator = torch.Generator(device).manual_seed(random_seed)
	model_path = <span class="hljs-string">"rootonchair/LTX-2-19b-distilled"</span>

	pipe = LTX2ConditionPipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16)
	pipe.enable_sequential_cpu_offload(device=device)
	pipe.vae.enable_tiling()

	prompt = (
	<span class="hljs-string">"CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are "</span>
	<span class="hljs-string">"delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright "</span>
	<span class="hljs-string">"sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, "</span>
	<span class="hljs-string">"low-angle perspective."</span>
	)

	first_image = load_image(
	<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png"</span>,
	)
	last_image = load_image(
	<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png"</span>,
	)
	first_cond = LTX2VideoCondition(frames=first_image, index=<span class="hljs-number">0</span>, strength=<span class="hljs-number">1.0</span>)
	last_cond = LTX2VideoCondition(frames=last_image, index=-<span class="hljs-number">1</span>, strength=<span class="hljs-number">1.0</span>)
	conditions = [first_cond, last_cond]

	frame_rate = <span class="hljs-number">24.0</span>
	video_latent, audio_latent = pipe(
	conditions=conditions,
	prompt=prompt,
	width=width,
	height=height,
	num_frames=<span class="hljs-number">121</span>,
	frame_rate=frame_rate,
	num_inference_steps=<span class="hljs-number">8</span>,
	sigmas=DISTILLED_SIGMA_VALUES,
	guidance_scale=<span class="hljs-number">1.0</span>,
	generator=generator,
	output_type=<span class="hljs-string">"latent"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained(
	model_path,
	subfolder=<span class="hljs-string">"latent_upsampler"</span>,
	torch_dtype=torch.bfloat16,
	)
	upsample_pipe = LTX2LatentUpsamplePipeline(vae=pipe.vae, latent_upsampler=latent_upsampler)
	upsample_pipe.enable_model_cpu_offload(device=device)
	upscaled_video_latent = upsample_pipe(
	latents=video_latent,
	output_type=<span class="hljs-string">"latent"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)[<span class="hljs-number">0</span>]

	video, audio = pipe(
	latents=upscaled_video_latent,
	audio_latents=audio_latent,
	prompt=prompt,
	width=width * <span class="hljs-number">2</span>,
	height=height * <span class="hljs-number">2</span>,
	num_inference_steps=<span class="hljs-number">3</span>,
	sigmas=STAGE_2_DISTILLED_SIGMA_VALUES,
	generator=generator,
	guidance_scale=<span class="hljs-number">1.0</span>,
	output_type=<span class="hljs-string">"np"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	encode_video(
	video[<span class="hljs-number">0</span>],
	fps=frame_rate,
	audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
	output_path=<span class="hljs-string">"ltx2_distilled_flf2v.mp4"</span>,
	)`,wrap:!1}}),_e=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5waXBlbGluZV9sdHgyX2NvbmRpdGlvbiUyMGltcG9ydCUyMExUWDJWaWRlb0NvbmRpdGlvbiUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIuZXhwb3J0X3V0aWxzJTIwaW1wb3J0JTIwZW5jb2RlX3ZpZGVvJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi51dGlscyUyMGltcG9ydCUyMERFRkFVTFRfTkVHQVRJVkVfUFJPTVBUJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGxvYWRfaW1hZ2UlMkMlMjBsb2FkX3ZpZGVvJTBBJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUwQXdpZHRoJTIwJTNEJTIwNzY4JTBBaGVpZ2h0JTIwJTNEJTIwNTEyJTBBcmFuZG9tX3NlZWQlMjAlM0QlMjA0MiUwQWdlbmVyYXRvciUyMCUzRCUyMHRvcmNoLkdlbmVyYXRvcihkZXZpY2UpLm1hbnVhbF9zZWVkKHJhbmRvbV9zZWVkKSUwQW1vZGVsX3BhdGglMjAlM0QlMjAlMjJyb290b25jaGFpciUyRkxUWC0yLTE5Yi1kaXN0aWxsZWQlMjIlMEElMEFwaXBlJTIwJTNEJTIwTFRYMkNvbmRpdGlvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZChtb2RlbF9wYXRoJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKGRldmljZSUzRGRldmljZSklMEFwaXBlLnZhZS5lbmFibGVfdGlsaW5nKCklMEElMEFwcm9tcHQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIyVGhlJTIwdmlkZW8lMjBkZXBpY3RzJTIwYSUyMGxvbmclMkMlMjBzdHJhaWdodCUyMGhpZ2h3YXklMjBzdHJldGNoaW5nJTIwaW50byUyMHRoZSUyMGRpc3RhbmNlJTJDJTIwZmxhbmtlZCUyMGJ5JTIwbWV0YWwlMjBndWFyZHJhaWxzLiUyMFRoZSUyMHJvYWQlMjBpcyUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmRpdmlkZWQlMjBpbnRvJTIwbXVsdGlwbGUlMjBsYW5lcyUyQyUyMHdpdGglMjBhJTIwZmV3JTIwdmVoaWNsZXMlMjB2aXNpYmxlJTIwaW4lMjB0aGUlMjBmYXIlMjBkaXN0YW5jZS4lMjBUaGUlMjBzdXJyb3VuZGluZyUyMGxhbmRzY2FwZSUyMGZlYXR1cmVzJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyZHJ5JTJDJTIwZ3Jhc3N5JTIwZmllbGRzJTIwb24lMjBvbmUlMjBzaWRlJTIwYW5kJTIwcm9sbGluZyUyMGhpbGxzJTIwb24lMjB0aGUlMjBvdGhlci4lMjBUaGUlMjBza3klMjBpcyUyMG1vc3RseSUyMGNsZWFyJTIwd2l0aCUyMGElMjBmZXclMjBzY2F0dGVyZWQlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJjbG91ZHMlMkMlMjBzdWdnZXN0aW5nJTIwYSUyMGJyaWdodCUyQyUyMHN1bm55JTIwZGF5LiUyMEFuZCUyMHRoZW4lMjB0aGUlMjBjYW1lcmElMjBzd2l0Y2glMjB0byUyMGElMjB3aW5kaW5nJTIwbW91bnRhaW4lMjByb2FkJTIwY292ZXJlZCUyMGluJTIwc25vdyUyQyUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMndpdGglMjBhJTIwc2luZ2xlJTIwdmVoaWNsZSUyMHRyYXZlbGluZyUyMGFsb25nJTIwaXQuJTIwVGhlJTIwcm9hZCUyMGlzJTIwZmxhbmtlZCUyMGJ5JTIwc3RlZXAlMkMlMjByb2NreSUyMGNsaWZmcyUyMGFuZCUyMHNwYXJzZSUyMHZlZ2V0YXRpb24uJTIwVGhlJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIybGFuZHNjYXBlJTIwaXMlMjBjaGFyYWN0ZXJpemVkJTIwYnklMjBydWdnZWQlMjB0ZXJyYWluJTIwYW5kJTIwYSUyMHJpdmVyJTIwdmlzaWJsZSUyMGluJTIwdGhlJTIwZGlzdGFuY2UuJTIwVGhlJTIwc2NlbmUlMjBjYXB0dXJlcyUyMHRoZSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnNvbGl0dWRlJTIwYW5kJTIwYmVhdXR5JTIwb2YlMjBhJTIwd2ludGVyJTIwZHJpdmUlMjB0aHJvdWdoJTIwYSUyMG1vdW50YWlub3VzJTIwcmVnaW9uLiUyMiUwQSklMEElMEFjb25kX3ZpZGVvJTIwJTNEJTIwbG9hZF92aWRlbyglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZjb3Ntb3MlMkZjb3Ntb3MtdmlkZW8yd29ybGQtaW5wdXQtdmlkLm1wNCUyMiUwQSklMEFjb25kX2ltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZjb3Ntb3MlMkZjb3Ntb3MtdmlkZW8yd29ybGQtaW5wdXQuanBnJTIyJTBBKSUwQXZpZGVvX2NvbmQlMjAlM0QlMjBMVFgyVmlkZW9Db25kaXRpb24oZnJhbWVzJTNEY29uZF92aWRlbyUyQyUyMGluZGV4JTNEMCUyQyUyMHN0cmVuZ3RoJTNEMS4wKSUwQWltYWdlX2NvbmQlMjAlM0QlMjBMVFgyVmlkZW9Db25kaXRpb24oZnJhbWVzJTNEY29uZF9pbWFnZSUyQyUyMGluZGV4JTNEOCUyQyUyMHN0cmVuZ3RoJTNEMS4wKSUwQWNvbmRpdGlvbnMlMjAlM0QlMjAlNUJ2aWRlb19jb25kJTJDJTIwaW1hZ2VfY29uZCU1RCUwQSUwQWZyYW1lX3JhdGUlMjAlM0QlMjAyNC4wJTBBdmlkZW8lMkMlMjBhdWRpbyUyMCUzRCUyMHBpcGUoJTBBJTIwJTIwJTIwJTIwY29uZGl0aW9ucyUzRGNvbmRpdGlvbnMlMkMlMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RERUZBVUxUX05FR0FUSVZFX1BST01QVCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENDAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDQuMCUyQyUwQSUyMCUyMCUyMCUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF90eXBlJTNEJTIybnAlMjIlMkMlMEElMjAlMjAlMjAlMjByZXR1cm5fZGljdCUzREZhbHNlJTJDJTBBKSUwQSUwQWVuY29kZV92aWRlbyglMEElMjAlMjAlMjAlMjB2aWRlbyU1QjAlNUQlMkMlMEElMjAlMjAlMjAlMjBmcHMlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW8lM0RhdWRpbyU1QjAlNUQuZmxvYXQoKS5jcHUoKSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3NhbXBsZV9yYXRlJTNEcGlwZS52b2NvZGVyLmNvbmZpZy5vdXRwdXRfc2FtcGxpbmdfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMG91dHB1dF9wYXRoJTNEJTIybHR4Ml9jb25kX3ZpZGVvLm1wNCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ConditionPipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.pipeline_ltx2_condition <span class="hljs-keyword">import</span> LTX2VideoCondition
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DEFAULT_NEGATIVE_PROMPT
	<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, load_video

	device = <span class="hljs-string">"cuda"</span>
	width = <span class="hljs-number">768</span>
	height = <span class="hljs-number">512</span>
	random_seed = <span class="hljs-number">42</span>
	generator = torch.Generator(device).manual_seed(random_seed)
	model_path = <span class="hljs-string">"rootonchair/LTX-2-19b-distilled"</span>

	pipe = LTX2ConditionPipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16)
	pipe.enable_sequential_cpu_offload(device=device)
	pipe.vae.enable_tiling()

	prompt = (
	<span class="hljs-string">"The video depicts a long, straight highway stretching into the distance, flanked by metal guardrails. The road is "</span>
	<span class="hljs-string">"divided into multiple lanes, with a few vehicles visible in the far distance. The surrounding landscape features "</span>
	<span class="hljs-string">"dry, grassy fields on one side and rolling hills on the other. The sky is mostly clear with a few scattered "</span>
	<span class="hljs-string">"clouds, suggesting a bright, sunny day. And then the camera switch to a winding mountain road covered in snow, "</span>
	<span class="hljs-string">"with a single vehicle traveling along it. The road is flanked by steep, rocky cliffs and sparse vegetation. The "</span>
	<span class="hljs-string">"landscape is characterized by rugged terrain and a river visible in the distance. The scene captures the "</span>
	<span class="hljs-string">"solitude and beauty of a winter drive through a mountainous region."</span>
	)

	cond_video = load_video(
	<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input-vid.mp4"</span>
	)
	cond_image = load_image(
	<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input.jpg"</span>
	)
	video_cond = LTX2VideoCondition(frames=cond_video, index=<span class="hljs-number">0</span>, strength=<span class="hljs-number">1.0</span>)
	image_cond = LTX2VideoCondition(frames=cond_image, index=<span class="hljs-number">8</span>, strength=<span class="hljs-number">1.0</span>)
	conditions = [video_cond, image_cond]

	frame_rate = <span class="hljs-number">24.0</span>
	video, audio = pipe(
	conditions=conditions,
	prompt=prompt,
	negative_prompt=DEFAULT_NEGATIVE_PROMPT,
	width=width,
	height=height,
	num_frames=<span class="hljs-number">121</span>,
	frame_rate=frame_rate,
	num_inference_steps=<span class="hljs-number">40</span>,
	guidance_scale=<span class="hljs-number">4.0</span>,
	generator=generator,
	output_type=<span class="hljs-string">"np"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	encode_video(
	video[<span class="hljs-number">0</span>],
	fps=frame_rate,
	audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
	output_path=<span class="hljs-string">"ltx2_cond_video.mp4"</span>,
	)`,wrap:!1}}),ye=new W({props:{title:"Multimodal Guidance",local:"multimodal-guidance",headingTag:"h2"}}),Ue=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwTFRYMkltYWdlVG9WaWRlb1BpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi5leHBvcnRfdXRpbHMlMjBpbXBvcnQlMjBlbmNvZGVfdmlkZW8lMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy5sdHgyLnV0aWxzJTIwaW1wb3J0JTIwREVGQVVMVF9ORUdBVElWRV9QUk9NUFQlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwbG9hZF9pbWFnZSUwQSUwQWRldmljZSUyMCUzRCUyMCUyMmN1ZGElMjIlMEF3aWR0aCUyMCUzRCUyMDc2OCUwQWhlaWdodCUyMCUzRCUyMDUxMiUwQXJhbmRvbV9zZWVkJTIwJTNEJTIwNDIlMEFmcmFtZV9yYXRlJTIwJTNEJTIwMjQuMCUwQWdlbmVyYXRvciUyMCUzRCUyMHRvcmNoLkdlbmVyYXRvcihkZXZpY2UpLm1hbnVhbF9zZWVkKHJhbmRvbV9zZWVkKSUwQW1vZGVsX3BhdGglMjAlM0QlMjAlMjJkZzg0NSUyRkxUWC0yLjMtRGlmZnVzZXJzJTIyJTBBJTBBcGlwZSUyMCUzRCUyMExUWDJJbWFnZVRvVmlkZW9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfcGF0aCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfc2VxdWVudGlhbF9jcHVfb2ZmbG9hZChkZXZpY2UlM0RkZXZpY2UpJTBBcGlwZS52YWUuZW5hYmxlX3RpbGluZygpJTBBJTBBcHJvbXB0JTIwJTNEJTIwKCUwQSUyMCUyMCUyMCUyMCUyMkFuJTIwYXN0cm9uYXV0JTIwaGF0Y2hlcyUyMGZyb20lMjBhJTIwZnJhZ2lsZSUyMGVnZyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwTW9vbiUyQyUyMHRoZSUyMHNoZWxsJTIwY3JhY2tpbmclMjBhbmQlMjBwZWVsaW5nJTIwYXBhcnQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmdlbnRsZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uLiUyMEZpbmUlMjBsdW5hciUyMGR1c3QlMjBsaWZ0cyUyMGFuZCUyMGRyaWZ0cyUyMG91dHdhcmQlMjB3aXRoJTIwZWFjaCUyMG1vdmVtZW50JTJDJTIwZmxvYXRpbmclMjBpbiUyMHNsb3clMjBhcmNzJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYmVmb3JlJTIwc2V0dGxpbmclMjBiYWNrJTIwb250byUyMHRoZSUyMGdyb3VuZC4lMjBUaGUlMjBhc3Ryb25hdXQlMjBwdXNoZXMlMjBmcmVlJTIwaW4lMjBhJTIwZGVsaWJlcmF0ZSUyQyUyMHdlaWdodGxlc3MlMjBtb3Rpb24lMkMlMjBzbWFsbCUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmZyYWdtZW50cyUyMG9mJTIwdGhlJTIwZWdnJTIwdHVtYmxpbmclMjBhbmQlMjBzcGlubmluZyUyMHRocm91Z2glMjB0aGUlMjBhaXIuJTIwSW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwdGhlJTIwZGVlcCUyMGRhcmtuZXNzJTIwb2YlMjBzcGFjZSUyMHN1YnRseSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnNoaWZ0cyUyMGFzJTIwc3RhcnMlMjBnbGlkZSUyMHdpdGglMjB0aGUlMjBjYW1lcmEncyUyMG1vdmVtZW50JTJDJTIwZW1waGFzaXppbmclMjB2YXN0JTIwZGVwdGglMjBhbmQlMjBzY2FsZS4lMjBUaGUlMjBjYW1lcmElMjBwZXJmb3JtcyUyMGElMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJzbW9vdGglMkMlMjBjaW5lbWF0aWMlMjBzbG93JTIwcHVzaC1pbiUyQyUyMHdpdGglMjBuYXR1cmFsJTIwcGFyYWxsYXglMjBiZXR3ZWVuJTIwdGhlJTIwZm9yZWdyb3VuZCUyMGR1c3QlMkMlMjB0aGUlMjBhc3Ryb25hdXQlMkMlMjBhbmQlMjB0aGUlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJkaXN0YW50JTIwc3RhcmZpZWxkLiUyMFVsdHJhLXJlYWxpc3RpYyUyMGRldGFpbCUyQyUyMHBoeXNpY2FsbHklMjBhY2N1cmF0ZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uJTJDJTIwY2luZW1hdGljJTIwbGlnaHRpbmclMkMlMjBhbmQlMjBhJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYnJlYXRoLXRha2luZyUyQyUyMG1vdmllLWxpa2UlMjBzaG90LiUyMiUwQSklMEElMEFpbWFnZSUyMCUzRCUyMGxvYWRfaW1hZ2UoJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGYXN0cm9uYXV0LmpwZyUyMiUyQyUwQSklMEElMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBpbWFnZSUzRGltYWdlJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEREVGQVVMVF9ORUdBVElWRV9QUk9NUFQlMkMlMEElMjAlMjAlMjAlMjB3aWR0aCUzRHdpZHRoJTJDJTBBJTIwJTIwJTIwJTIwaGVpZ2h0JTNEaGVpZ2h0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDEyMSUyQyUwQSUyMCUyMCUyMCUyMGZyYW1lX3JhdGUlM0RmcmFtZV9yYXRlJTJDJTBBJTIwJTIwJTIwJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDMwJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0QzLjAlMkMlMjAlMjAlMjMlMjBSZWNvbW1lbmRlZCUyMExUWC0yLjMlMjBndWlkYW5jZSUyMHBhcmFtZXRlcnMlMEElMjAlMjAlMjAlMjBzdGdfc2NhbGUlM0QxLjAlMkMlMjAlMjAlMjMlMjBOb3RlJTIwdGhhdCUyMDAuMCUyMChub3QlMjAxLjApJTIwbWVhbnMlMjB0aGF0JTIwU1RHJTIwaXMlMjBkaXNhYmxlZCUyMChhbGwlMjBvdGhlciUyMGd1aWRhbmNlJTIwaXMlMjBkaXNhYmxlZCUyMGF0JTIwMS4wKSUwQSUyMCUyMCUyMCUyMG1vZGFsaXR5X3NjYWxlJTNEMy4wJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2VfcmVzY2FsZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2d1aWRhbmNlX3NjYWxlJTNENy4wJTJDJTIwJTIwJTIzJTIwTm90ZSUyMHRoYXQlMjBhJTIwaGlnaGVyJTIwQ0ZHJTIwZ3VpZGFuY2UlMjBzY2FsZSUyMGlzJTIwcmVjb21tZW5kZWQlMjBmb3IlMjBhdWRpbyUwQSUyMCUyMCUyMCUyMGF1ZGlvX3N0Z19zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX21vZGFsaXR5X3NjYWxlJTNEMy4wJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fZ3VpZGFuY2VfcmVzY2FsZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMHNwYXRpb190ZW1wb3JhbF9ndWlkYW5jZV9ibG9ja3MlM0QlNUIyOCU1RCUyQyUwQSUyMCUyMCUyMCUyMHVzZV9jcm9zc190aW1lc3RlcCUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IlM0RnZW5lcmF0b3IlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMm5wJTIyJTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSklMEElMEFlbmNvZGVfdmlkZW8oJTBBJTIwJTIwJTIwJTIwdmlkZW8lNUIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwZnBzJTNEZnJhbWVfcmF0ZSUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvJTNEYXVkaW8lNUIwJTVELmZsb2F0KCkuY3B1KCklMkMlMEElMjAlMjAlMjAlMjBhdWRpb19zYW1wbGVfcmF0ZSUzRHBpcGUudm9jb2Rlci5jb25maWcub3V0cHV0X3NhbXBsaW5nX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfcGF0aCUzRCUyMmx0eDJfM19pMnZfc3RhZ2VfMS5tcDQlMjIlMkMlMEEp",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2ImageToVideoPipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DEFAULT_NEGATIVE_PROMPT
	<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image

	device = <span class="hljs-string">"cuda"</span>
	width = <span class="hljs-number">768</span>
	height = <span class="hljs-number">512</span>
	random_seed = <span class="hljs-number">42</span>
	frame_rate = <span class="hljs-number">24.0</span>
	generator = torch.Generator(device).manual_seed(random_seed)
	model_path = <span class="hljs-string">"dg845/LTX-2.3-Diffusers"</span>

	pipe = LTX2ImageToVideoPipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16)
	pipe.enable_sequential_cpu_offload(device=device)
	pipe.vae.enable_tiling()

	prompt = (
	<span class="hljs-string">"An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in "</span>
	<span class="hljs-string">"gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs "</span>
	<span class="hljs-string">"before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small "</span>
	<span class="hljs-string">"fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly "</span>
	<span class="hljs-string">"shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a "</span>
	<span class="hljs-string">"smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the "</span>
	<span class="hljs-string">"distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a "</span>
	<span class="hljs-string">"breath-taking, movie-like shot."</span>
	)

	image = load_image(
	<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"</span>,
	)

	video, audio = pipe(
	image=image,
	prompt=prompt,
	negative_prompt=DEFAULT_NEGATIVE_PROMPT,
	width=width,
	height=height,
	num_frames=<span class="hljs-number">121</span>,
	frame_rate=frame_rate,
	num_inference_steps=<span class="hljs-number">30</span>,
	guidance_scale=<span class="hljs-number">3.0</span>, <span class="hljs-comment"># Recommended LTX-2.3 guidance parameters</span>
	stg_scale=<span class="hljs-number">1.0</span>, <span class="hljs-comment"># Note that 0.0 (not 1.0) means that STG is disabled (all other guidance is disabled at 1.0)</span>
	modality_scale=<span class="hljs-number">3.0</span>,
	guidance_rescale=<span class="hljs-number">0.7</span>,
	audio_guidance_scale=<span class="hljs-number">7.0</span>, <span class="hljs-comment"># Note that a higher CFG guidance scale is recommended for audio</span>
	audio_stg_scale=<span class="hljs-number">1.0</span>,
	audio_modality_scale=<span class="hljs-number">3.0</span>,
	audio_guidance_rescale=<span class="hljs-number">0.7</span>,
	spatio_temporal_guidance_blocks=[<span class="hljs-number">28</span>],
	use_cross_timestep=<span class="hljs-literal">True</span>,
	generator=generator,
	output_type=<span class="hljs-string">"np"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	encode_video(
	video[<span class="hljs-number">0</span>],
	fps=frame_rate,
	audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
	output_path=<span class="hljs-string">"ltx2_3_i2v_stage_1.mp4"</span>,
	)`,wrap:!1}}),je=new W({props:{title:"Prompt Enhancement",local:"prompt-enhancement",headingTag:"h2"}}),Ze=new E({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwR2VtbWEzUHJvY2Vzc29yJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMExUWDJQaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMucGlwZWxpbmVzLmx0eDIuZXhwb3J0X3V0aWxzJTIwaW1wb3J0JTIwZW5jb2RlX3ZpZGVvJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMubHR4Mi51dGlscyUyMGltcG9ydCUyMERFRkFVTFRfTkVHQVRJVkVfUFJPTVBUJTJDJTIwVDJWX0RFRkFVTFRfU1lTVEVNX1BST01QVCUwQSUwQWRldmljZSUyMCUzRCUyMCUyMmN1ZGElMjIlMEF3aWR0aCUyMCUzRCUyMDc2OCUwQWhlaWdodCUyMCUzRCUyMDUxMiUwQXJhbmRvbV9zZWVkJTIwJTNEJTIwNDIlMEFmcmFtZV9yYXRlJTIwJTNEJTIwMjQuMCUwQWdlbmVyYXRvciUyMCUzRCUyMHRvcmNoLkdlbmVyYXRvcihkZXZpY2UpLm1hbnVhbF9zZWVkKHJhbmRvbV9zZWVkKSUwQW1vZGVsX3BhdGglMjAlM0QlMjAlMjJkZzg0NSUyRkxUWC0yLjMtRGlmZnVzZXJzJTIyJTBBJTBBcGlwZSUyMCUzRCUyMExUWDJQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfcGF0aCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoZGV2aWNlJTNEZGV2aWNlKSUwQXBpcGUudmFlLmVuYWJsZV90aWxpbmcoKSUwQWlmJTIwZ2V0YXR0cihwaXBlJTJDJTIwJTIycHJvY2Vzc29yJTIyJTJDJTIwTm9uZSklMjBpcyUyME5vbmUlM0ElMEElMjAlMjAlMjAlMjBwcm9jZXNzb3IlMjAlM0QlMjBHZW1tYTNQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZSUyRmdlbW1hLTMtMTJiLWl0LXFhdC1xNF8wLXVucXVhbnRpemVkJTIyKSUwQSUyMCUyMCUyMCUyMHBpcGUucHJvY2Vzc29yJTIwJTNEJTIwcHJvY2Vzc29yJTBBJTBBcHJvbXB0JTIwJTNEJTIwKCUwQSUyMCUyMCUyMCUyMCUyMkFuJTIwYXN0cm9uYXV0JTIwaGF0Y2hlcyUyMGZyb20lMjBhJTIwZnJhZ2lsZSUyMGVnZyUyMG9uJTIwdGhlJTIwc3VyZmFjZSUyMG9mJTIwdGhlJTIwTW9vbiUyQyUyMHRoZSUyMHNoZWxsJTIwY3JhY2tpbmclMjBhbmQlMjBwZWVsaW5nJTIwYXBhcnQlMjBpbiUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmdlbnRsZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uLiUyMEZpbmUlMjBsdW5hciUyMGR1c3QlMjBsaWZ0cyUyMGFuZCUyMGRyaWZ0cyUyMG91dHdhcmQlMjB3aXRoJTIwZWFjaCUyMG1vdmVtZW50JTJDJTIwZmxvYXRpbmclMjBpbiUyMHNsb3clMjBhcmNzJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYmVmb3JlJTIwc2V0dGxpbmclMjBiYWNrJTIwb250byUyMHRoZSUyMGdyb3VuZC4lMjBUaGUlMjBhc3Ryb25hdXQlMjBwdXNoZXMlMjBmcmVlJTIwaW4lMjBhJTIwZGVsaWJlcmF0ZSUyQyUyMHdlaWdodGxlc3MlMjBtb3Rpb24lMkMlMjBzbWFsbCUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMmZyYWdtZW50cyUyMG9mJTIwdGhlJTIwZWdnJTIwdHVtYmxpbmclMjBhbmQlMjBzcGlubmluZyUyMHRocm91Z2glMjB0aGUlMjBhaXIuJTIwSW4lMjB0aGUlMjBiYWNrZ3JvdW5kJTJDJTIwdGhlJTIwZGVlcCUyMGRhcmtuZXNzJTIwb2YlMjBzcGFjZSUyMHN1YnRseSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMnNoaWZ0cyUyMGFzJTIwc3RhcnMlMjBnbGlkZSUyMHdpdGglMjB0aGUlMjBjYW1lcmEncyUyMG1vdmVtZW50JTJDJTIwZW1waGFzaXppbmclMjB2YXN0JTIwZGVwdGglMjBhbmQlMjBzY2FsZS4lMjBUaGUlMjBjYW1lcmElMjBwZXJmb3JtcyUyMGElMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJzbW9vdGglMkMlMjBjaW5lbWF0aWMlMjBzbG93JTIwcHVzaC1pbiUyQyUyMHdpdGglMjBuYXR1cmFsJTIwcGFyYWxsYXglMjBiZXR3ZWVuJTIwdGhlJTIwZm9yZWdyb3VuZCUyMGR1c3QlMkMlMjB0aGUlMjBhc3Ryb25hdXQlMkMlMjBhbmQlMjB0aGUlMjAlMjIlMEElMjAlMjAlMjAlMjAlMjJkaXN0YW50JTIwc3RhcmZpZWxkLiUyMFVsdHJhLXJlYWxpc3RpYyUyMGRldGFpbCUyQyUyMHBoeXNpY2FsbHklMjBhY2N1cmF0ZSUyMGxvdy1ncmF2aXR5JTIwbW90aW9uJTJDJTIwY2luZW1hdGljJTIwbGlnaHRpbmclMkMlMjBhbmQlMjBhJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIyYnJlYXRoLXRha2luZyUyQyUyMG1vdmllLWxpa2UlMjBzaG90LiUyMiUwQSklMEElMEF2aWRlbyUyQyUyMGF1ZGlvJTIwJTNEJTIwcGlwZSglMEElMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMEElMjAlMjAlMjAlMjBuZWdhdGl2ZV9wcm9tcHQlM0RERUZBVUxUX05FR0FUSVZFX1BST01QVCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBoZWlnaHQlM0RoZWlnaHQlMkMlMEElMjAlMjAlMjAlMjBudW1fZnJhbWVzJTNEMTIxJTJDJTBBJTIwJTIwJTIwJTIwZnJhbWVfcmF0ZSUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNEMzAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDMuMCUyQyUwQSUyMCUyMCUyMCUyMHN0Z19zY2FsZSUzRDEuMCUyQyUwQSUyMCUyMCUyMCUyMG1vZGFsaXR5X3NjYWxlJTNEMy4wJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2VfcmVzY2FsZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMGF1ZGlvX2d1aWRhbmNlX3NjYWxlJTNENy4wJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc3RnX3NjYWxlJTNEMS4wJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fbW9kYWxpdHlfc2NhbGUlM0QzLjAlMkMlMEElMjAlMjAlMjAlMjBhdWRpb19ndWlkYW5jZV9yZXNjYWxlJTNEMC43JTJDJTBBJTIwJTIwJTIwJTIwc3BhdGlvX3RlbXBvcmFsX2d1aWRhbmNlX2Jsb2NrcyUzRCU1QjI4JTVEJTJDJTBBJTIwJTIwJTIwJTIwdXNlX2Nyb3NzX3RpbWVzdGVwJTNEVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMHN5c3RlbV9wcm9tcHQlM0RUMlZfREVGQVVMVF9TWVNURU1fUFJPTVBUJTJDJTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3R5cGUlM0QlMjJucCUyMiUyQyUwQSUyMCUyMCUyMCUyMHJldHVybl9kaWN0JTNERmFsc2UlMkMlMEEpJTBBJTBBZW5jb2RlX3ZpZGVvKCUwQSUyMCUyMCUyMCUyMHZpZGVvJTVCMCU1RCUyQyUwQSUyMCUyMCUyMCUyMGZwcyUzRGZyYW1lX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBhdWRpbyUzRGF1ZGlvJTVCMCU1RC5mbG9hdCgpLmNwdSgpJTJDJTBBJTIwJTIwJTIwJTIwYXVkaW9fc2FtcGxlX3JhdGUlM0RwaXBlLnZvY29kZXIuY29uZmlnLm91dHB1dF9zYW1wbGluZ19yYXRlJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3BhdGglM0QlMjJsdHgyXzNfdDJ2X3N0YWdlXzEubXA0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Gemma3Processor
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> LTX2Pipeline
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.export_utils <span class="hljs-keyword">import</span> encode_video
	<span class="hljs-keyword">from</span> diffusers.pipelines.ltx2.utils <span class="hljs-keyword">import</span> DEFAULT_NEGATIVE_PROMPT, T2V_DEFAULT_SYSTEM_PROMPT

	device = <span class="hljs-string">"cuda"</span>
	width = <span class="hljs-number">768</span>
	height = <span class="hljs-number">512</span>
	random_seed = <span class="hljs-number">42</span>
	frame_rate = <span class="hljs-number">24.0</span>
	generator = torch.Generator(device).manual_seed(random_seed)
	model_path = <span class="hljs-string">"dg845/LTX-2.3-Diffusers"</span>

	pipe = LTX2Pipeline.from_pretrained(model_path, torch_dtype=torch.bfloat16)
	pipe.enable_model_cpu_offload(device=device)
	pipe.vae.enable_tiling()
	<span class="hljs-keyword">if</span> <span class="hljs-built_in">getattr</span>(pipe, <span class="hljs-string">"processor"</span>, <span class="hljs-literal">None</span>) <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	processor = Gemma3Processor.from_pretrained(<span class="hljs-string">"google/gemma-3-12b-it-qat-q4_0-unquantized"</span>)
	pipe.processor = processor

	prompt = (
	<span class="hljs-string">"An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in "</span>
	<span class="hljs-string">"gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs "</span>
	<span class="hljs-string">"before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small "</span>
	<span class="hljs-string">"fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly "</span>
	<span class="hljs-string">"shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a "</span>
	<span class="hljs-string">"smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the "</span>
	<span class="hljs-string">"distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a "</span>
	<span class="hljs-string">"breath-taking, movie-like shot."</span>
	)

	video, audio = pipe(
	prompt=prompt,
	negative_prompt=DEFAULT_NEGATIVE_PROMPT,
	width=width,
	height=height,
	num_frames=<span class="hljs-number">121</span>,
	frame_rate=frame_rate,
	num_inference_steps=<span class="hljs-number">30</span>,
	guidance_scale=<span class="hljs-number">3.0</span>,
	stg_scale=<span class="hljs-number">1.0</span>,
	modality_scale=<span class="hljs-number">3.0</span>,
	guidance_rescale=<span class="hljs-number">0.7</span>,
	audio_guidance_scale=<span class="hljs-number">7.0</span>,
	audio_stg_scale=<span class="hljs-number">1.0</span>,
	audio_modality_scale=<span class="hljs-number">3.0</span>,
	audio_guidance_rescale=<span class="hljs-number">0.7</span>,
	spatio_temporal_guidance_blocks=[<span class="hljs-number">28</span>],
	use_cross_timestep=<span class="hljs-literal">True</span>,
	system_prompt=T2V_DEFAULT_SYSTEM_PROMPT,
	generator=generator,
	output_type=<span class="hljs-string">"np"</span>,
	return_dict=<span class="hljs-literal">False</span>,
	)

	encode_video(
	video[<span class="hljs-number">0</span>],
	fps=frame_rate,
	audio=audio[<span class="hljs-number">0</span>].<span class="hljs-built_in">float</span>().cpu(),
	audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
	output_path=<span class="hljs-string">"ltx2_3_t2v_stage_1.mp4"</span>,
	)`,wrap:!1}}),Ge=new W({props:{title:"LTX2Pipeline",local:"diffusers.LTX2Pipeline",headingTag:"h2"}}),Ie=new v({props:{name:"class diffusers.LTX2Pipeline",anchor:"diffusers.LTX2Pipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"audio_vae",val:": AutoencoderKLLTX2Audio"},{name:"text_encoder",val:": Gemma3ForConditionalGeneration"},{name:"tokenizer",val:": transformers.models.gemma.tokenization_gemma.GemmaTokenizer \| transformers.models.gemma.tokenization_gemma_fast.GemmaTokenizerFast"},{name:"connectors",val:": LTX2TextConnectors"},{name:"transformer",val:": LTX2VideoTransformer3DModel"},{name:"vocoder",val:": diffusers.pipelines.ltx2.vocoder.LTX2Vocoder \| diffusers.pipelines.ltx2.vocoder.LTX2VocoderWithBWE"},{name:"processor",val:": transformers.models.gemma3.processing_gemma3.Gemma3Processor \| None = None"}],parametersDescription:[{anchor:"diffusers.LTX2Pipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_13370/en/api/models/ltx_video_transformer3d#diffusers.LTXVideoTransformer3DModel">LTXVideoTransformer3DModel</a>) —
	Conditional Transformer architecture to denoise the encoded video latents.`,name:"transformer"},{anchor:"diffusers.LTX2Pipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_13370/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) —
	A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.LTX2Pipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_13370/en/api/models/autoencoderkl_ltx_video#diffusers.AutoencoderKLLTXVideo">AutoencoderKLLTXVideo</a>) —
	Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.LTX2Pipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>T5EncoderModel</code>) —
	<a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel" rel="nofollow">T5</a>, specifically
	the <a href="https://huggingface.co/google/t5-v1_1-xxl" rel="nofollow">google/t5-v1_1-xxl</a> variant.`,name:"text_encoder"},{anchor:"diffusers.LTX2Pipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) —
	Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.LTX2Pipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>T5TokenizerFast</code>) —
	Second Tokenizer of class
	<a href="https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast" rel="nofollow">T5TokenizerFast</a>.`,name:"tokenizer"},{anchor:"diffusers.LTX2Pipeline.connectors",description:`<strong>connectors</strong> (<code>LTX2TextConnectors</code>) —
	Text connector stack used to adapt text encoder hidden states for the video and audio branches.`,name:"connectors"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L185"}}),Xe=new v({props:{name:"__call__",anchor:"diffusers.LTX2Pipeline.__call__",parameters:[{name:"prompt",val:": str \| list[str] = None"},{name:"negative_prompt",val:": str \| list[str] \| None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"num_inference_steps",val:": int = 40"},{name:"sigmas",val:": list[float] \| None = None"},{name:"timesteps",val:": list = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"stg_scale",val:": float = 0.0"},{name:"modality_scale",val:": float = 1.0"},{name:"guidance_rescale",val:": float = 0.0"},{name:"audio_guidance_scale",val:": float \| None = None"},{name:"audio_stg_scale",val:": float \| None = None"},{name:"audio_modality_scale",val:": float \| None = None"},{name:"audio_guidance_rescale",val:": float \| None = None"},{name:"spatio_temporal_guidance_blocks",val:": list[int] \| None = None"},{name:"noise_scale",val:": float = 0.0"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"generator",val:": torch._C.Generator \| list[torch._C.Generator] \| None = None"},{name:"latents",val:": torch.Tensor \| None = None"},{name:"audio_latents",val:": torch.Tensor \| None = None"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"decode_timestep",val:": float \| list[float] = 0.0"},{name:"decode_noise_scale",val:": float \| list[float] \| None = None"},{name:"use_cross_timestep",val:": bool = False"},{name:"system_prompt",val:": str \| None = None"},{name:"prompt_max_new_tokens",val:": int = 512"},{name:"prompt_enhancement_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"prompt_enhancement_seed",val:": int = 10"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 1024"}],parametersDescription:[{anchor:"diffusers.LTX2Pipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
	instead.`,name:"prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The height in pixels of the generated image. This is set to 480 by default for the best results.`,name:"height"},{anchor:"diffusers.LTX2Pipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) —
	The width in pixels of the generated image. This is set to 848 by default for the best results.`,name:"width"},{anchor:"diffusers.LTX2Pipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) —
	The number of video frames to generate`,name:"num_frames"},{anchor:"diffusers.LTX2Pipeline.__call__.frame_rate",description:`<strong>frame_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>24.0</code>) —
	The frames per second (FPS) of the generated video.`,name:"frame_rate"},{anchor:"diffusers.LTX2Pipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 40) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.LTX2Pipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) —
	Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
	their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
	will be used.`,name:"sigmas"},{anchor:"diffusers.LTX2Pipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>list[int]</code>, <em>optional</em>) —
	Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument
	in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is
	passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.LTX2Pipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>4.0</code>) —
	Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion
	Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2.
	of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting
	<code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to
	the text <code>prompt</code>, usually at the expense of lower image quality. Used for the video modality (there is
	a separate value <code>audio_guidance_scale</code> for the audio modality).`,name:"guidance_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.stg_scale",description:`<strong>stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	Video guidance scale for Spatio-Temporal Guidance (STG), proposed in <a href="https://arxiv.org/abs/2411.18664" rel="nofollow">Spatiotemporal Skip Guidance for
	Enhanced Video Diffusion Sampling</a>. STG uses a CFG-like estimate
	where we move the sample away from a weak sample from a perturbed version of the denoising model.
	Enabling STG will result in an additional denoising model forward pass; the default value of <code>0.0</code>
	means that STG is disabled.`,name:"stg_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.modality_scale",description:`<strong>modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) —
	Video guidance scale for LTX-2.X modality isolation guidance, where we move the sample away from a
	weaker sample generated by the denoising model withy cross-modality (audio-to-video and video-to-audio)
	cross attention disabled using a CFG-like estimate. Enabling modality guidance will result in an
	additional denoising model forward pass; the default value of <code>1.0</code> means that modality guidance is
	disabled.`,name:"modality_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a> <code>guidance_scale</code> is defined as <code>φ</code> in equation 16. of
	<a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a>. Guidance rescale factor should fix overexposure when
	using zero terminal SNR. Used for the video modality.`,name:"guidance_rescale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_guidance_scale",description:`<strong>audio_guidance_scale</strong> (<code>float</code>, <em>optional</em> defaults to <code>None</code>) —
	Audio guidance scale for CFG with respect to the negative prompt. The CFG update rule is the same for
	video and audio, but they can use different values for the guidance scale. The LTX-2.X authors suggest
	that the <code>audio_guidance_scale</code> should be higher relative to the video <code>guidance_scale</code> (e.g. for
	LTX-2.3 they suggest 3.0 for video and 7.0 for audio). If <code>None</code>, defaults to the video value
	<code>guidance_scale</code>.`,name:"audio_guidance_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_stg_scale",description:`<strong>audio_stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	Audio guidance scale for STG. As with CFG, the STG update rule is otherwise the same for video and
	audio. For LTX-2.3, a value of 1.0 is suggested for both video and audio. If <code>None</code>, defaults to the
	video value <code>stg_scale</code>.`,name:"audio_stg_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_modality_scale",description:`<strong>audio_modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	Audio guidance scale for LTX-2.X modality isolation guidance. As with CFG, the modality guidance rule
	is otherwise the same for video and audio. For LTX-2.3, a value of 3.0 is suggested for both video and
	audio. If <code>None</code>, defaults to the video value <code>modality_scale</code>.`,name:"audio_modality_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_guidance_rescale",description:`<strong>audio_guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	A separate guidance rescale factor for the audio modality. If <code>None</code>, defaults to the video value
	<code>guidance_rescale</code>.`,name:"audio_guidance_rescale"},{anchor:"diffusers.LTX2Pipeline.__call__.spatio_temporal_guidance_blocks",description:`<strong>spatio_temporal_guidance_blocks</strong> (<code>list[int]</code>, <em>optional</em>, defaults to <code>None</code>) —
	The zero-indexed transformer block indices at which to apply STG. Must be supplied if STG is used
	(<code>stg_scale</code> or <code>audio_stg_scale</code> is greater than <code>0</code>). A value of <code>[29]</code> is recommended for LTX-2.0
	and <code>[28]</code> is recommended for LTX-2.3.`,name:"spatio_temporal_guidance_blocks"},{anchor:"diffusers.LTX2Pipeline.__call__.noise_scale",description:`<strong>noise_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	The interpolation factor between random noise and denoised latents at each timestep. Applying noise to
	the <code>latents</code> and <code>audio_latents</code> before continue denoising.`,name:"noise_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2Pipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.LTX2Pipeline.__call__.audio_latents",description:`<strong>audio_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for audio
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"audio_latents"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated attention mask for text embeddings.`,name:"prompt_attention_mask"},{anchor:"diffusers.LTX2Pipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not
	provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) —
	Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.LTX2Pipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) —
	The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2Pipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) —
	The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2Pipeline.__call__.use_cross_timestep",description:`<strong>use_cross_timestep</strong> (<code>bool</code> <em>optional</em>, defaults to <code>False</code>) —
	Whether to use the cross modality (audio is the cross modality of video, and vice versa) sigma when
	calculating the cross attention modulation parameters. <code>True</code> is the newer (e.g. LTX-2.3) behavior;
	<code>False</code> is the legacy LTX-2.0 behavior.`,name:"use_cross_timestep"},{anchor:"diffusers.LTX2Pipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) —
	Optional system prompt to use for prompt enhancement. The system prompt will be used by the current
	text encoder (by default, a <code>Gemma3ForConditionalGeneration</code> model) to generate an enhanced prompt from
	the original <code>prompt</code> to condition generation. If not supplied, prompt enhancement will not be
	performed.`,name:"system_prompt"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_max_new_tokens",description:`<strong>prompt_max_new_tokens</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The maximum number of new tokens to generate when performing prompt enhancement.`,name:"prompt_max_new_tokens"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_enhancement_kwargs",description:`<strong>prompt_enhancement_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>, defaults to <code>None</code>) —
	Keyword arguments for <code>self.text_encoder.generate</code>. If not supplied, default arguments of
	<code>do_sample=True</code> and <code>temperature=0.7</code> will be used. See
	<a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate" rel="nofollow">https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate</a>
	for more details.`,name:"prompt_enhancement_kwargs"},{anchor:"diffusers.LTX2Pipeline.__call__.prompt_enhancement_seed",description:`<strong>prompt_enhancement_seed</strong> (<code>int</code>, <em>optional</em>, default to <code>10</code>) —
	Random seed for any random operations during prompt enhancement.`,name:"prompt_enhancement_seed"},{anchor:"diffusers.LTX2Pipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2Pipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.ltx.LTX2PipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.LTX2Pipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
	<code>self.processor</code> in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.LTX2Pipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls at the end of each denoising steps during the inference. The function is called
	with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by
	<code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LTX2Pipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>, defaults to <code>["latents"]</code>) —
	The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
	will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
	<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.LTX2Pipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) —
	Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L808",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTX2PipelineOutput</code> is returned, otherwise a <code>tuple</code> is
	returned where the first element is a list with the generated images.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.ltx.LTX2PipelineOutput</code> or <code>tuple</code></p>
	`}}),Y=new bn({props:{anchor:"diffusers.LTX2Pipeline.__call__.example",$$slots:{default:[aa]},$$scope:{ctx:C}}}),Be=new v({props:{name:"encode_prompt",anchor:"diffusers.LTX2Pipeline.encode_prompt",parameters:[{name:"prompt",val:": str \| list[str]"},{name:"negative_prompt",val:": str \| list[str] \| None = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor \| None = None"},{name:"prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"max_sequence_length",val:": int = 1024"},{name:"scale_factor",val:": int = 8"},{name:"device",val:": torch.device \| None = None"},{name:"dtype",val:": torch.dtype \| None = None"}],parametersDescription:[{anchor:"diffusers.LTX2Pipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	prompt to be encoded`,name:"prompt"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
	argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>):
	torch device`,name:"device"},{anchor:"diffusers.LTX2Pipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>):
	torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L337"}}),Ve=new v({props:{name:"enhance_prompt",anchor:"diffusers.LTX2Pipeline.enhance_prompt",parameters:[{name:"prompt",val:": str"},{name:"system_prompt",val:": str"},{name:"max_new_tokens",val:": int = 512"},{name:"seed",val:": int = 10"},{name:"generator",val:": torch._C.Generator \| None = None"},{name:"generation_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"device",val:": str \| torch.device \| None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2.py#L423"}}),We=new W({props:{title:"LTX2ImageToVideoPipeline",local:"diffusers.LTX2ImageToVideoPipeline",headingTag:"h2"}}),Ce=new v({props:{name:"class diffusers.LTX2ImageToVideoPipeline",anchor:"diffusers.LTX2ImageToVideoPipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"audio_vae",val:": AutoencoderKLLTX2Audio"},{name:"text_encoder",val:": Gemma3ForConditionalGeneration"},{name:"tokenizer",val:": transformers.models.gemma.tokenization_gemma.GemmaTokenizer \| transformers.models.gemma.tokenization_gemma_fast.GemmaTokenizerFast"},{name:"connectors",val:": LTX2TextConnectors"},{name:"transformer",val:": LTX2VideoTransformer3DModel"},{name:"vocoder",val:": diffusers.pipelines.ltx2.vocoder.LTX2Vocoder \| diffusers.pipelines.ltx2.vocoder.LTX2VocoderWithBWE"},{name:"processor",val:": transformers.models.gemma3.processing_gemma3.Gemma3Processor \| None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L205"}}),ke=new v({props:{name:"__call__",anchor:"diffusers.LTX2ImageToVideoPipeline.__call__",parameters:[{name:"image",val:": PIL.Image.Image \| numpy.ndarray \| torch.Tensor \| list[PIL.Image.Image] \| list[numpy.ndarray] \| list[torch.Tensor] = None"},{name:"prompt",val:": str \| list[str] = None"},{name:"negative_prompt",val:": str \| list[str] \| None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"num_inference_steps",val:": int = 40"},{name:"sigmas",val:": list[float] \| None = None"},{name:"timesteps",val:": list[int] \| None = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"stg_scale",val:": float = 0.0"},{name:"modality_scale",val:": float = 1.0"},{name:"guidance_rescale",val:": float = 0.0"},{name:"audio_guidance_scale",val:": float \| None = None"},{name:"audio_stg_scale",val:": float \| None = None"},{name:"audio_modality_scale",val:": float \| None = None"},{name:"audio_guidance_rescale",val:": float \| None = None"},{name:"spatio_temporal_guidance_blocks",val:": list[int] \| None = None"},{name:"noise_scale",val:": float = 0.0"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"generator",val:": torch._C.Generator \| list[torch._C.Generator] \| None = None"},{name:"latents",val:": torch.Tensor \| None = None"},{name:"audio_latents",val:": torch.Tensor \| None = None"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"decode_timestep",val:": float \| list[float] = 0.0"},{name:"decode_noise_scale",val:": float \| list[float] \| None = None"},{name:"use_cross_timestep",val:": bool = False"},{name:"system_prompt",val:": str \| None = None"},{name:"prompt_max_new_tokens",val:": int = 512"},{name:"prompt_enhancement_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"prompt_enhancement_seed",val:": int = 10"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 1024"}],parametersDescription:[{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.image",description:`<strong>image</strong> (<code>PipelineImageInput</code>) —
	The input image to condition the generation on. Must be an image, a list of images or a <code>torch.Tensor</code>.`,name:"image"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
	instead.`,name:"prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The height in pixels of the generated image. This is set to 480 by default for the best results.`,name:"height"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) —
	The width in pixels of the generated image. This is set to 848 by default for the best results.`,name:"width"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) —
	The number of video frames to generate`,name:"num_frames"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.frame_rate",description:`<strong>frame_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>24.0</code>) —
	The frames per second (FPS) of the generated video.`,name:"frame_rate"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 40) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) —
	Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
	their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
	will be used.`,name:"sigmas"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[int]</code>, <em>optional</em>) —
	Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument
	in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is
	passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>4.0</code>) —
	Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion
	Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2.
	of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting
	<code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to
	the text <code>prompt</code>, usually at the expense of lower image quality. Used for the video modality (there is
	a separate value <code>audio_guidance_scale</code> for the audio modality).`,name:"guidance_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.stg_scale",description:`<strong>stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	Video guidance scale for Spatio-Temporal Guidance (STG), proposed in <a href="https://arxiv.org/abs/2411.18664" rel="nofollow">Spatiotemporal Skip Guidance for
	Enhanced Video Diffusion Sampling</a>. STG uses a CFG-like estimate
	where we move the sample away from a weak sample from a perturbed version of the denoising model.
	Enabling STG will result in an additional denoising model forward pass; the default value of <code>0.0</code>
	means that STG is disabled.`,name:"stg_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.modality_scale",description:`<strong>modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) —
	Video guidance scale for LTX-2.X modality isolation guidance, where we move the sample away from a
	weaker sample generated by the denoising model withy cross-modality (audio-to-video and video-to-audio)
	cross attention disabled using a CFG-like estimate. Enabling modality guidance will result in an
	additional denoising model forward pass; the default value of <code>1.0</code> means that modality guidance is
	disabled.`,name:"modality_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a> <code>guidance_scale</code> is defined as <code>φ</code> in equation 16. of
	<a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a>. Guidance rescale factor should fix overexposure when
	using zero terminal SNR. Used for the video modality.`,name:"guidance_rescale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_guidance_scale",description:`<strong>audio_guidance_scale</strong> (<code>float</code>, <em>optional</em> defaults to <code>None</code>) —
	Audio guidance scale for CFG with respect to the negative prompt. The CFG update rule is the same for
	video and audio, but they can use different values for the guidance scale. The LTX-2.X authors suggest
	that the <code>audio_guidance_scale</code> should be higher relative to the video <code>guidance_scale</code> (e.g. for
	LTX-2.3 they suggest 3.0 for video and 7.0 for audio). If <code>None</code>, defaults to the video value
	<code>guidance_scale</code>.`,name:"audio_guidance_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_stg_scale",description:`<strong>audio_stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	Audio guidance scale for STG. As with CFG, the STG update rule is otherwise the same for video and
	audio. For LTX-2.3, a value of 1.0 is suggested for both video and audio. If <code>None</code>, defaults to the
	video value <code>stg_scale</code>.`,name:"audio_stg_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_modality_scale",description:`<strong>audio_modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	Audio guidance scale for LTX-2.X modality isolation guidance. As with CFG, the modality guidance rule
	is otherwise the same for video and audio. For LTX-2.3, a value of 3.0 is suggested for both video and
	audio. If <code>None</code>, defaults to the video value <code>modality_scale</code>.`,name:"audio_modality_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_guidance_rescale",description:`<strong>audio_guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	A separate guidance rescale factor for the audio modality. If <code>None</code>, defaults to the video value
	<code>guidance_rescale</code>.`,name:"audio_guidance_rescale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.spatio_temporal_guidance_blocks",description:`<strong>spatio_temporal_guidance_blocks</strong> (<code>list[int]</code>, <em>optional</em>, defaults to <code>None</code>) —
	The zero-indexed transformer block indices at which to apply STG. Must be supplied if STG is used
	(<code>stg_scale</code> or <code>audio_stg_scale</code> is greater than <code>0</code>). A value of <code>[29]</code> is recommended for LTX-2.0
	and <code>[28]</code> is recommended for LTX-2.3.`,name:"spatio_temporal_guidance_blocks"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.noise_scale",description:`<strong>noise_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	The interpolation factor between random noise and denoised latents at each timestep. Applying noise to
	the <code>latents</code> and <code>audio_latents</code> before continue denoising.`,name:"noise_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.audio_latents",description:`<strong>audio_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for audio
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"audio_latents"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated attention mask for text embeddings.`,name:"prompt_attention_mask"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not
	provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) —
	Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) —
	The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) —
	The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.use_cross_timestep",description:`<strong>use_cross_timestep</strong> (<code>bool</code> <em>optional</em>, defaults to <code>False</code>) —
	Whether to use the cross modality (audio is the cross modality of video, and vice versa) sigma when
	calculating the cross attention modulation parameters. <code>True</code> is the newer (e.g. LTX-2.3) behavior;
	<code>False</code> is the legacy LTX-2.0 behavior.`,name:"use_cross_timestep"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.system_prompt",description:`<strong>system_prompt</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) —
	Optional system prompt to use for prompt enhancement. The system prompt will be used by the current
	text encoder (by default, a <code>Gemma3ForConditionalGeneration</code> model) to generate an enhanced prompt from
	the original <code>prompt</code> to condition generation. If not supplied, prompt enhancement will not be
	performed.`,name:"system_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_max_new_tokens",description:`<strong>prompt_max_new_tokens</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The maximum number of new tokens to generate when performing prompt enhancement.`,name:"prompt_max_new_tokens"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_enhancement_kwargs",description:`<strong>prompt_enhancement_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>, defaults to <code>None</code>) —
	Keyword arguments for <code>self.text_encoder.generate</code>. If not supplied, default arguments of
	<code>do_sample=True</code> and <code>temperature=0.7</code> will be used. See
	<a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate" rel="nofollow">https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate</a>
	for more details.`,name:"prompt_enhancement_kwargs"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.prompt_enhancement_seed",description:`<strong>prompt_enhancement_seed</strong> (<code>int</code>, <em>optional</em>, default to <code>10</code>) —
	Random seed for any random operations during prompt enhancement.`,name:"prompt_enhancement_seed"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.ltx.LTX2PipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
	<code>self.processor</code> in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls at the end of each denoising steps during the inference. The function is called
	with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by
	<code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) —
	The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
	will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
	<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) —
	Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L868",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTX2PipelineOutput</code> is returned, otherwise a <code>tuple</code> is
	returned where the first element is a list with the generated images.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.ltx.LTX2PipelineOutput</code> or <code>tuple</code></p>
	`}}),z=new bn({props:{anchor:"diffusers.LTX2ImageToVideoPipeline.__call__.example",$$slots:{default:[oa]},$$scope:{ctx:C}}}),xe=new v({props:{name:"encode_prompt",anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt",parameters:[{name:"prompt",val:": str \| list[str]"},{name:"negative_prompt",val:": str \| list[str] \| None = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor \| None = None"},{name:"prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"max_sequence_length",val:": int = 1024"},{name:"scale_factor",val:": int = 8"},{name:"device",val:": torch.device \| None = None"},{name:"dtype",val:": torch.dtype \| None = None"}],parametersDescription:[{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	prompt to be encoded`,name:"prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
	argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>):
	torch device`,name:"device"},{anchor:"diffusers.LTX2ImageToVideoPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>):
	torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L342"}}),Re=new v({props:{name:"enhance_prompt",anchor:"diffusers.LTX2ImageToVideoPipeline.enhance_prompt",parameters:[{name:"image",val:": PIL.Image.Image \| numpy.ndarray \| torch.Tensor \| list[PIL.Image.Image] \| list[numpy.ndarray] \| list[torch.Tensor]"},{name:"prompt",val:": str"},{name:"system_prompt",val:": str"},{name:"max_new_tokens",val:": int = 512"},{name:"seed",val:": int = 10"},{name:"generator",val:": torch._C.Generator \| None = None"},{name:"generation_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"device",val:": str \| torch.device \| None = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py#L428"}}),Ne=new W({props:{title:"LTX2ConditionPipeline",local:"diffusers.LTX2ConditionPipeline",headingTag:"h2"}}),Le=new v({props:{name:"class diffusers.LTX2ConditionPipeline",anchor:"diffusers.LTX2ConditionPipeline",parameters:[{name:"scheduler",val:": FlowMatchEulerDiscreteScheduler"},{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"audio_vae",val:": AutoencoderKLLTX2Audio"},{name:"text_encoder",val:": Gemma3ForConditionalGeneration"},{name:"tokenizer",val:": transformers.models.gemma.tokenization_gemma.GemmaTokenizer \| transformers.models.gemma.tokenization_gemma_fast.GemmaTokenizerFast"},{name:"connectors",val:": LTX2TextConnectors"},{name:"transformer",val:": LTX2VideoTransformer3DModel"},{name:"vocoder",val:": diffusers.pipelines.ltx2.vocoder.LTX2Vocoder \| diffusers.pipelines.ltx2.vocoder.LTX2VocoderWithBWE"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L235"}}),Ee=new v({props:{name:"__call__",anchor:"diffusers.LTX2ConditionPipeline.__call__",parameters:[{name:"conditions",val:": diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition \| list[diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition] \| None = None"},{name:"prompt",val:": str \| list[str] = None"},{name:"negative_prompt",val:": str \| list[str] \| None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"frame_rate",val:": float = 24.0"},{name:"num_inference_steps",val:": int = 40"},{name:"sigmas",val:": list[float] \| None = None"},{name:"timesteps",val:": list[float] \| None = None"},{name:"guidance_scale",val:": float = 4.0"},{name:"stg_scale",val:": float = 0.0"},{name:"modality_scale",val:": float = 1.0"},{name:"guidance_rescale",val:": float = 0.0"},{name:"audio_guidance_scale",val:": float \| None = None"},{name:"audio_stg_scale",val:": float \| None = None"},{name:"audio_modality_scale",val:": float \| None = None"},{name:"audio_guidance_rescale",val:": float \| None = None"},{name:"spatio_temporal_guidance_blocks",val:": list[int] \| None = None"},{name:"noise_scale",val:": float \| None = None"},{name:"num_videos_per_prompt",val:": int \| None = 1"},{name:"generator",val:": torch._C.Generator \| list[torch._C.Generator] \| None = None"},{name:"latents",val:": torch.Tensor \| None = None"},{name:"audio_latents",val:": torch.Tensor \| None = None"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"decode_timestep",val:": float \| list[float] = 0.0"},{name:"decode_noise_scale",val:": float \| list[float] \| None = None"},{name:"use_cross_timestep",val:": bool = False"},{name:"output_type",val:": str = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"attention_kwargs",val:": dict[str, typing.Any] \| None = None"},{name:"callback_on_step_end",val:": typing.Optional[typing.Callable[[int, int], NoneType]] = None"},{name:"callback_on_step_end_tensor_inputs",val:": list = ['latents']"},{name:"max_sequence_length",val:": int = 1024"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.__call__.conditions",description:`<strong>conditions</strong> (<code>List[LTXVideoCondition], optional</code>) —
	The list of frame-conditioning items for the video generation.`,name:"conditions"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) —
	The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
	instead.`,name:"prompt"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The height in pixels of the generated image. This is set to 480 by default for the best results.`,name:"height"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) —
	The width in pixels of the generated image. This is set to 848 by default for the best results.`,name:"width"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) —
	The number of video frames to generate`,name:"num_frames"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.frame_rate",description:`<strong>frame_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>24.0</code>) —
	The frames per second (FPS) of the generated video.`,name:"frame_rate"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 40) —
	The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.sigmas",description:`<strong>sigmas</strong> (<code>List[float]</code>, <em>optional</em>) —
	Custom sigmas to use for the denoising process with schedulers which support a <code>sigmas</code> argument in
	their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is passed
	will be used.`,name:"sigmas"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.timesteps",description:`<strong>timesteps</strong> (<code>List[int]</code>, <em>optional</em>) —
	Custom timesteps to use for the denoising process with schedulers which support a <code>timesteps</code> argument
	in their <code>set_timesteps</code> method. If not defined, the default behavior when <code>num_inference_steps</code> is
	passed will be used. Must be in descending order.`,name:"timesteps"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>4.0</code>) —
	Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion
	Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2.
	of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting
	<code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to
	the text <code>prompt</code>, usually at the expense of lower image quality. Used for the video modality (there is
	a separate value <code>audio_guidance_scale</code> for the audio modality).`,name:"guidance_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.stg_scale",description:`<strong>stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	Video guidance scale for Spatio-Temporal Guidance (STG), proposed in <a href="https://arxiv.org/abs/2411.18664" rel="nofollow">Spatiotemporal Skip Guidance for
	Enhanced Video Diffusion Sampling</a>. STG uses a CFG-like estimate
	where we move the sample away from a weak sample from a perturbed version of the denoising model.
	Enabling STG will result in an additional denoising model forward pass; the default value of <code>0.0</code>
	means that STG is disabled.`,name:"stg_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.modality_scale",description:`<strong>modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) —
	Video guidance scale for LTX-2.X modality isolation guidance, where we move the sample away from a
	weaker sample generated by the denoising model withy cross-modality (audio-to-video and video-to-audio)
	cross attention disabled using a CFG-like estimate. Enabling modality guidance will result in an
	additional denoising model forward pass; the default value of <code>1.0</code> means that modality guidance is
	disabled.`,name:"modality_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a> <code>guidance_scale</code> is defined as <code>φ</code> in equation 16. of
	<a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are
	Flawed</a>. Guidance rescale factor should fix overexposure when
	using zero terminal SNR. Used for the video modality.`,name:"guidance_rescale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_guidance_scale",description:`<strong>audio_guidance_scale</strong> (<code>float</code>, <em>optional</em> defaults to <code>None</code>) —
	Audio guidance scale for CFG with respect to the negative prompt. The CFG update rule is the same for
	video and audio, but they can use different values for the guidance scale. The LTX-2.X authors suggest
	that the <code>audio_guidance_scale</code> should be higher relative to the video <code>guidance_scale</code> (e.g. for
	LTX-2.3 they suggest 3.0 for video and 7.0 for audio). If <code>None</code>, defaults to the video value
	<code>guidance_scale</code>.`,name:"audio_guidance_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_stg_scale",description:`<strong>audio_stg_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	Audio guidance scale for STG. As with CFG, the STG update rule is otherwise the same for video and
	audio. For LTX-2.3, a value of 1.0 is suggested for both video and audio. If <code>None</code>, defaults to the
	video value <code>stg_scale</code>.`,name:"audio_stg_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_modality_scale",description:`<strong>audio_modality_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	Audio guidance scale for LTX-2.X modality isolation guidance. As with CFG, the modality guidance rule
	is otherwise the same for video and audio. For LTX-2.3, a value of 3.0 is suggested for both video and
	audio. If <code>None</code>, defaults to the video value <code>modality_scale</code>.`,name:"audio_modality_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_guidance_rescale",description:`<strong>audio_guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	A separate guidance rescale factor for the audio modality. If <code>None</code>, defaults to the video value
	<code>guidance_rescale</code>.`,name:"audio_guidance_rescale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.spatio_temporal_guidance_blocks",description:`<strong>spatio_temporal_guidance_blocks</strong> (<code>list[int]</code>, <em>optional</em>, defaults to <code>None</code>) —
	The zero-indexed transformer block indices at which to apply STG. Must be supplied if STG is used
	(<code>stg_scale</code> or <code>audio_stg_scale</code> is greater than <code>0</code>). A value of <code>[29]</code> is recommended for LTX-2.0
	and <code>[28]</code> is recommended for LTX-2.3.`,name:"spatio_temporal_guidance_blocks"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.noise_scale",description:`<strong>noise_scale</strong> (<code>float</code>, <em>optional</em>, defaults to <code>None</code>) —
	The interpolation factor between random noise and denoised latents at each timestep. Applying noise to
	the <code>latents</code> and <code>audio_latents</code> before continue denoising. If not set, will be inferred from the
	sigma schedule.`,name:"noise_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.audio_latents",description:`<strong>audio_latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for audio
	generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
	tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"audio_latents"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.prompt_attention_mask",description:`<strong>prompt_attention_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated attention mask for text embeddings.`,name:"prompt_attention_mask"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be "". If not
	provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.negative_prompt_attention_mask",description:`<strong>negative_prompt_attention_mask</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) —
	Pre-generated attention mask for negative text embeddings.`,name:"negative_prompt_attention_mask"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) —
	The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) —
	The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.use_cross_timestep",description:`<strong>use_cross_timestep</strong> (<code>bool</code> <em>optional</em>, defaults to <code>False</code>) —
	Whether to use the cross modality (audio is the cross modality of video, and vice versa) sigma when
	calculating the cross attention modulation parameters. <code>True</code> is the newer (e.g. LTX-2.3) behavior;
	<code>False</code> is the legacy LTX-2.0 behavior.`,name:"use_cross_timestep"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.ltx.LTX2PipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.attention_kwargs",description:`<strong>attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) —
	A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under
	<code>self.processor</code> in
	<a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow">diffusers.models.attention_processor</a>.`,name:"attention_kwargs"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <em>optional</em>) —
	A function that calls at the end of each denoising steps during the inference. The function is called
	with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a list of all tensors as specified by
	<code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) —
	The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
	will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
	<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"},{anchor:"diffusers.LTX2ConditionPipeline.__call__.max_sequence_length",description:`<strong>max_sequence_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) —
	Maximum sequence length to use with the <code>prompt</code>.`,name:"max_sequence_length"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L997",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTX2PipelineOutput</code> is returned, otherwise a <code>tuple</code> is
	returned where the first element is a list with the generated images.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.ltx.LTX2PipelineOutput</code> or <code>tuple</code></p>
	`}}),$=new bn({props:{anchor:"diffusers.LTX2ConditionPipeline.__call__.example",$$slots:{default:[la]},$$scope:{ctx:C}}}),Fe=new v({props:{name:"apply_visual_conditioning",anchor:"diffusers.LTX2ConditionPipeline.apply_visual_conditioning",parameters:[{name:"latents",val:": Tensor"},{name:"conditioning_mask",val:": Tensor"},{name:"condition_latents",val:": list"},{name:"condition_strengths",val:": list"},{name:"condition_indices",val:": list"},{name:"latent_height",val:": int"},{name:"latent_width",val:": int"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.apply_visual_conditioning.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>) —
	Initial packed (patchified) latents of shape [batch_size, patch_seq_len, hidden_dim].`,name:"latents"},{anchor:"diffusers.LTX2ConditionPipeline.apply_visual_conditioning.conditioning_mask",description:`<strong>conditioning_mask</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Initial packed (patchified) conditioning mask of shape [batch_size, patch_seq_len, 1] with values in
	[0, 1] where 0 means that the denoising model output will be fully used and 1 means that the condition
	will be fully used (with intermediate values specifying a blend of the denoised and latent values).`,name:"conditioning_mask"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L756",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>Returns a 3-tuple of tensors where:</p>
	<ol>
	<li>The first element is the packed video latents (with unchanged shape [batch_size, patch_seq_len,
	hidden_dim]) with the conditions applied</li>
	<li>The second element is the packed conditioning mask with conditioning strengths applied</li>
	<li>The third element holds the clean conditioning latents.</li>
	</ol>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>Tuple[torch.Tensor, torch.Tensor, torch.Tensor]</code></p>
	`}}),Qe=new v({props:{name:"encode_prompt",anchor:"diffusers.LTX2ConditionPipeline.encode_prompt",parameters:[{name:"prompt",val:": str \| list[str]"},{name:"negative_prompt",val:": str \| list[str] \| None = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": torch.Tensor \| None = None"},{name:"negative_prompt_embeds",val:": torch.Tensor \| None = None"},{name:"prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"negative_prompt_attention_mask",val:": torch.Tensor \| None = None"},{name:"max_sequence_length",val:": int = 1024"},{name:"scale_factor",val:": int = 8"},{name:"device",val:": torch.device \| None = None"},{name:"dtype",val:": torch.dtype \| None = None"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	prompt to be encoded`,name:"prompt"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>list[str]</code>, <em>optional</em>) —
	The prompt or prompts not to guide the image generation. If not defined, one has to pass
	<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
	less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
	provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
	weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
	argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>, <em>optional</em>):
	torch device`,name:"device"},{anchor:"diffusers.LTX2ConditionPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> — (<code>torch.dtype</code>, <em>optional</em>):
	torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L369"}}),Ye=new v({props:{name:"preprocess_conditions",anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions",parameters:[{name:"conditions",val:": diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition \| list[diffusers.pipelines.ltx2.pipeline_ltx2_condition.LTX2VideoCondition] \| None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"device",val:": torch.device \| None = None"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.conditions",description:`<strong>conditions</strong> (<code>LTX2VideoCondition</code> or <code>List[LTX2VideoCondition]</code>, <em>optional</em>, defaults to <code>None</code>) —
	A list of image/video condition instances.`,name:"conditions"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The desired height in pixels.`,name:"height"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) —
	The desired width in pixels.`,name:"width"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) —
	The desired number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.LTX2ConditionPipeline.preprocess_conditions.device",description:`<strong>device</strong> (<code>torch.device</code>, <em>optional</em>, defaults to <code>None</code>) —
	The device on which to put the preprocessed image/video tensors.`,name:"device"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L674",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>Returns a 3-tuple of lists of length <code>len(conditions)</code> as follows:</p>
	<ol>
	<li>The first list is a list of preprocessed video tensors of shape [batch_size=1, num_channels,
	num_frames, height, width].</li>
	<li>The second list is a list of conditioning strengths.</li>
	<li>The third list is a list of indices in latent space to insert the corresponding condition.</li>
	</ol>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>Tuple[List[torch.Tensor], List[float], List[int]]</code></p>
	`}}),Se=new v({props:{name:"trim_conditioning_sequence",anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence",parameters:[{name:"start_frame",val:": int"},{name:"sequence_num_frames",val:": int"},{name:"target_num_frames",val:": int"}],parametersDescription:[{anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence.start_frame",description:"<strong>start_frame</strong> (int) — The target frame number of the first frame in the sequence.",name:"start_frame"},{anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence.sequence_num_frames",description:"<strong>sequence_num_frames</strong> (int) — The number of frames in the sequence.",name:"sequence_num_frames"},{anchor:"diffusers.LTX2ConditionPipeline.trim_conditioning_sequence.target_num_frames",description:"<strong>target_num_frames</strong> (int) — The target number of frames in the generated video.",name:"target_num_frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py#L657",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>updated sequence length</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>int</p>
	`}}),Ae=new W({props:{title:"LTX2LatentUpsamplePipeline",local:"diffusers.LTX2LatentUpsamplePipeline",headingTag:"h2"}}),ze=new v({props:{name:"class diffusers.LTX2LatentUpsamplePipeline",anchor:"diffusers.LTX2LatentUpsamplePipeline",parameters:[{name:"vae",val:": AutoencoderKLLTX2Video"},{name:"latent_upsampler",val:": LTX2LatentUpsamplerModel"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L104"}}),He=new v({props:{name:"__call__",anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__",parameters:[{name:"video",val:": list[PIL.Image.Image \| numpy.ndarray \| torch.Tensor \| list[PIL.Image.Image] \| list[numpy.ndarray] \| list[torch.Tensor]] \| None = None"},{name:"height",val:": int = 512"},{name:"width",val:": int = 768"},{name:"num_frames",val:": int = 121"},{name:"spatial_patch_size",val:": int = 1"},{name:"temporal_patch_size",val:": int = 1"},{name:"latents",val:": torch.Tensor \| None = None"},{name:"latents_normalized",val:": bool = False"},{name:"decode_timestep",val:": float \| list[float] = 0.0"},{name:"decode_noise_scale",val:": float \| list[float] \| None = None"},{name:"adain_factor",val:": float = 0.0"},{name:"tone_map_compression_ratio",val:": float = 0.0"},{name:"generator",val:": torch._C.Generator \| list[torch._C.Generator] \| None = None"},{name:"output_type",val:": str \| None = 'pil'"},{name:"return_dict",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.video",description:`<strong>video</strong> (<code>list[PipelineImageInput]</code>, <em>optional</em>) —
	The video to be upsampled (such as a LTX 2.0 first stage output). If not supplied, <code>latents</code> should be
	supplied.`,name:"video"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>512</code>) —
	The height in pixels of the input video (not the generated video, which will have a larger resolution).`,name:"height"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>768</code>) —
	The width in pixels of the input video (not the generated video, which will have a larger resolution).`,name:"width"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to <code>121</code>) —
	The number of frames in the input video.`,name:"num_frames"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.spatial_patch_size",description:`<strong>spatial_patch_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) —
	The spatial patch size of the video latents. Used when <code>latents</code> is supplied if unpacking is necessary.`,name:"spatial_patch_size"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.temporal_patch_size",description:`<strong>temporal_patch_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) —
	The temporal patch size of the video latents. Used when <code>latents</code> is supplied if unpacking is
	necessary.`,name:"temporal_patch_size"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) —
	Pre-generated video latents. This can be supplied in place of the <code>video</code> argument. Can either be a
	patch sequence of shape <code>(batch_size, seq_len, hidden_dim)</code> or a video latent of shape <code>(batch_size, latent_channels, latent_frames, latent_height, latent_width)</code>.`,name:"latents"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.latents_normalized",description:`<strong>latents_normalized</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	If <code>latents</code> are supplied, whether the <code>latents</code> are normalized using the VAE latent mean and std. If
	<code>True</code>, the <code>latents</code> will be denormalized before being supplied to the latent upsampler.`,name:"latents_normalized"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.decode_timestep",description:`<strong>decode_timestep</strong> (<code>float</code>, defaults to <code>0.0</code>) —
	The timestep at which generated video is decoded.`,name:"decode_timestep"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.decode_noise_scale",description:`<strong>decode_noise_scale</strong> (<code>float</code>, defaults to <code>None</code>) —
	The interpolation factor between random noise and denoised latents at the decode timestep.`,name:"decode_noise_scale"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.adain_factor",description:`<strong>adain_factor</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	Adaptive Instance Normalization (AdaIN) blending factor between the upsampled and original latents.
	Should be in [-10.0, 10.0]; supplying 0.0 (the default) means that AdaIN is not performed.`,name:"adain_factor"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.tone_map_compression_ratio",description:`<strong>tone_map_compression_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) —
	The compression strength for tone mapping, which will reduce the dynamic range of the latent values.
	This is useful for regularizing high-variance latents or for conditioning outputs during generation.
	Should be in [0, 1], where 0.0 (the default) means tone mapping is not applied and 1.0 corresponds to
	the full compression effect.`,name:"tone_map_compression_ratio"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>list[torch.Generator]</code>, <em>optional</em>) —
	One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a>
	to make generation deterministic.`,name:"generator"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) —
	The output format of the generate image. Choose between
	<a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to return a <code>~pipelines.ltx.LTXPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L264",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>If <code>return_dict</code> is <code>True</code>, <code>~pipelines.ltx.LTXPipelineOutput</code> is returned, otherwise a <code>tuple</code> is
	returned where the first element is the upsampled video.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>~pipelines.ltx.LTXPipelineOutput</code> or <code>tuple</code></p>
	`}}),ee=new bn({props:{anchor:"diffusers.LTX2LatentUpsamplePipeline.__call__.example",$$slots:{default:[ia]},$$scope:{ctx:C}}}),Pe=new v({props:{name:"adain_filter_latent",anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent",parameters:[{name:"latents",val:": Tensor"},{name:"reference_latents",val:": Tensor"},{name:"factor",val:": float = 1.0"}],parametersDescription:[{anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent.latent",description:`<strong>latent</strong> (<code>torch.Tensor</code>) —
	Input latents to normalize`,name:"latent"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent.reference_latents",description:`<strong>reference_latents</strong> (<code>torch.Tensor</code>) —
	The reference latents providing style statistics.`,name:"reference_latents"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.adain_filter_latent.factor",description:`<strong>factor</strong> (<code>float</code>) —
	Blending factor between original and transformed latent. Range: -10.0 to 10.0, Default: 1.0`,name:"factor"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L168",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>The transformed latent tensor</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>torch.Tensor</p>
	`}}),$e=new v({props:{name:"tone_map_latents",anchor:"diffusers.LTX2LatentUpsamplePipeline.tone_map_latents",parameters:[{name:"latents",val:": Tensor"},{name:"compression",val:": float"}],parametersDescription:[{anchor:"diffusers.LTX2LatentUpsamplePipeline.tone_map_latents.latents",description:`<strong>latents</strong> — torch.Tensor
	Input latent tensor with arbitrary shape. Expected to be roughly in [-1, 1] or [0, 1] range.`,name:"latents"},{anchor:"diffusers.LTX2LatentUpsamplePipeline.tone_map_latents.compression",description:`<strong>compression</strong> — float
	Compression strength in the range [0, 1].
	<ul>
	<li>0.0: No tone-mapping (identity transform)</li>
	<li>1.0: Full compression effect</li>
	</ul>`,name:"compression"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py#L196",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>torch.Tensor
	The tone-mapped latent tensor of the same shape as input.</p>
	`}}),De=new W({props:{title:"LTX2PipelineOutput",local:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput",headingTag:"h2"}}),qe=new v({props:{name:"class diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput",anchor:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput",parameters:[{name:"frames",val:": Tensor"},{name:"audio",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or list[list[PIL.Image.Image]]) —
	List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing
	denoised PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape
	<code>(batch_size, num_frames, channels, height, width)</code>.`,name:"frames"},{anchor:"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput.audio",description:`<strong>audio</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>) —
	TODO`,name:"audio"}],source:"https://github.com/huggingface/diffusers/blob/vr_13370/src/diffusers/pipelines/ltx2/pipeline_output.py#L9"}}),Ke=new sa({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/ltx2.md"}}),{c(){f=i("meta"),I=s(),y=i("p"),_=s(),p(T.$$.fragment),r=s(),w=i("div"),w.innerHTML=us,Xt=s(),se=i("p"),se.innerHTML=hs,Bt=s(),ae=i("p"),ae.innerHTML=Ms,Vt=s(),oe=i("p"),oe.innerHTML=gs,Wt=s(),p(le.$$.fragment),Ct=s(),ie=i("p"),ie.textContent=fs,kt=s(),de=i("ul"),de.innerHTML=_s,xt=s(),re=i("p"),re.textContent=Ts,Rt=s(),p(pe.$$.fragment),Nt=s(),p(ce.$$.fragment),Lt=s(),me=i("p"),me.textContent=ys,Et=s(),p(ue.$$.fragment),Ft=s(),p(he.$$.fragment),Qt=s(),Me=i("p"),Me.innerHTML=ws,Yt=s(),p(ge.$$.fragment),St=s(),fe=i("p"),fe.textContent=Js,At=s(),p(_e.$$.fragment),zt=s(),Te=i("p"),Te.textContent=bs,Ht=s(),p(ye.$$.fragment),Pt=s(),we=i("p"),we.textContent=Us,$t=s(),Je=i("ol"),Je.innerHTML=js,Dt=s(),be=i("p"),be.innerHTML=vs,qt=s(),p(Ue.$$.fragment),Kt=s(),p(je.$$.fragment),Ot=s(),ve=i("p"),ve.innerHTML=Zs,en=s(),p(Ze.$$.fragment),tn=s(),p(Ge.$$.fragment),nn=s(),G=i("div"),p(Ie.$$.fragment),jn=s(),tt=i("p"),tt.textContent=Gs,vn=s(),nt=i("p"),nt.innerHTML=Is,Zn=s(),k=i("div"),p(Xe.$$.fragment),Gn=s(),st=i("p"),st.textContent=Xs,In=s(),p(Y.$$.fragment),Xn=s(),S=i("div"),p(Be.$$.fragment),Bn=s(),at=i("p"),at.textContent=Bs,Vn=s(),A=i("div"),p(Ve.$$.fragment),Wn=s(),ot=i("p"),ot.innerHTML=Vs,sn=s(),p(We.$$.fragment),an=s(),j=i("div"),p(Ce.$$.fragment),Cn=s(),lt=i("p"),lt.textContent=Ws,kn=s(),it=i("p"),it.innerHTML=Cs,xn=s(),dt=i("p"),dt.textContent=ks,Rn=s(),x=i("div"),p(ke.$$.fragment),Nn=s(),rt=i("p"),rt.textContent=xs,Ln=s(),p(z.$$.fragment),En=s(),H=i("div"),p(xe.$$.fragment),Fn=s(),pt=i("p"),pt.textContent=Rs,Qn=s(),P=i("div"),p(Re.$$.fragment),Yn=s(),ct=i("p"),ct.innerHTML=Ns,on=s(),p(Ne.$$.fragment),ln=s(),J=i("div"),p(Le.$$.fragment),Sn=s(),mt=i("p"),mt.textContent=Ls,An=s(),ut=i("p"),ut.innerHTML=Es,zn=s(),ht=i("p"),ht.textContent=Fs,Hn=s(),R=i("div"),p(Ee.$$.fragment),Pn=s(),Mt=i("p"),Mt.textContent=Qs,$n=s(),p($.$$.fragment),Dn=s(),D=i("div"),p(Fe.$$.fragment),qn=s(),gt=i("p"),gt.textContent=Ys,Kn=s(),q=i("div"),p(Qe.$$.fragment),On=s(),ft=i("p"),ft.textContent=Ss,es=s(),K=i("div"),p(Ye.$$.fragment),ts=s(),_t=i("p"),_t.textContent=As,ns=s(),O=i("div"),p(Se.$$.fragment),ss=s(),Tt=i("p"),Tt.textContent=zs,dn=s(),p(Ae.$$.fragment),rn=s(),V=i("div"),p(ze.$$.fragment),as=s(),N=i("div"),p(He.$$.fragment),os=s(),yt=i("p"),yt.textContent=Hs,ls=s(),p(ee.$$.fragment),is=s(),te=i("div"),p(Pe.$$.fragment),ds=s(),wt=i("p"),wt.textContent=Ps,rs=s(),L=i("div"),p($e.$$.fragment),ps=s(),Jt=i("p"),Jt.textContent=$s,cs=s(),bt=i("p"),bt.innerHTML=Ds,pn=s(),p(De.$$.fragment),cn=s(),F=i("div"),p(qe.$$.fragment),ms=s(),Ut=i("p"),Ut.textContent=qs,mn=s(),p(Ke.$$.fragment),un=s(),It=i("p"),this.h()},l(e){const n=na("svelte-u9bgzb",document.head);f=d(n,"META",{name:!0,content:!0}),n.forEach(t),I=a(e),y=d(e,"P",{}),U(y).forEach(t),_=a(e),c(T.$$.fragment,e),r=a(e),w=d(e,"DIV",{class:!0,"data-svelte-h":!0}),g(w)!=="svelte-si9ct8"&&(w.innerHTML=us),Xt=a(e),se=d(e,"P",{"data-svelte-h":!0}),g(se)!=="svelte-3vo67t"&&(se.innerHTML=hs),Bt=a(e),ae=d(e,"P",{"data-svelte-h":!0}),g(ae)!=="svelte-1134kk7"&&(ae.innerHTML=Ms),Vt=a(e),oe=d(e,"P",{"data-svelte-h":!0}),g(oe)!=="svelte-qw2r3a"&&(oe.innerHTML=gs),Wt=a(e),c(le.$$.fragment,e),Ct=a(e),ie=d(e,"P",{"data-svelte-h":!0}),g(ie)!=="svelte-7k8pa1"&&(ie.textContent=fs),kt=a(e),de=d(e,"UL",{"data-svelte-h":!0}),g(de)!=="svelte-12js9fh"&&(de.innerHTML=_s),xt=a(e),re=d(e,"P",{"data-svelte-h":!0}),g(re)!=="svelte-v5oo8i"&&(re.textContent=Ts),Rt=a(e),c(pe.$$.fragment,e),Nt=a(e),c(ce.$$.fragment,e),Lt=a(e),me=d(e,"P",{"data-svelte-h":!0}),g(me)!=="svelte-71u96j"&&(me.textContent=ys),Et=a(e),c(ue.$$.fragment,e),Ft=a(e),c(he.$$.fragment,e),Qt=a(e),Me=d(e,"P",{"data-svelte-h":!0}),g(Me)!=="svelte-rabflj"&&(Me.innerHTML=ws),Yt=a(e),c(ge.$$.fragment,e),St=a(e),fe=d(e,"P",{"data-svelte-h":!0}),g(fe)!=="svelte-1nr79xi"&&(fe.textContent=Js),At=a(e),c(_e.$$.fragment,e),zt=a(e),Te=d(e,"P",{"data-svelte-h":!0}),g(Te)!=="svelte-q9sktm"&&(Te.textContent=bs),Ht=a(e),c(ye.$$.fragment,e),Pt=a(e),we=d(e,"P",{"data-svelte-h":!0}),g(we)!=="svelte-mni5wh"&&(we.textContent=Us),$t=a(e),Je=d(e,"OL",{"data-svelte-h":!0}),g(Je)!=="svelte-azzl8m"&&(Je.innerHTML=js),Dt=a(e),be=d(e,"P",{"data-svelte-h":!0}),g(be)!=="svelte-1k5mdn5"&&(be.innerHTML=vs),qt=a(e),c(Ue.$$.fragment,e),Kt=a(e),c(je.$$.fragment,e),Ot=a(e),ve=d(e,"P",{"data-svelte-h":!0}),g(ve)!=="svelte-iec67l"&&(ve.innerHTML=Zs),en=a(e),c(Ze.$$.fragment,e),tn=a(e),c(Ge.$$.fragment,e),nn=a(e),G=d(e,"DIV",{class:!0});var X=U(G);c(Ie.$$.fragment,X),jn=a(X),tt=d(X,"P",{"data-svelte-h":!0}),g(tt)!=="svelte-19ipoo4"&&(tt.textContent=Gs),vn=a(X),nt=d(X,"P",{"data-svelte-h":!0}),g(nt)!=="svelte-1sr6eg8"&&(nt.innerHTML=Is),Zn=a(X),k=d(X,"DIV",{class:!0});var Q=U(k);c(Xe.$$.fragment,Q),Gn=a(Q),st=d(Q,"P",{"data-svelte-h":!0}),g(st)!=="svelte-v78lg8"&&(st.textContent=Xs),In=a(Q),c(Y.$$.fragment,Q),Q.forEach(t),Xn=a(X),S=d(X,"DIV",{class:!0});var Oe=U(S);c(Be.$$.fragment,Oe),Bn=a(Oe),at=d(Oe,"P",{"data-svelte-h":!0}),g(at)!=="svelte-16q0ax1"&&(at.textContent=Bs),Oe.forEach(t),Vn=a(X),A=d(X,"DIV",{class:!0});var et=U(A);c(Ve.$$.fragment,et),Wn=a(et),ot=d(et,"P",{"data-svelte-h":!0}),g(ot)!=="svelte-ycu0kg"&&(ot.innerHTML=Vs),et.forEach(t),X.forEach(t),sn=a(e),c(We.$$.fragment,e),an=a(e),j=d(e,"DIV",{class:!0});var B=U(j);c(Ce.$$.fragment,B),Cn=a(B),lt=d(B,"P",{"data-svelte-h":!0}),g(lt)!=="svelte-10tczlw"&&(lt.textContent=Ws),kn=a(B),it=d(B,"P",{"data-svelte-h":!0}),g(it)!=="svelte-1sr6eg8"&&(it.innerHTML=Cs),xn=a(B),dt=d(B,"P",{"data-svelte-h":!0}),g(dt)!=="svelte-1yyqpus"&&(dt.textContent=ks),Rn=a(B),x=d(B,"DIV",{class:!0});var jt=U(x);c(ke.$$.fragment,jt),Nn=a(jt),rt=d(jt,"P",{"data-svelte-h":!0}),g(rt)!=="svelte-v78lg8"&&(rt.textContent=xs),Ln=a(jt),c(z.$$.fragment,jt),jt.forEach(t),En=a(B),H=d(B,"DIV",{class:!0});var Mn=U(H);c(xe.$$.fragment,Mn),Fn=a(Mn),pt=d(Mn,"P",{"data-svelte-h":!0}),g(pt)!=="svelte-16q0ax1"&&(pt.textContent=Rs),Mn.forEach(t),Qn=a(B),P=d(B,"DIV",{class:!0});var gn=U(P);c(Re.$$.fragment,gn),Yn=a(gn),ct=d(gn,"P",{"data-svelte-h":!0}),g(ct)!=="svelte-ycu0kg"&&(ct.innerHTML=Ns),gn.forEach(t),B.forEach(t),on=a(e),c(Ne.$$.fragment,e),ln=a(e),J=d(e,"DIV",{class:!0});var Z=U(J);c(Le.$$.fragment,Z),Sn=a(Z),mt=d(Z,"P",{"data-svelte-h":!0}),g(mt)!=="svelte-8bvrgw"&&(mt.textContent=Ls),An=a(Z),ut=d(Z,"P",{"data-svelte-h":!0}),g(ut)!=="svelte-1sr6eg8"&&(ut.innerHTML=Es),zn=a(Z),ht=d(Z,"P",{"data-svelte-h":!0}),g(ht)!=="svelte-1yyqpus"&&(ht.textContent=Fs),Hn=a(Z),R=d(Z,"DIV",{class:!0});var vt=U(R);c(Ee.$$.fragment,vt),Pn=a(vt),Mt=d(vt,"P",{"data-svelte-h":!0}),g(Mt)!=="svelte-v78lg8"&&(Mt.textContent=Qs),$n=a(vt),c($.$$.fragment,vt),vt.forEach(t),Dn=a(Z),D=d(Z,"DIV",{class:!0});var fn=U(D);c(Fe.$$.fragment,fn),qn=a(fn),gt=d(fn,"P",{"data-svelte-h":!0}),g(gt)!=="svelte-1l041al"&&(gt.textContent=Ys),fn.forEach(t),Kn=a(Z),q=d(Z,"DIV",{class:!0});var _n=U(q);c(Qe.$$.fragment,_n),On=a(_n),ft=d(_n,"P",{"data-svelte-h":!0}),g(ft)!=="svelte-16q0ax1"&&(ft.textContent=Ss),_n.forEach(t),es=a(Z),K=d(Z,"DIV",{class:!0});var Tn=U(K);c(Ye.$$.fragment,Tn),ts=a(Tn),_t=d(Tn,"P",{"data-svelte-h":!0}),g(_t)!=="svelte-176er7i"&&(_t.textContent=As),Tn.forEach(t),ns=a(Z),O=d(Z,"DIV",{class:!0});var yn=U(O);c(Se.$$.fragment,yn),ss=a(yn),Tt=d(yn,"P",{"data-svelte-h":!0}),g(Tt)!=="svelte-1eod455"&&(Tt.textContent=zs),yn.forEach(t),Z.forEach(t),dn=a(e),c(Ae.$$.fragment,e),rn=a(e),V=d(e,"DIV",{class:!0});var ne=U(V);c(ze.$$.fragment,ne),as=a(ne),N=d(ne,"DIV",{class:!0});var Zt=U(N);c(He.$$.fragment,Zt),os=a(Zt),yt=d(Zt,"P",{"data-svelte-h":!0}),g(yt)!=="svelte-v78lg8"&&(yt.textContent=Hs),ls=a(Zt),c(ee.$$.fragment,Zt),Zt.forEach(t),is=a(ne),te=d(ne,"DIV",{class:!0});var wn=U(te);c(Pe.$$.fragment,wn),ds=a(wn),wt=d(wn,"P",{"data-svelte-h":!0}),g(wt)!=="svelte-tr32vd"&&(wt.textContent=Ps),wn.forEach(t),rs=a(ne),L=d(ne,"DIV",{class:!0});var Gt=U(L);c($e.$$.fragment,Gt),ps=a(Gt),Jt=d(Gt,"P",{"data-svelte-h":!0}),g(Jt)!=="svelte-1p9wfz7"&&(Jt.textContent=$s),cs=a(Gt),bt=d(Gt,"P",{"data-svelte-h":!0}),g(bt)!=="svelte-7cxa61"&&(bt.innerHTML=Ds),Gt.forEach(t),ne.forEach(t),pn=a(e),c(De.$$.fragment,e),cn=a(e),F=d(e,"DIV",{class:!0});var Jn=U(F);c(qe.$$.fragment,Jn),ms=a(Jn),Ut=d(Jn,"P",{"data-svelte-h":!0}),g(Ut)!=="svelte-ia4jjd"&&(Ut.textContent=qs),Jn.forEach(t),mn=a(e),c(Ke.$$.fragment,e),un=a(e),It=d(e,"P",{}),U(It).forEach(t),this.h()},h(){b(f,"name","hf:doc:metadata"),b(f,"content",ra),b(w,"class","flex flex-wrap space-x-1"),b(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(te,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),b(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,n){o(document.head,f),l(e,I,n),l(e,y,n),l(e,_,n),m(T,e,n),l(e,r,n),l(e,w,n),l(e,Xt,n),l(e,se,n),l(e,Bt,n),l(e,ae,n),l(e,Vt,n),l(e,oe,n),l(e,Wt,n),m(le,e,n),l(e,Ct,n),l(e,ie,n),l(e,kt,n),l(e,de,n),l(e,xt,n),l(e,re,n),l(e,Rt,n),m(pe,e,n),l(e,Nt,n),m(ce,e,n),l(e,Lt,n),l(e,me,n),l(e,Et,n),m(ue,e,n),l(e,Ft,n),m(he,e,n),l(e,Qt,n),l(e,Me,n),l(e,Yt,n),m(ge,e,n),l(e,St,n),l(e,fe,n),l(e,At,n),m(_e,e,n),l(e,zt,n),l(e,Te,n),l(e,Ht,n),m(ye,e,n),l(e,Pt,n),l(e,we,n),l(e,$t,n),l(e,Je,n),l(e,Dt,n),l(e,be,n),l(e,qt,n),m(Ue,e,n),l(e,Kt,n),m(je,e,n),l(e,Ot,n),l(e,ve,n),l(e,en,n),m(Ze,e,n),l(e,tn,n),m(Ge,e,n),l(e,nn,n),l(e,G,n),m(Ie,G,null),o(G,jn),o(G,tt),o(G,vn),o(G,nt),o(G,Zn),o(G,k),m(Xe,k,null),o(k,Gn),o(k,st),o(k,In),m(Y,k,null),o(G,Xn),o(G,S),m(Be,S,null),o(S,Bn),o(S,at),o(G,Vn),o(G,A),m(Ve,A,null),o(A,Wn),o(A,ot),l(e,sn,n),m(We,e,n),l(e,an,n),l(e,j,n),m(Ce,j,null),o(j,Cn),o(j,lt),o(j,kn),o(j,it),o(j,xn),o(j,dt),o(j,Rn),o(j,x),m(ke,x,null),o(x,Nn),o(x,rt),o(x,Ln),m(z,x,null),o(j,En),o(j,H),m(xe,H,null),o(H,Fn),o(H,pt),o(j,Qn),o(j,P),m(Re,P,null),o(P,Yn),o(P,ct),l(e,on,n),m(Ne,e,n),l(e,ln,n),l(e,J,n),m(Le,J,null),o(J,Sn),o(J,mt),o(J,An),o(J,ut),o(J,zn),o(J,ht),o(J,Hn),o(J,R),m(Ee,R,null),o(R,Pn),o(R,Mt),o(R,$n),m($,R,null),o(J,Dn),o(J,D),m(Fe,D,null),o(D,qn),o(D,gt),o(J,Kn),o(J,q),m(Qe,q,null),o(q,On),o(q,ft),o(J,es),o(J,K),m(Ye,K,null),o(K,ts),o(K,_t),o(J,ns),o(J,O),m(Se,O,null),o(O,ss),o(O,Tt),l(e,dn,n),m(Ae,e,n),l(e,rn,n),l(e,V,n),m(ze,V,null),o(V,as),o(V,N),m(He,N,null),o(N,os),o(N,yt),o(N,ls),m(ee,N,null),o(V,is),o(V,te),m(Pe,te,null),o(te,ds),o(te,wt),o(V,rs),o(V,L),m($e,L,null),o(L,ps),o(L,Jt),o(L,cs),o(L,bt),l(e,pn,n),m(De,e,n),l(e,cn,n),l(e,F,n),m(qe,F,null),o(F,ms),o(F,Ut),l(e,mn,n),m(Ke,e,n),l(e,un,n),l(e,It,n),hn=!0},p(e,[n]){const X={};n&2&&(X.$$scope={dirty:n,ctx:e}),Y.$set(X);const Q={};n&2&&(Q.$$scope={dirty:n,ctx:e}),z.$set(Q);const Oe={};n&2&&(Oe.$$scope={dirty:n,ctx:e}),$.$set(Oe);const et={};n&2&&(et.$$scope={dirty:n,ctx:e}),ee.$set(et)},i(e){hn\|\|(u(T.$$.fragment,e),u(le.$$.fragment,e),u(pe.$$.fragment,e),u(ce.$$.fragment,e),u(ue.$$.fragment,e),u(he.$$.fragment,e),u(ge.$$.fragment,e),u(_e.$$.fragment,e),u(ye.$$.fragment,e),u(Ue.$$.fragment,e),u(je.$$.fragment,e),u(Ze.$$.fragment,e),u(Ge.$$.fragment,e),u(Ie.$$.fragment,e),u(Xe.$$.fragment,e),u(Y.$$.fragment,e),u(Be.$$.fragment,e),u(Ve.$$.fragment,e),u(We.$$.fragment,e),u(Ce.$$.fragment,e),u(ke.$$.fragment,e),u(z.$$.fragment,e),u(xe.$$.fragment,e),u(Re.$$.fragment,e),u(Ne.$$.fragment,e),u(Le.$$.fragment,e),u(Ee.$$.fragment,e),u($.$$.fragment,e),u(Fe.$$.fragment,e),u(Qe.$$.fragment,e),u(Ye.$$.fragment,e),u(Se.$$.fragment,e),u(Ae.$$.fragment,e),u(ze.$$.fragment,e),u(He.$$.fragment,e),u(ee.$$.fragment,e),u(Pe.$$.fragment,e),u($e.$$.fragment,e),u(De.$$.fragment,e),u(qe.$$.fragment,e),u(Ke.$$.fragment,e),hn=!0)},o(e){h(T.$$.fragment,e),h(le.$$.fragment,e),h(pe.$$.fragment,e),h(ce.$$.fragment,e),h(ue.$$.fragment,e),h(he.$$.fragment,e),h(ge.$$.fragment,e),h(_e.$$.fragment,e),h(ye.$$.fragment,e),h(Ue.$$.fragment,e),h(je.$$.fragment,e),h(Ze.$$.fragment,e),h(Ge.$$.fragment,e),h(Ie.$$.fragment,e),h(Xe.$$.fragment,e),h(Y.$$.fragment,e),h(Be.$$.fragment,e),h(Ve.$$.fragment,e),h(We.$$.fragment,e),h(Ce.$$.fragment,e),h(ke.$$.fragment,e),h(z.$$.fragment,e),h(xe.$$.fragment,e),h(Re.$$.fragment,e),h(Ne.$$.fragment,e),h(Le.$$.fragment,e),h(Ee.$$.fragment,e),h($.$$.fragment,e),h(Fe.$$.fragment,e),h(Qe.$$.fragment,e),h(Ye.$$.fragment,e),h(Se.$$.fragment,e),h(Ae.$$.fragment,e),h(ze.$$.fragment,e),h(He.$$.fragment,e),h(ee.$$.fragment,e),h(Pe.$$.fragment,e),h($e.$$.fragment,e),h(De.$$.fragment,e),h(qe.$$.fragment,e),h(Ke.$$.fragment,e),hn=!1},d(e){e&&(t(I),t(y),t(_),t(r),t(w),t(Xt),t(se),t(Bt),t(ae),t(Vt),t(oe),t(Wt),t(Ct),t(ie),t(kt),t(de),t(xt),t(re),t(Rt),t(Nt),t(Lt),t(me),t(Et),t(Ft),t(Qt),t(Me),t(Yt),t(St),t(fe),t(At),t(zt),t(Te),t(Ht),t(Pt),t(we),t($t),t(Je),t(Dt),t(be),t(qt),t(Kt),t(Ot),t(ve),t(en),t(tn),t(nn),t(G),t(sn),t(an),t(j),t(on),t(ln),t(J),t(dn),t(rn),t(V),t(pn),t(cn),t(F),t(mn),t(un),t(It)),t(f),M(T,e),M(le,e),M(pe,e),M(ce,e),M(ue,e),M(he,e),M(ge,e),M(_e,e),M(ye,e),M(Ue,e),M(je,e),M(Ze,e),M(Ge,e),M(Ie),M(Xe),M(Y),M(Be),M(Ve),M(We,e),M(Ce),M(ke),M(z),M(xe),M(Re),M(Ne,e),M(Le),M(Ee),M($),M(Fe),M(Qe),M(Ye),M(Se),M(Ae,e),M(ze),M(He),M(ee),M(Pe),M($e),M(De,e),M(qe),M(Ke,e)}}}const ra='{"title":"LTX-2","local":"ltx-2","sections":[{"title":"Two-stages Generation","local":"two-stages-generation","sections":[],"depth":2},{"title":"Distilled checkpoint generation","local":"distilled-checkpoint-generation","sections":[],"depth":2},{"title":"Condition Pipeline Generation","local":"condition-pipeline-generation","sections":[],"depth":2},{"title":"Multimodal Guidance","local":"multimodal-guidance","sections":[],"depth":2},{"title":"Prompt Enhancement","local":"prompt-enhancement","sections":[],"depth":2},{"title":"LTX2Pipeline","local":"diffusers.LTX2Pipeline","sections":[],"depth":2},{"title":"LTX2ImageToVideoPipeline","local":"diffusers.LTX2ImageToVideoPipeline","sections":[],"depth":2},{"title":"LTX2ConditionPipeline","local":"diffusers.LTX2ConditionPipeline","sections":[],"depth":2},{"title":"LTX2LatentUpsamplePipeline","local":"diffusers.LTX2LatentUpsamplePipeline","sections":[],"depth":2},{"title":"LTX2PipelineOutput","local":"diffusers.pipelines.ltx2.pipeline_output.LTX2PipelineOutput","sections":[],"depth":2}],"depth":1}';function pa(C){return Os(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class fa extends ea{constructor(f){super(),ta(this,f,pa,da,Ks,{})}}export{fa as component};

Xet Storage Details

Size:: 197 kB
Xet hash:: 34d96f6767f72fd2dd4a5796bdc87ca23d2fb383f17b6243000d56ccb00b7b89

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.