# LTX

[LTX Video](https://huggingface.co/Lightricks/LTX-Video) is the first DiT-based video generation model capable of generating high-quality videos in real time. It produces 24 FPS videos at a 768x512 resolution faster than they can be watched. Trained on a large-scale dataset of diverse videos, the model generates high-resolution videos with realistic and varied content. We provide models for both text-to-video and image + text-to-video use cases.

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

## Loading Single Files

Loading the original LTX Video checkpoints is also possible with [`~ModelMixin.from_single_file`].

```python
import torch
from diffusers import AutoencoderKLLTXVideo, LTXImageToVideoPipeline, LTXVideoTransformer3DModel

single_file_url = "https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.safetensors"
transformer = LTXVideoTransformer3DModel.from_single_file(
    single_file_url, torch_dtype=torch.bfloat16
)
vae = AutoencoderKLLTXVideo.from_single_file(single_file_url, torch_dtype=torch.bfloat16)
pipe = LTXImageToVideoPipeline.from_pretrained(
    "Lightricks/LTX-Video", transformer=transformer, vae=vae, torch_dtype=torch.bfloat16
)

# ... inference code ...
```

Alternatively, the pipeline can be used to load the weights with [`~FromSingleFileMixin.from_single_file`].

```python
import torch
from diffusers import LTXImageToVideoPipeline
from transformers import T5EncoderModel, T5Tokenizer

single_file_url = "https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.safetensors"
text_encoder = T5EncoderModel.from_pretrained(
    "Lightricks/LTX-Video", subfolder="text_encoder", torch_dtype=torch.bfloat16
)
tokenizer = T5Tokenizer.from_pretrained(
    "Lightricks/LTX-Video", subfolder="tokenizer", torch_dtype=torch.bfloat16
)
pipe = LTXImageToVideoPipeline.from_single_file(
    single_file_url, text_encoder=text_encoder, tokenizer=tokenizer, torch_dtype=torch.bfloat16
)
```

Refer to [this section](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogvideox#memory-optimization) to learn more about optimizing memory consumption.

## LTXPipeline

Pipeline for text-to-video generation.

Reference: https://github.com/Lightricks/LTX-Video

Components:

- `transformer` (`LTXVideoTransformer3DModel`) — Conditional transformer architecture to denoise the encoded video latents.
- `scheduler` (`FlowMatchEulerDiscreteScheduler`) — A scheduler used in combination with `transformer` to denoise the encoded latents.
- `vae` (`AutoencoderKLLTXVideo`) — Variational Auto-Encoder (VAE) model to encode and decode videos to and from latent representations.
- `text_encoder` ([`T5EncoderModel`](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel)) — specifically the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
- `tokenizer` ([`T5TokenizerFast`](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast)) — Tokenizer for the T5 text encoder.

**`__call__`**

Function invoked when calling the pipeline for generation.

- `prompt` (`str` or `List[str]`, *optional*) — The prompt or prompts to guide the generation. If not defined, `prompt_embeds` has to be passed instead.
- `negative_prompt` (`str` or `List[str]`, *optional*) — The prompt or prompts not to guide the generation. Ignored when guidance is not used (i.e., when `guidance_scale` is less than `1`).
- `height` (`int`, defaults to `512`) — The height in pixels of the generated video.
- `width` (`int`, defaults to `704`) — The width in pixels of the generated video.
- `num_frames` (`int`, defaults to `161`) — The number of video frames to generate.
- `frame_rate` (`int`, defaults to `25`)
- `num_inference_steps` (`int`, *optional*, defaults to `50`) — The number of denoising steps. More denoising steps usually lead to higher quality at the expense of slower inference.
- `timesteps` (`List[int]`, *optional*) — Custom timesteps to use for the denoising process with schedulers that support a `timesteps` argument in their `set_timesteps` method. Must be in descending order. If not defined, the default behavior for the given `num_inference_steps` is used.
- `guidance_scale` (`float`, defaults to `3`) — Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598), i.e. `w` in equation 2 of the [Imagen paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance is enabled when `guidance_scale > 1`; higher values encourage generations that follow the text `prompt` more closely, usually at the expense of lower quality.
- `num_videos_per_prompt` (`int`, *optional*, defaults to `1`) — The number of videos to generate per prompt.
- `generator` (`torch.Generator` or `List[torch.Generator]`, *optional*) — One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation deterministic.
- `latents` (`torch.Tensor`, *optional*) — Pre-generated noisy latents, sampled from a Gaussian distribution, used as inputs for generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor is generated by sampling with the supplied random `generator`.
- `prompt_embeds`, `negative_prompt_embeds` (`torch.Tensor`, *optional*) — Pre-generated (negative) text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, they are generated from the `prompt` / `negative_prompt` arguments.
- `prompt_attention_mask`, `negative_prompt_attention_mask` (`torch.Tensor`, *optional*) — Pre-generated attention masks for the (negative) text embeddings.
- `output_type` (`str`, *optional*, defaults to `"pil"`) — The output format of the generated video. Choose between [PIL](https://pillow.readthedocs.io/en/stable/) (`PIL.Image.Image`) or `np.array`.
- `return_dict` (`bool`, *optional*, defaults to `True`) — Whether or not to return an `~pipelines.ltx.LTXPipelineOutput` instead of a plain tuple.
- `attention_kwargs` (`dict`, *optional*) — A kwargs dictionary that, if specified, is passed along to the `AttentionProcessor` as defined under `self.processor` in [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
- `callback_on_step_end` (`Callable`, *optional*) — A function called at the end of each denoising step during inference with the arguments `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include the tensors specified by `callback_on_step_end_tensor_inputs`.
- `callback_on_step_end_tensor_inputs` (`List[str]`, *optional*, defaults to `["latents"]`) — The list of tensor inputs passed to `callback_on_step_end` as `callback_kwargs`. Only variables listed in the `._callback_tensor_inputs` attribute of the pipeline class can be included.
- `max_sequence_length` (`int`, defaults to `128`) — Maximum sequence length to use with the `prompt`.

Returns an `~pipelines.ltx.LTXPipelineOutput` if `return_dict` is `True`, otherwise a `tuple` whose first element is a list with the generated frames.

Example:

```python
import torch
from diffusers import LTXPipeline
from diffusers.utils import export_to_video

pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
pipe.to("cuda")

prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"

video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=704,
    height=480,
    num_frames=161,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=24)
```

**`encode_prompt`**

Encodes the prompt into text encoder hidden states. It accepts `prompt` and, when `do_classifier_free_guidance` is `True` (the default), `negative_prompt`; `num_videos_per_prompt` (defaults to `1`) controls how many embeddings are produced per prompt. Pre-computed `prompt_embeds`, `negative_prompt_embeds`, and their attention masks can be passed to skip re-encoding, and `max_sequence_length` (defaults to `128`), `device`, and `dtype` control the encoding.

## LTXImageToVideoPipeline

Pipeline for image-to-video generation. It is built from the same components as `LTXPipeline`.

Reference: https://github.com/Lightricks/LTX-Video

**`__call__`**

Function invoked when calling the pipeline for generation. It accepts the same arguments as `LTXPipeline.__call__`, plus:

- `image` (`PipelineImageInput`) — The input image to condition the generation on. Must be an image, a list of images, or a `torch.Tensor`.

Returns an `~pipelines.ltx.LTXPipelineOutput` if `return_dict` is `True`, otherwise a `tuple` whose first element is a list with the generated frames.

Example:

```python
import torch
from diffusers import LTXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image

pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
pipe.to("cuda")

image = load_image(
    "https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"
)
prompt = "A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background. Flames engulf the structure, with smoke billowing into the air. Firefighters in protective gear rush to the scene, a fire truck labeled '38' visible behind them. The girl's neutral expression contrasts sharply with the chaos of the fire, creating a poignant and emotionally charged scene."
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"

video = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=704,
    height=480,
    num_frames=161,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=24)
```

**`encode_prompt`**

Encodes the prompt into text encoder hidden states, with the same arguments as `LTXPipeline.encode_prompt`.

## LTXPipelineOutput

Output class for LTX pipelines.

- `frames` (`torch.Tensor`, `np.ndarray`, or `List[List[PIL.Image.Image]]`) — List of video outputs. It can be a nested list of length `batch_size`, with each sub-list containing denoised PIL image sequences of length `num_frames`. It can also be a NumPy array or Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
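Both pipelines return this class by default; with `return_dict=False` they return a plain tuple instead, whose first element is the list of generated frame sequences. A minimal sketch of handling both forms, assuming the `pipe` and prompts from the text-to-video example above:

```python
# Default: an LTXPipelineOutput whose .frames holds one frame sequence per generated video.
output = pipe(prompt=prompt, negative_prompt=negative_prompt, num_frames=161)
video = output.frames[0]

# With return_dict=False, the same frames come back as the first element of a plain tuple.
frames = pipe(
    prompt=prompt, negative_prompt=negative_prompt, num_frames=161, return_dict=False
)[0]
video = frames[0]
```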
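The memory-optimization section linked earlier covers general techniques for video pipelines. Below is a minimal sketch of how they are commonly combined with LTX; it assumes the standard `DiffusionPipeline` CPU-offload helper (which requires `accelerate`) and that the LTX VAE exposes the usual `enable_tiling()` method, so check your installed diffusers version before relying on either.

```python
import torch
from diffusers import LTXPipeline
from diffusers.utils import export_to_video

pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)

# Keep only the currently running submodule on the GPU (assumes accelerate is installed).
pipe.enable_model_cpu_offload()

# Decode the video in tiles to bound VAE memory usage (assumes the VAE supports tiling).
pipe.vae.enable_tiling()

# Illustrative prompt; any of the prompts from the examples above works the same way.
video = pipe(
    prompt="A calm mountain lake at sunrise, with mist drifting over the water",
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    width=704,
    height=480,
    num_frames=161,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=24)
```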