Image-to-Video
Diffusers
Safetensors
LTX2Pipeline
text-to-video
video-to-video
image-text-to-video
audio-to-video
text-to-audio
video-to-audio
audio-to-audio
text-to-audio-video
image-to-audio-video
image-text-to-audio-video
ltx-2
ltx-2-3
ltx-video
ltxv
lightricks
Instructions to use diffusers/LTX-2.3-Distilled-Diffusers with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use diffusers/LTX-2.3-Distilled-Diffusers with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image, export_to_video

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("diffusers/LTX-2.3-Distilled-Diffusers", dtype=torch.bfloat16, device_map="cuda")
pipe.to("cuda")

prompt = "A man with short gray hair plays a red electric guitar."
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)

output = pipe(image=image, prompt=prompt).frames[0]
export_to_video(output, "output.mp4")
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "_class_name": "LTX2VocoderWithBWE",
  "_diffusers_version": "0.37.0.dev0",
  "act_fn": "snakebeta",
  "antialias": true,
  "antialias_kernel_size": 12,
  "antialias_ratio": 2,
  "bwe_act_fn": "snakebeta",
  "bwe_antialias": true,
  "bwe_antialias_kernel_size": 12,
  "bwe_antialias_ratio": 2,
  "bwe_final_act_fn": null,
  "bwe_final_bias": false,
  "bwe_hidden_channels": 512,
  "bwe_in_channels": 128,
  "bwe_leaky_relu_negative_slope": 0.1,
  "bwe_out_channels": 2,
  "bwe_resnet_dilations": [
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ]
  ],
  "bwe_resnet_kernel_sizes": [
    3,
    7,
    11
  ],
  "bwe_upsample_factors": [
    6,
    5,
    2,
    2,
    2
  ],
  "bwe_upsample_kernel_sizes": [
    12,
    11,
    4,
    4,
    4
  ],
  "filter_length": 512,
  "final_act_fn": null,
  "final_bias": false,
  "hidden_channels": 1536,
  "hop_length": 80,
  "in_channels": 128,
  "input_sampling_rate": 16000,
  "leaky_relu_negative_slope": 0.1,
  "num_mel_channels": 64,
  "out_channels": 2,
  "output_sampling_rate": 48000,
  "resnet_dilations": [
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ]
  ],
  "resnet_kernel_sizes": [
    3,
    7,
    11
  ],
  "upsample_factors": [
    5,
    2,
    2,
    2,
    2,
    2
  ],
  "upsample_kernel_sizes": [
    11,
    4,
    4,
    4,
    4,
    4
  ],
  "window_length": 512
}
```