Instructions to use vidfom/Ltx-3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use vidfom/Ltx-3 with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="vidfom/Ltx-3",
    filename="ComfyUI/models/text_encoders/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf",
)
llm.create_chat_completion(
    messages = "No input example has been defined for this model task."
)
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- llama.cpp
How to use vidfom/Ltx-3 with llama.cpp:
Install from brew
brew install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Install from WinGet (Windows)
winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use pre-built binary
# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
./llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
./build/bin/llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use Docker
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- LM Studio
- Jan
- Ollama
How to use vidfom/Ltx-3 with Ollama:
ollama run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Unsloth Studio
How to use vidfom/Ltx-3 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for vidfom/Ltx-3 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for vidfom/Ltx-3 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for vidfom/Ltx-3 to start chatting
- Docker Model Runner
How to use vidfom/Ltx-3 with Docker Model Runner:
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Lemonade
How to use vidfom/Ltx-3 with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull vidfom/Ltx-3:UD-Q4_K_XL
Run and chat with the model
lemonade run user.Ltx-3-UD-Q4_K_XL
List all available models
lemonade list
import comfy
import comfy.utils
import comfy_extras.nodes_lt as nodes_lt
import comfy_extras.nodes_post_processing as post_processing
import nodes

from .nodes_registry import comfy_node
def blur_internal(image, blur_radius):
    """Optionally blur ``image`` via the post-processing ``Blur`` node.

    Args:
        image: Image forwarded as-is to ``post_processing.Blur.execute``.
        blur_radius: Blur kernel radius. Blurring is applied only when this
            is greater than zero.

    Returns:
        ``image`` itself when ``blur_radius`` is not positive; otherwise the
        first element of the value returned by
        ``post_processing.Blur.execute(image, blur_radius, sigma)``.
    """
    if blur_radius > 0:
        # https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#getgaussiankernel
        # sigma = 0.3 * blur_radius + 0.5 is what is recommended in the OpenCV doc for the
        # relationship between sigma and kernel size 2*blur_radius + 1, however we want somewhat weaker
        # blurring, so we use 0.3 * blur_radius instead, reducing the sigma value by 0.5
        sigma = 0.3 * blur_radius
        image = post_processing.Blur.execute(image, blur_radius, sigma)[0]
    return image
# NOTE(review): this file imports `comfy_node` from `.nodes_registry`, but no
# registration decorator is visible on this class — confirm whether one was
# dropped and needs to be restored.
class LTXVAddGuideAdvanced:
    """Guide node that resizes, CRF-preprocesses and optionally blurs an image
    before delegating to ``nodes_lt.LTXVAddGuide``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the node's input specification.

        Declared as a classmethod so it can be called on the class itself
        without an instance.
        """
        return {
            "required": {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "vae": ("VAE",),
                "latent": ("LATENT",),
                "image": ("IMAGE",),
                "frame_idx": (
                    "INT",
                    {
                        "default": 0,
                        "min": -9999,
                        "max": 9999,
                        "tooltip": "Frame index to start the conditioning at. For single-frame images or "
                        "videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
                        "frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
                        "the nearest multiple of 8. Negative values are counted from the end of the video.",
                    },
                ),
                "strength": (
                    "FLOAT",
                    {
                        "default": 1.0,
                        "min": 0.0,
                        "max": 1.0,
                        "tooltip": "Strength of the conditioning. Higher values will make the conditioning more exact.",
                    },
                ),
                "crf": (
                    "INT",
                    {
                        "default": 29,
                        "min": 0,
                        "max": 51,
                        "step": 1,
                        "tooltip": "CRF value for the video. Higher values mean more motion, lower values mean higher quality.",
                    },
                ),
                "blur_radius": (
                    "INT",
                    {
                        "default": 0,
                        "min": 0,
                        "max": 7,
                        "step": 1,
                        "tooltip": "Blur kernel radius size. Higher values mean more motion, lower values mean higher quality.",
                    },
                ),
                "interpolation": (
                    [
                        "lanczos",
                        "bislerp",
                        "nearest",
                        "bilinear",
                        "bicubic",
                        "area",
                        "nearest-exact",
                    ],
                    {"default": "lanczos"},
                ),
                "crop": (["center", "disabled"], {"default": "disabled"}),
            }
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    DESCRIPTION = (
        "Adds a conditioning frame or a video at a specific frame index. "
        "This node is used to add a keyframe or a video segment which should appear in the "
        "generated video at a specified index. It resizes the image to the correct size and "
        "applies preprocessing to it."
    )

    def generate(
        self,
        positive,
        negative,
        vae,
        latent,
        image,
        frame_idx,
        strength,
        crf,
        blur_radius,
        interpolation,
        crop,
    ):
        """Preprocess ``image`` and add it as a guide via ``LTXVAddGuide``.

        Args:
            positive: Positive conditioning, forwarded unchanged.
            negative: Negative conditioning, forwarded unchanged.
            vae: VAE; its ``downscale_index_formula`` supplies the spatial
                scale factors.
            latent: Latent dict; ``latent["samples"]`` is indexed on axes 3
                and 4 for latent height and width.
            image: Guide image(s); the last axis is moved to position 1 for
                resizing and moved back afterwards.
            frame_idx: Frame index forwarded to ``LTXVAddGuide``.
            strength: Conditioning strength forwarded to ``LTXVAddGuide``.
            crf: CRF value passed to ``LTXVPreprocess``.
            blur_radius: Blur radius; values not greater than 0 skip blurring.
            interpolation: Resize method name for ``common_upscale``.
            crop: Crop mode for ``common_upscale``.

        Returns:
            Whatever ``nodes_lt.LTXVAddGuide().execute`` returns.
        """
        # The first element of the formula is not needed for spatial sizing.
        _, width_scale_factor, height_scale_factor = vae.downscale_index_formula
        samples_shape = latent["samples"].shape
        width = samples_shape[4] * width_scale_factor
        height = samples_shape[3] * height_scale_factor

        # Resize to the pixel size matching the latent, then keep values in [0, 1].
        image = (
            comfy.utils.common_upscale(
                image.movedim(-1, 1), width, height, interpolation, crop=crop
            )
            .movedim(1, -1)
            .clamp(0, 1)
        )
        image = nodes_lt.LTXVPreprocess().execute(image, crf)[0]
        image = blur_internal(image, blur_radius)

        return nodes_lt.LTXVAddGuide().execute(
            positive=positive,
            negative=negative,
            vae=vae,
            latent=latent,
            image=image,
            frame_idx=frame_idx,
            strength=strength,
        )
# NOTE(review): this file imports `comfy_node` from `.nodes_registry`, but no
# registration decorator is visible on this class — confirm whether one was
# dropped and needs to be restored.
class LTXVImgToVideoAdvanced:
    """Image-to-video node that resizes, CRF-preprocesses and optionally blurs
    an image before delegating to ``nodes_lt.LTXVImgToVideo``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the node's input specification.

        Declared as a classmethod so it can be called on the class itself
        without an instance.
        """
        return {
            "required": {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "vae": ("VAE",),
                "image": ("IMAGE",),
                "width": (
                    "INT",
                    {
                        "default": 768,
                        "min": 64,
                        "max": nodes.MAX_RESOLUTION,
                        "step": 32,
                    },
                ),
                "height": (
                    "INT",
                    {
                        "default": 512,
                        "min": 64,
                        "max": nodes.MAX_RESOLUTION,
                        "step": 32,
                    },
                ),
                "length": (
                    "INT",
                    {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8},
                ),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                "crf": (
                    "INT",
                    {
                        "default": 29,
                        "min": 0,
                        "max": 51,
                        "step": 1,
                        "tooltip": "CRF value for the video. Higher values mean more motion, lower values mean higher quality.",
                    },
                ),
                "blur_radius": (
                    "INT",
                    {
                        "default": 0,
                        "min": 0,
                        "max": 7,
                        "step": 1,
                        "tooltip": "Blur kernel radius size. Higher values mean more motion, lower values mean higher quality.",
                    },
                ),
                "interpolation": (
                    [
                        "lanczos",
                        "bislerp",
                        "nearest",
                        "bilinear",
                        "bicubic",
                        "area",
                        "nearest-exact",
                    ],
                    {"default": "lanczos"},
                ),
                "crop": (["center", "disabled"], {"default": "disabled"}),
                "strength": ("FLOAT", {"default": 0.9, "min": 0, "max": 1}),
            }
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    DESCRIPTION = (
        "Adds a conditioning frame or a video at index 0. "
        "This node is used to add a keyframe or a video segment which should appear in the "
        "generated video at index 0. It resizes the image to the correct size "
        "and applies preprocessing to it."
    )

    def generate(
        self,
        positive,
        negative,
        vae,
        image,
        width,
        height,
        length,
        batch_size,
        crf,
        blur_radius,
        interpolation,
        crop,
        strength,
    ):
        """Preprocess ``image`` and hand it to ``LTXVImgToVideo``.

        Args:
            positive: Positive conditioning, forwarded unchanged.
            negative: Negative conditioning, forwarded unchanged.
            vae: VAE forwarded to ``LTXVImgToVideo``.
            image: Input image(s); the last axis is moved to position 1 for
                resizing and moved back afterwards.
            width: Target width, used for resizing and forwarded.
            height: Target height, used for resizing and forwarded.
            length: Forwarded to ``LTXVImgToVideo``.
            batch_size: Forwarded to ``LTXVImgToVideo``.
            crf: CRF value passed to ``LTXVPreprocess``.
            blur_radius: Blur radius; values not greater than 0 skip blurring.
            interpolation: Resize method name for ``common_upscale``.
            crop: Crop mode for ``common_upscale``.
            strength: Forwarded to ``LTXVImgToVideo``.

        Returns:
            Whatever ``nodes_lt.LTXVImgToVideo().execute`` returns.
        """
        # Clamp after resizing, matching the other guide nodes in this file:
        # the resize may produce values outside [0, 1] (presumably overshoot
        # from kernels such as lanczos/bicubic — confirm).
        image = (
            comfy.utils.common_upscale(
                image.movedim(-1, 1), width, height, interpolation, crop=crop
            )
            .movedim(1, -1)
            .clamp(0, 1)
        )
        image = nodes_lt.LTXVPreprocess().execute(image, crf)[0]
        image = blur_internal(image, blur_radius)

        return nodes_lt.LTXVImgToVideo().execute(
            positive=positive,
            negative=negative,
            vae=vae,
            image=image,
            width=width,
            height=height,
            length=length,
            batch_size=batch_size,
            strength=strength,
        )
# NOTE(review): this file imports `comfy_node` from `.nodes_registry`, but no
# registration decorator is visible on this class — confirm whether one was
# dropped and needs to be restored.
class LTXVAddGuideAdvancedAttention:
    """Extended keyframe guide node with per-guide attention strength control.

    Same preprocessing as LTXVAddGuideAdvanced (CRF, blur, interpolation, crop),
    plus attention_strength and attention_mask inputs to control how strongly
    this guide's conditioning influences generation via self-attention.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the node's input specification (required and optional).

        Declared as a classmethod so it can be called on the class itself
        without an instance.
        """
        return {
            "required": {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "vae": ("VAE",),
                "latent": ("LATENT",),
                "image": ("IMAGE",),
                "frame_idx": (
                    "INT",
                    {
                        "default": 0,
                        "min": -9999,
                        "max": 9999,
                        "tooltip": (
                            "Frame index to start the conditioning at. "
                            "Negative values are counted from the end of the video."
                        ),
                    },
                ),
                "strength": (
                    "FLOAT",
                    {
                        "default": 1.0,
                        "min": 0.0,
                        "max": 1.0,
                        "tooltip": "Strength of the conditioning. Higher values make it more exact.",
                    },
                ),
                "crf": (
                    "INT",
                    {
                        "default": 29,
                        "min": 0,
                        "max": 51,
                        "step": 1,
                        "tooltip": "CRF value. Higher = more motion, lower = higher quality.",
                    },
                ),
                "blur_radius": (
                    "INT",
                    {
                        "default": 0,
                        "min": 0,
                        "max": 7,
                        "step": 1,
                        "tooltip": "Blur kernel radius. Higher = more motion.",
                    },
                ),
                "interpolation": (
                    [
                        "lanczos",
                        "bislerp",
                        "nearest",
                        "bilinear",
                        "bicubic",
                        "area",
                        "nearest-exact",
                    ],
                    {"default": "lanczos"},
                ),
                "crop": (["center", "disabled"], {"default": "disabled"}),
                "attention_strength": (
                    "FLOAT",
                    {
                        "default": 1.0,
                        "min": 0.0,
                        "max": 1.0,
                        "step": 0.01,
                        "tooltip": (
                            "Controls how strongly this guide influences generation via "
                            "self-attention. 1.0 = full conditioning, 0.0 = ignore."
                        ),
                    },
                ),
            },
            "optional": {
                "attention_mask": (
                    "MASK",
                    {
                        "tooltip": (
                            "Optional pixel-space spatial mask. Shape (F, H, W) or (H, W). "
                            "Values in [0, 1]. Controls per-region conditioning influence. "
                            "Multiplied by attention_strength."
                        ),
                    },
                ),
            },
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    DESCRIPTION = (
        "Adds a conditioning frame/video at a specific frame index with per-guide "
        "attention strength control. Same preprocessing as LTXVAddGuideAdvanced, "
        "plus attention_strength and optional spatial attention_mask."
    )

    def generate(
        self,
        positive,
        negative,
        vae,
        latent,
        image,
        frame_idx,
        strength,
        crf,
        blur_radius,
        interpolation,
        crop,
        attention_strength=1.0,
        attention_mask=None,
    ):
        """Preprocess, encode and append a guide, then record attention info.

        Args:
            positive: Positive conditioning to extend.
            negative: Negative conditioning to extend.
            vae: VAE; its ``downscale_index_formula`` supplies scale factors
                and it is used to encode the guide image.
            latent: Latent dict; ``latent["samples"]`` is unpacked as a 5-D
                shape whose last three axes are length, height and width.
            image: Guide image(s); the last axis is moved to position 1 for
                resizing and moved back afterwards.
            frame_idx: Requested frame index for the guide.
            strength: Conditioning strength passed to ``append_keyframe``.
            crf: CRF value passed to ``LTXVPreprocess``.
            blur_radius: Blur radius; values not greater than 0 skip blurring.
            interpolation: Resize method name for ``common_upscale``.
            crop: Crop mode for ``common_upscale``.
            attention_strength: Per-guide attention strength recorded on both
                conditionings.
            attention_mask: Optional mask; passed through ``normalize_mask``
                (also when it is ``None``) before being recorded.

        Returns:
            Tuple ``(positive, negative, latent_dict)`` where ``latent_dict``
            has keys ``"samples"`` and ``"noise_mask"``.

        Raises:
            AssertionError: If the encoded guide would extend past the end of
                the latent sequence.
        """
        # Imported here rather than at module level — presumably to avoid an
        # import cycle; confirm before hoisting.
        from .iclora_attention import append_guide_attention_entry, normalize_mask

        # Preprocessing: resize, CRF, blur (same as LTXVAddGuideAdvanced)
        scale_factors = vae.downscale_index_formula
        _, width_scale_factor, height_scale_factor = scale_factors

        latent_image = latent["samples"]
        noise_mask = nodes_lt.get_noise_mask(latent)

        _, _, latent_length, latent_height, latent_width = latent_image.shape
        width = latent_width * width_scale_factor
        height = latent_height * height_scale_factor

        image = (
            comfy.utils.common_upscale(
                image.movedim(-1, 1), width, height, interpolation, crop=crop
            )
            .movedim(1, -1)
            .clamp(0, 1)
        )
        image = nodes_lt.LTXVPreprocess().execute(image, crf)[0]
        image = blur_internal(image, blur_radius)

        # Encode
        _, t = nodes_lt.LTXVAddGuide.encode(
            vae, latent_width, latent_height, image, scale_factors
        )

        # Compute latent index
        frame_idx, latent_idx = nodes_lt.LTXVAddGuide.get_latent_index(
            positive, latent_length, len(image), frame_idx, scale_factors
        )
        # Raised explicitly (same exception type and message as the former
        # assert) so the check still runs when Python is started with -O.
        if latent_idx + t.shape[2] > latent_length:
            raise AssertionError(
                "Conditioning frames exceed the length of the latent sequence."
            )

        # Append keyframe
        positive, negative, latent_image, noise_mask = (
            nodes_lt.LTXVAddGuide.append_keyframe(
                positive,
                negative,
                frame_idx,
                latent_image,
                noise_mask,
                t,
                strength,
                scale_factors,
            )
        )

        # Track with custom attention strength/mask
        # Product of the guide latent's last three axes.
        pre_filter_count = t.shape[2] * t.shape[3] * t.shape[4]
        guide_latent_shape = list(t.shape[2:])
        norm_mask = normalize_mask(attention_mask)

        positive = append_guide_attention_entry(
            positive,
            pre_filter_count,
            guide_latent_shape,
            attention_strength=attention_strength,
            attention_mask=norm_mask,
        )
        negative = append_guide_attention_entry(
            negative,
            pre_filter_count,
            guide_latent_shape,
            attention_strength=attention_strength,
            attention_mask=norm_mask,
        )

        return (
            positive,
            negative,
            {"samples": latent_image, "noise_mask": noise_mask},
        )