Instructions to use vidfom/Ltx-3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use vidfom/Ltx-3 with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="vidfom/Ltx-3",
    filename="ComfyUI/models/text_encoders/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf",
)
llm.create_chat_completion(
    messages = "No input example has been defined for this model task."
)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- llama.cpp
How to use vidfom/Ltx-3 with llama.cpp:
Install from brew
brew install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Install from WinGet (Windows)
winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use pre-built binary
# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
./llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
./build/bin/llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use Docker
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- LM Studio
- Jan
- Ollama
How to use vidfom/Ltx-3 with Ollama:
ollama run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Unsloth Studio (new)
How to use vidfom/Ltx-3 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for vidfom/Ltx-3 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for vidfom/Ltx-3 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for vidfom/Ltx-3 to start chatting
- Docker Model Runner
How to use vidfom/Ltx-3 with Docker Model Runner:
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Lemonade
How to use vidfom/Ltx-3 with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull vidfom/Ltx-3:UD-Q4_K_XL
Run and chat with the model
lemonade run user.Ltx-3-UD-Q4_K_XL
List all available models
lemonade list
| #credit to Acly for this module | |
| #from https://github.com/Acly/comfyui-inpaint-nodes | |
| import torch | |
| import torch.nn.functional as F | |
| import comfy | |
| from comfy.model_base import BaseModel | |
| from comfy.model_patcher import ModelPatcher | |
| from comfy.model_management import cast_to_device | |
| from ...libs.log import log_node_warn, log_node_error, log_node_info | |
class InpaintHead(torch.nn.Module):
    """Single 3x3 convolution used as the inpaint "head".

    The kernel lives in the ``head`` parameter with shape ``(320, 5, 3, 3)``
    and is allocated on the CPU with ``torch.empty``, i.e. its contents are
    uninitialised until weights are assigned to it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        kernel = torch.empty(size=(320, 5, 3, 3), device="cpu")
        self.head = torch.nn.Parameter(kernel)

    def __call__(self, x):
        """Replicate-pad ``x`` by one pixel per side and convolve it with ``head``."""
        # One pixel of padding on every side keeps the spatial size unchanged
        # after the un-padded 3x3 convolution below.
        padded = F.pad(x, (1, 1, 1, 1), "replicate")
        return F.conv2d(padded, weight=self.head)
| # injected_model_patcher_calculate_weight = False | |
| # original_calculate_weight = None | |
class applyFooocusInpaint:
    """Context manager that temporarily swaps in a Fooocus-aware weight merge.

    On entry the active ``calculate_weight`` implementation is replaced by
    :meth:`calculate_weight_patched`; on exit the original is put back on the
    very same object that was patched. ``comfy.lora.calculate_weight`` is
    preferred, with ``ModelPatcher.calculate_weight`` as the fallback when the
    former does not exist.
    """

    def calculate_weight_patched(self, patches, weight, key, intermediate_dtype=torch.float32):
        """Merge Fooocus patches into ``weight`` and delegate everything else.

        Args:
            patches: Iterable of patch entries; ``p[0]`` is the strength
                (alpha) and ``p[1]`` the payload. A payload of the form
                ``("fooocus", (w1, w_min, w_max))`` is handled here.
            weight: Tensor to patch; it is modified in place.
            key: Name of the weight, used in the mismatch message and passed
                through to the original implementation.
            intermediate_dtype: Forwarded unchanged to the original
                implementation.

        Returns:
            The result of the original ``calculate_weight`` when non-Fooocus
            patches remain, otherwise ``weight``.
        """
        remaining = []
        for p in patches:
            alpha = p[0]
            v = p[1]

            is_fooocus_patch = isinstance(v, tuple) and len(v) == 2 and v[0] == "fooocus"
            if not is_fooocus_patch:
                remaining.append(p)
                continue

            # A zero-strength patch contributes nothing; skip the device casts.
            if alpha == 0.0:
                continue

            payload = v[1]
            w1 = cast_to_device(payload[0], weight.device, torch.float32)
            if w1.shape != weight.shape:
                print(
                    f"[ApplyFooocusInpaint] Shape mismatch {key}, weight not merged ({w1.shape} != {weight.shape})"
                )
                continue

            w_min = cast_to_device(payload[1], weight.device, torch.float32)
            w_max = cast_to_device(payload[2], weight.device, torch.float32)
            # Map w1 from a 0..255 scale onto [w_min, w_max]
            # (presumably the stored delta is 8-bit quantised — TODO confirm).
            w1 = (w1 / 255.0) * (w_max - w_min) + w_min
            weight += alpha * cast_to_device(w1, weight.device, weight.dtype)

        if remaining:
            # NOTE(review): on the legacy ModelPatcher path the original is
            # fetched from the class and called here without a patcher
            # instance — confirm that fallback against the targeted ComfyUI.
            return self.original_calculate_weight(remaining, weight, key, intermediate_dtype)
        return weight

    def __enter__(self):
        """Install the patched ``calculate_weight`` and remember where it went."""
        lora_module = getattr(comfy, "lora", None)
        if lora_module is not None and hasattr(lora_module, "calculate_weight"):
            print("[comfyui-easy-use] Injecting patched comfy.lora.calculate_weight.calculate_weight")
            owner = lora_module
        else:
            print("[comfyui-easy-use] Injecting patched comfy.model_patcher.ModelPatcher.calculate_weight")
            owner = ModelPatcher

        # Record the patched object so __exit__ restores exactly that one;
        # blindly assigning to comfy.lora would succeed even when ModelPatcher
        # was the object actually patched, leaving it patched forever.
        self._patched_owner = owner
        self.original_calculate_weight = owner.calculate_weight
        owner.calculate_weight = self.calculate_weight_patched
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore the original ``calculate_weight``; exceptions are not suppressed."""
        owner = getattr(self, "_patched_owner", None)
        if owner is None:
            # Nothing was patched (e.g. __enter__ never ran or failed early).
            return None
        owner.calculate_weight = self.original_calculate_weight
        self._patched_owner = None
        return None
| # def inject_patched_calculate_weight(): | |
| # global injected_model_patcher_calculate_weight | |
| # if not injected_model_patcher_calculate_weight: | |
| # try: | |
| # print("[comfyui-easy-use] Injecting patched comfy.lora.calculate_weight.calculate_weight") | |
| # original_calculate_weight = comfy.lora.calculate_weight | |
| # comfy.lora.original_calculate_weight = original_calculate_weight | |
| # comfy.lora.calculate_weight = calculate_weight_patched | |
| # except AttributeError: | |
| # print("[comfyui-easy-use] Injecting patched comfy.model_patcher.ModelPatcher.calculate_weight") | |
| # original_calculate_weight = ModelPatcher.calculate_weight | |
| # ModelPatcher.original_calculate_weight = original_calculate_weight | |
| # ModelPatcher.calculate_weight = calculate_weight_patched | |
| # injected_model_patcher_calculate_weight = True | |
class InpaintWorker:
    """Applies a Fooocus inpaint head and inpaint LoRA patch to a model.

    Attributes are set by :meth:`patch` and read by the input-block hook:
    ``_inpaint_head_feature`` holds the head's output for the current latent,
    ``_inpaint_block`` caches that feature repeated to the hook's batch size.
    """

    def __init__(self, node_name):
        self.node_name = node_name if node_name is not None else ""
        # Initialised up front so the input-block hook reaches its own
        # assertion instead of failing with AttributeError before patch().
        self._inpaint_head_feature = None
        self._inpaint_block = None

    def load_fooocus_patch(self, lora: dict, to_load: dict):
        """Collect the entries of ``lora`` that match the model's keys.

        Args:
            lora: Mapping of weight key to patch payload.
            to_load: Mapping whose *values* are the keys looked up in ``lora``.

        Returns:
            Dict mapping each found key to ``("fooocus", payload)``.
        """
        patch_dict = {}
        loaded_keys = set()
        for key in to_load.values():
            value = lora.get(key)
            # Compare against None explicitly: truth-testing a multi-element
            # tensor raises, and a falsy payload is still a present payload.
            if value is None:
                continue
            patch_dict[key] = ("fooocus", value)
            loaded_keys.add(key)

        not_loaded = sum(1 for x in lora if x not in loaded_keys)
        if not_loaded > 0:
            log_node_info(self.node_name,
                f"{len(loaded_keys)} Lora keys loaded, {not_loaded} remaining keys not found in model."
            )
        return patch_dict

    def _input_block_patch(self, h: torch.Tensor, transformer_options: dict):
        """Add the inpaint head feature to ``h`` for block index 0 only.

        Other blocks return ``h`` untouched.
        """
        if transformer_options["block"][1] == 0:
            if self._inpaint_block is None or self._inpaint_block.shape != h.shape:
                assert self._inpaint_head_feature is not None
                # Repeat the feature so its batch dimension matches h.
                batch = h.shape[0] // self._inpaint_head_feature.shape[0]
                self._inpaint_block = self._inpaint_head_feature.to(h).repeat(batch, 1, 1, 1)
            h = h + self._inpaint_block
        return h

    def patch(self, model, latent, patch):
        """Return a one-tuple holding a patched clone of ``model``.

        Args:
            model: Model patcher to clone; the input itself is not patched.
            latent: Dict providing ``"samples"`` and ``"noise_mask"``.
            patch: Pair ``(inpaint_head_model, inpaint_lora)``.
        """
        base_model: BaseModel = model.model
        latent_pixels = base_model.process_latent_in(latent["samples"])
        noise_mask = latent["noise_mask"].round()
        # 8x8 max-pooling shrinks the mask by a factor of 8 per spatial axis
        # (presumably pixel space -> latent resolution — TODO confirm).
        latent_mask = F.max_pool2d(noise_mask, (8, 8)).round().to(latent_pixels)

        inpaint_head_model, inpaint_lora = patch
        feed = torch.cat([latent_mask, latent_pixels], dim=1)
        inpaint_head_model.to(device=feed.device, dtype=feed.dtype)
        self._inpaint_head_feature = inpaint_head_model(feed)
        # Drop any cached block so the hook rebuilds it from the new feature.
        self._inpaint_block = None

        lora_keys = comfy.lora.model_lora_keys_unet(model.model, {})
        lora_keys.update({x: x for x in base_model.state_dict().keys()})
        loaded_lora = self.load_fooocus_patch(inpaint_lora, lora_keys)

        m = model.clone()
        m.set_model_input_block_patch(self._input_block_patch)
        patched = m.add_patches(loaded_lora, 1.0)
        m.model_options['transformer_options']['fooocus'] = True

        not_patched_count = sum(1 for x in loaded_lora if x not in patched)
        if not_patched_count > 0:
            log_node_error(self.node_name, f"Failed to patch {not_patched_count} keys")
        # inject_patched_calculate_weight()
        return (m,)