Instructions to use vidfom/Ltx-3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use vidfom/Ltx-3 with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="vidfom/Ltx-3",
    filename="ComfyUI/models/text_encoders/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf",
)
llm.create_chat_completion(
    messages = "No input example has been defined for this model task."
)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- llama.cpp
How to use vidfom/Ltx-3 with llama.cpp:
Install from brew
brew install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Install from WinGet (Windows)
winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use pre-built binary
# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
./llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL
# Run inference directly in the terminal:
./build/bin/llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use Docker
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- LM Studio
- Jan
- Ollama
How to use vidfom/Ltx-3 with Ollama:
ollama run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Unsloth Studio new
How to use vidfom/Ltx-3 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for vidfom/Ltx-3 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for vidfom/Ltx-3 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for vidfom/Ltx-3 to start chatting
- Docker Model Runner
How to use vidfom/Ltx-3 with Docker Model Runner:
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Lemonade
How to use vidfom/Ltx-3 with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull vidfom/Ltx-3:UD-Q4_K_XL
Run and chat with the model
lemonade run user.Ltx-3-UD-Q4_K_XL
List all available models
lemonade list
| import torch | |
| import numpy as np | |
| from scipy.ndimage import gaussian_filter | |
class HeatmapHead(torch.nn.Module):
    """Heatmap prediction head that also decodes heatmaps into keypoints.

    The module runs optional transposed-conv upsampling layers, optional conv
    layers and a final conv that emits one heatmap per keypoint. ``forward``
    then decodes those heatmaps on the CPU with NumPy/SciPy: per-channel
    argmax, followed by a sub-pixel refinement (Gaussian blur, log, one
    Hessian-based Newton step) and rescaling by ``scale_factor``.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Number of heatmaps (keypoints) produced by the final layer.
        input_size: Two-element size of the network input.
            NOTE(review): ``scale_factor`` is multiplied element-wise onto
            ``(x, y)`` keypoints, so this looks like ``(width, height)`` order
            -- confirm against the caller.
        heatmap_scale: Integer divisor applied to ``input_size`` to obtain
            ``heatmap_size``.
        deconv_out_channels: Output channels of each transposed-conv stage; an
            empty/falsy value replaces the stage with ``Identity``.
        deconv_kernel_sizes: Kernel size of each transposed-conv stage; only
            4, 3 and 2 are supported.
        conv_out_channels: Output channels of each conv stage; an empty/falsy
            value replaces the stage with ``Identity``.
        conv_kernel_sizes: Kernel size of each conv stage.
        final_layer_kernel_size: Kernel size of the final heatmap conv.
        device: Forwarded to every created layer.
        dtype: Forwarded to every created layer.
        operations: Namespace providing ``Conv2d`` and ``ConvTranspose2d``
            layer classes. Defaults to ``torch.nn`` when ``None``.

    Raises:
        ValueError: If a deconv kernel size other than 4, 3 or 2 is requested.
    """

    # (padding, output_padding) that make a stride-2 transposed conv double
    # the spatial size for each supported kernel size.
    _DECONV_PADDING = {4: (1, 0), 3: (1, 1), 2: (0, 0)}

    def __init__(
        self,
        in_channels=640,
        out_channels=133,
        input_size=(768, 1024),
        heatmap_scale=4,
        deconv_out_channels=(640,),
        deconv_kernel_sizes=(4,),
        conv_out_channels=(640,),
        conv_kernel_sizes=(1,),
        final_layer_kernel_size=1,
        device=None, dtype=None, operations=None
    ):
        super().__init__()
        # The signature advertises ``operations=None``; without this fallback
        # the default would crash on the first ``operations.<Layer>`` lookup.
        if operations is None:
            operations = torch.nn

        self.heatmap_size = (input_size[0] // heatmap_scale, input_size[1] // heatmap_scale)
        # Maps heatmap pixel indices onto input pixel indices so that the
        # first/last heatmap pixel lands on the first/last input pixel.
        self.scale_factor = ((np.array(input_size) - 1) / (np.array(self.heatmap_size) - 1)).astype(np.float32)

        # Deconv layers
        if deconv_out_channels:
            deconv_layers = []
            for out_ch, kernel_size in zip(deconv_out_channels, deconv_kernel_sizes):
                if kernel_size not in self._DECONV_PADDING:
                    raise ValueError(f'Unsupported kernel size {kernel_size}')
                padding, output_padding = self._DECONV_PADDING[kernel_size]
                deconv_layers.extend([
                    operations.ConvTranspose2d(in_channels, out_ch, kernel_size,
                                               stride=2, padding=padding, output_padding=output_padding,
                                               bias=False, device=device, dtype=dtype),
                    torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype),
                    torch.nn.SiLU(inplace=True)
                ])
                in_channels = out_ch
            self.deconv_layers = torch.nn.Sequential(*deconv_layers)
        else:
            self.deconv_layers = torch.nn.Identity()

        # Conv layers
        if conv_out_channels:
            conv_layers = []
            for out_ch, kernel_size in zip(conv_out_channels, conv_kernel_sizes):
                padding = (kernel_size - 1) // 2
                conv_layers.extend([
                    operations.Conv2d(in_channels, out_ch, kernel_size,
                                      stride=1, padding=padding, device=device, dtype=dtype),
                    torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype),
                    torch.nn.SiLU(inplace=True)
                ])
                in_channels = out_ch
            self.conv_layers = torch.nn.Sequential(*conv_layers)
        else:
            self.conv_layers = torch.nn.Identity()

        self.final_layer = operations.Conv2d(in_channels, out_channels, kernel_size=final_layer_kernel_size,
                                             padding=final_layer_kernel_size // 2, device=device, dtype=dtype)

    def forward(self, x):
        """Predict heatmaps for ``x`` and decode them to keypoints.

        Args:
            x: Feature tensor fed through the deconv, conv and final layers;
                the result must be 4-D ``(B, K, H, W)``.

        Returns:
            A tuple ``(batch_keypoints, batch_scores)`` of two lists with one
            entry per batch element. Each keypoints entry is a ``(K, 2)``
            float32 NumPy array of ``(x, y)`` positions multiplied by
            ``scale_factor``; keypoints whose peak score is ``<= 0`` are set
            to ``-1``. Each scores entry is a ``(K,)`` NumPy array holding the
            raw heatmap maximum of every channel.
        """
        heatmaps = self.final_layer(self.conv_layers(self.deconv_layers(x)))
        # detach(): Tensor.numpy() refuses tensors that require grad, which
        # happens whenever forward is called outside no_grad/inference_mode.
        heatmaps_np = heatmaps.detach().float().cpu().numpy()  # (B, K, H, W)
        B, K, H, W = heatmaps_np.shape

        batch_keypoints = []
        batch_scores = []
        for b in range(B):
            # Copy: the decoder modifies its input in place and the NumPy
            # array may share memory with the tensor.
            keypoints, scores = self._decode_sample(heatmaps_np[b].copy())
            batch_keypoints.append(keypoints)
            batch_scores.append(scores)
        return batch_keypoints, batch_scores

    def _decode_sample(self, hm):
        """Decode one ``(K, H, W)`` heatmap stack (modified in place) to keypoints and scores."""
        K, H, W = hm.shape

        # --- vectorised argmax ---
        flat = hm.reshape(K, -1)
        idx = np.argmax(flat, axis=1)
        # Copy because ``flat`` is a view of ``hm``, which is overwritten below.
        scores = flat[np.arange(K), idx].copy()
        y_locs, x_locs = np.unravel_index(idx, (H, W))
        keypoints = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)  # (K, 2) in heatmap space
        invalid = scores <= 0.
        keypoints[invalid] = -1

        # --- DARK sub-pixel refinement (UDP) ---
        # 1. Gaussian blur with max-preserving normalisation
        border = 5  # (kernel-1)//2 for kernel=11
        for k in range(K):
            origin_max = np.max(hm[k])
            # Zero-pad before blurring, then crop back to the original size.
            dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32)
            dr[border:-border, border:-border] = hm[k]
            dr = gaussian_filter(dr, sigma=2.0)
            hm[k] = dr[border:-border, border:-border]
            cur_max = np.max(hm[k])
            if cur_max > 0:
                # Rescale so blurring does not lower the peak value.
                hm[k] *= origin_max / cur_max

        # 2. Log-space for Taylor expansion
        np.clip(hm, 1e-3, 50., hm)
        np.log(hm, hm)

        # 3. Hessian-based Newton step
        # Edge-pad by one pixel so every neighbour lookup stays inside the
        # keypoint's own channel, then address the flattened array directly.
        hm_pad = np.pad(hm, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten()
        row = W + 2  # length of one padded row
        index = keypoints[:, 0] + 1 + (keypoints[:, 1] + 1) * row
        index += row * (H + 2) * np.arange(0, K)
        index = index.astype(int).reshape(-1, 1)
        # Invalid keypoints (-1, -1) read unrelated positions here; their
        # result is discarded when they are reset to -1 below.
        i_ = hm_pad[index]
        ix1 = hm_pad[index + 1]
        iy1 = hm_pad[index + row]
        ix1y1 = hm_pad[index + row + 1]
        ix1_y1_ = hm_pad[index - row - 1]
        ix1_ = hm_pad[index - 1]
        iy1_ = hm_pad[index - row]

        # Central finite differences of the log-heatmap around each peak.
        dx = 0.5 * (ix1 - ix1_)
        dy = 0.5 * (iy1 - iy1_)
        derivative = np.concatenate([dx, dy], axis=1).reshape(K, 2, 1)
        dxx = ix1 - 2 * i_ + ix1_
        dyy = iy1 - 2 * i_ + iy1_
        dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
        hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1).reshape(K, 2, 2)
        # A small multiple of the identity keeps flat regions invertible.
        hessian_inv = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
        keypoints -= np.einsum('imn,ink->imk', hessian_inv, derivative).squeeze(axis=-1)

        # --- restore to input image space ---
        keypoints = keypoints * self.scale_factor
        keypoints[invalid] = -1
        return keypoints, scores