Instructions to use vidfom/Ltx-3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use vidfom/Ltx-3 with llama-cpp-python:
# !pip install llama-cpp-python from llama_cpp import Llama llm = Llama.from_pretrained( repo_id="vidfom/Ltx-3", filename="ComfyUI/models/text_encoders/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf", )
llm.create_chat_completion( messages = "No input example has been defined for this model task." )
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- llama.cpp
How to use vidfom/Ltx-3 with llama.cpp:
Install from brew
brew install llama.cpp # Start a local OpenAI-compatible server with a web UI: llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL # Run inference directly in the terminal: llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Install from WinGet (Windows)
winget install llama.cpp # Start a local OpenAI-compatible server with a web UI: llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL # Run inference directly in the terminal: llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use pre-built binary
# Download pre-built binary from: # https://github.com/ggerganov/llama.cpp/releases # Start a local OpenAI-compatible server with a web UI: ./llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL # Run inference directly in the terminal: ./llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git cd llama.cpp cmake -B build cmake --build build -j --target llama-server llama-cli # Start a local OpenAI-compatible server with a web UI: ./build/bin/llama-server -hf vidfom/Ltx-3:UD-Q4_K_XL # Run inference directly in the terminal: ./build/bin/llama-cli -hf vidfom/Ltx-3:UD-Q4_K_XL
Use Docker
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- LM Studio
- Jan
- Ollama
How to use vidfom/Ltx-3 with Ollama:
ollama run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Unsloth Studio
How to use vidfom/Ltx-3 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for vidfom/Ltx-3 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for vidfom/Ltx-3 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required # Open https://huggingface.co/spaces/unsloth/studio in your browser # Search for vidfom/Ltx-3 to start chatting
- Docker Model Runner
How to use vidfom/Ltx-3 with Docker Model Runner:
docker model run hf.co/vidfom/Ltx-3:UD-Q4_K_XL
- Lemonade
How to use vidfom/Ltx-3 with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/ lemonade pull vidfom/Ltx-3:UD-Q4_K_XL
Run and chat with the model
lemonade run user.Ltx-3-UD-Q4_K_XL
List all available models
lemonade list
| # code adapted from: https://github.com/Stability-AI/stable-audio-tools | |
| import torch | |
| from torch import nn | |
| from typing import Literal | |
| import math | |
| import comfy.ops | |
| ops = comfy.ops.disable_weight_init | |
| def vae_sample(mean, scale): | |
| stdev = nn.functional.softplus(scale) + 1e-4 | |
| var = stdev * stdev | |
| logvar = torch.log(var) | |
| latents = torch.randn_like(mean) * stdev + mean | |
| kl = (mean * mean + var - logvar - 1).sum(1).mean() | |
| return latents, kl | |
| class VAEBottleneck(nn.Module): | |
| def __init__(self): | |
| super().__init__() | |
| self.is_discrete = False | |
| def encode(self, x, return_info=False, **kwargs): | |
| info = {} | |
| mean, scale = x.chunk(2, dim=1) | |
| x, kl = vae_sample(mean, scale) | |
| info["kl"] = kl | |
| if return_info: | |
| return x, info | |
| else: | |
| return x | |
| def decode(self, x): | |
| return x | |
| def snake_beta(x, alpha, beta): | |
| return x + (1.0 / (beta + 0.000000001)) * pow(torch.sin(x * alpha), 2) | |
| # Adapted from https://github.com/NVIDIA/BigVGAN/blob/main/activations.py under MIT license | |
| class SnakeBeta(nn.Module): | |
| def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=True): | |
| super(SnakeBeta, self).__init__() | |
| self.in_features = in_features | |
| # initialize alpha | |
| self.alpha_logscale = alpha_logscale | |
| if self.alpha_logscale: # log scale alphas initialized to zeros | |
| self.alpha = nn.Parameter(torch.zeros(in_features) * alpha) | |
| self.beta = nn.Parameter(torch.zeros(in_features) * alpha) | |
| else: # linear scale alphas initialized to ones | |
| self.alpha = nn.Parameter(torch.ones(in_features) * alpha) | |
| self.beta = nn.Parameter(torch.ones(in_features) * alpha) | |
| # self.alpha.requires_grad = alpha_trainable | |
| # self.beta.requires_grad = alpha_trainable | |
| self.no_div_by_zero = 0.000000001 | |
| def forward(self, x): | |
| alpha = self.alpha.unsqueeze(0).unsqueeze(-1).to(x.device) # line up with x to [B, C, T] | |
| beta = self.beta.unsqueeze(0).unsqueeze(-1).to(x.device) | |
| if self.alpha_logscale: | |
| alpha = torch.exp(alpha) | |
| beta = torch.exp(beta) | |
| x = snake_beta(x, alpha, beta) | |
| return x | |
| def WNConv1d(*args, **kwargs): | |
| return torch.nn.utils.parametrizations.weight_norm(ops.Conv1d(*args, **kwargs)) | |
| def WNConvTranspose1d(*args, **kwargs): | |
| return torch.nn.utils.parametrizations.weight_norm(ops.ConvTranspose1d(*args, **kwargs)) | |
| def get_activation(activation: Literal["elu", "snake", "none"], antialias=False, channels=None) -> nn.Module: | |
| if activation == "elu": | |
| act = torch.nn.ELU() | |
| elif activation == "snake": | |
| act = SnakeBeta(channels) | |
| elif activation == "none": | |
| act = torch.nn.Identity() | |
| else: | |
| raise ValueError(f"Unknown activation {activation}") | |
| if antialias: | |
| act = Activation1d(act) # noqa: F821 Activation1d is not defined | |
| return act | |
| class ResidualUnit(nn.Module): | |
| def __init__(self, in_channels, out_channels, dilation, use_snake=False, antialias_activation=False): | |
| super().__init__() | |
| self.dilation = dilation | |
| padding = (dilation * (7-1)) // 2 | |
| self.layers = nn.Sequential( | |
| get_activation("snake" if use_snake else "elu", antialias=antialias_activation, channels=out_channels), | |
| WNConv1d(in_channels=in_channels, out_channels=out_channels, | |
| kernel_size=7, dilation=dilation, padding=padding), | |
| get_activation("snake" if use_snake else "elu", antialias=antialias_activation, channels=out_channels), | |
| WNConv1d(in_channels=out_channels, out_channels=out_channels, | |
| kernel_size=1) | |
| ) | |
| def forward(self, x): | |
| res = x | |
| #x = checkpoint(self.layers, x) | |
| x = self.layers(x) | |
| return x + res | |
| class EncoderBlock(nn.Module): | |
| def __init__(self, in_channels, out_channels, stride, use_snake=False, antialias_activation=False): | |
| super().__init__() | |
| self.layers = nn.Sequential( | |
| ResidualUnit(in_channels=in_channels, | |
| out_channels=in_channels, dilation=1, use_snake=use_snake), | |
| ResidualUnit(in_channels=in_channels, | |
| out_channels=in_channels, dilation=3, use_snake=use_snake), | |
| ResidualUnit(in_channels=in_channels, | |
| out_channels=in_channels, dilation=9, use_snake=use_snake), | |
| get_activation("snake" if use_snake else "elu", antialias=antialias_activation, channels=in_channels), | |
| WNConv1d(in_channels=in_channels, out_channels=out_channels, | |
| kernel_size=2*stride, stride=stride, padding=math.ceil(stride/2)), | |
| ) | |
| def forward(self, x): | |
| return self.layers(x) | |
| class DecoderBlock(nn.Module): | |
| def __init__(self, in_channels, out_channels, stride, use_snake=False, antialias_activation=False, use_nearest_upsample=False): | |
| super().__init__() | |
| if use_nearest_upsample: | |
| upsample_layer = nn.Sequential( | |
| nn.Upsample(scale_factor=stride, mode="nearest"), | |
| WNConv1d(in_channels=in_channels, | |
| out_channels=out_channels, | |
| kernel_size=2*stride, | |
| stride=1, | |
| bias=False, | |
| padding='same') | |
| ) | |
| else: | |
| upsample_layer = WNConvTranspose1d(in_channels=in_channels, | |
| out_channels=out_channels, | |
| kernel_size=2*stride, stride=stride, padding=math.ceil(stride/2)) | |
| self.layers = nn.Sequential( | |
| get_activation("snake" if use_snake else "elu", antialias=antialias_activation, channels=in_channels), | |
| upsample_layer, | |
| ResidualUnit(in_channels=out_channels, out_channels=out_channels, | |
| dilation=1, use_snake=use_snake), | |
| ResidualUnit(in_channels=out_channels, out_channels=out_channels, | |
| dilation=3, use_snake=use_snake), | |
| ResidualUnit(in_channels=out_channels, out_channels=out_channels, | |
| dilation=9, use_snake=use_snake), | |
| ) | |
| def forward(self, x): | |
| return self.layers(x) | |
| class OobleckEncoder(nn.Module): | |
| def __init__(self, | |
| in_channels=2, | |
| channels=128, | |
| latent_dim=32, | |
| c_mults = [1, 2, 4, 8], | |
| strides = [2, 4, 8, 8], | |
| use_snake=False, | |
| antialias_activation=False | |
| ): | |
| super().__init__() | |
| c_mults = [1] + c_mults | |
| self.depth = len(c_mults) | |
| layers = [ | |
| WNConv1d(in_channels=in_channels, out_channels=c_mults[0] * channels, kernel_size=7, padding=3) | |
| ] | |
| for i in range(self.depth-1): | |
| layers += [EncoderBlock(in_channels=c_mults[i]*channels, out_channels=c_mults[i+1]*channels, stride=strides[i], use_snake=use_snake)] | |
| layers += [ | |
| get_activation("snake" if use_snake else "elu", antialias=antialias_activation, channels=c_mults[-1] * channels), | |
| WNConv1d(in_channels=c_mults[-1]*channels, out_channels=latent_dim, kernel_size=3, padding=1) | |
| ] | |
| self.layers = nn.Sequential(*layers) | |
| def forward(self, x): | |
| return self.layers(x) | |
| class OobleckDecoder(nn.Module): | |
| def __init__(self, | |
| out_channels=2, | |
| channels=128, | |
| latent_dim=32, | |
| c_mults = [1, 2, 4, 8], | |
| strides = [2, 4, 8, 8], | |
| use_snake=False, | |
| antialias_activation=False, | |
| use_nearest_upsample=False, | |
| final_tanh=True): | |
| super().__init__() | |
| c_mults = [1] + c_mults | |
| self.depth = len(c_mults) | |
| layers = [ | |
| WNConv1d(in_channels=latent_dim, out_channels=c_mults[-1]*channels, kernel_size=7, padding=3), | |
| ] | |
| for i in range(self.depth-1, 0, -1): | |
| layers += [DecoderBlock( | |
| in_channels=c_mults[i]*channels, | |
| out_channels=c_mults[i-1]*channels, | |
| stride=strides[i-1], | |
| use_snake=use_snake, | |
| antialias_activation=antialias_activation, | |
| use_nearest_upsample=use_nearest_upsample | |
| ) | |
| ] | |
| layers += [ | |
| get_activation("snake" if use_snake else "elu", antialias=antialias_activation, channels=c_mults[0] * channels), | |
| WNConv1d(in_channels=c_mults[0] * channels, out_channels=out_channels, kernel_size=7, padding=3, bias=False), | |
| nn.Tanh() if final_tanh else nn.Identity() | |
| ] | |
| self.layers = nn.Sequential(*layers) | |
| def forward(self, x): | |
| return self.layers(x) | |
| class AudioOobleckVAE(nn.Module): | |
| def __init__(self, | |
| in_channels=2, | |
| channels=128, | |
| latent_dim=64, | |
| c_mults = [1, 2, 4, 8, 16], | |
| strides = [2, 4, 4, 8, 8], | |
| use_snake=True, | |
| antialias_activation=False, | |
| use_nearest_upsample=False, | |
| final_tanh=False): | |
| super().__init__() | |
| self.encoder = OobleckEncoder(in_channels, channels, latent_dim * 2, c_mults, strides, use_snake, antialias_activation) | |
| self.decoder = OobleckDecoder(in_channels, channels, latent_dim, c_mults, strides, use_snake, antialias_activation, | |
| use_nearest_upsample=use_nearest_upsample, final_tanh=final_tanh) | |
| self.bottleneck = VAEBottleneck() | |
| def encode(self, x): | |
| return self.bottleneck.encode(self.encoder(x)) | |
| def decode(self, x): | |
| return self.decoder(self.bottleneck.decode(x)) | |