# LTX-2.3-turbo / app.py
# Author: Exosfeer
# Last change: Update default prompt to generic animation prompt (commit 84adcd4)
"""
LTX-2.3 Turbo — ZeroGPU Edition
Generates synchronized audio-video content using Lightricks/LTX-2.3 on
free ZeroGPU hardware via Hugging Face Spaces.
UI inspired by alexnasa/ltx-2-TURBO with full feature parity for LTX-2.3:
- Image-to-Video mode (first frame conditioning)
- Interpolate mode (first + last frame)
- Audio input (user provides audio for lip-sync/soundtrack)
- Custom UI components (RadioAnimated, PromptBox, CameraDropdown, AudioDropUpload)
- Duration presets (2s, 3s, 5s, 6s, 8s, 10s, 12s) and resolution selector with SVG icons
Architecture (following alexnasa/ltx-2-TURBO's proven ZeroGPU pattern):
1. Vendored ltx-core and ltx-pipelines added to sys.path before any imports.
2. Model files downloaded at module startup (CPU, no GPU lease).
3. ModelLedger constructed at module level (CPU-only dataclass, no CUDA init).
4. Text encoder loaded at module level (kept in memory for reuse).
5. DistilledPipeline constructed with gemma_root=None (no text encoder in pipeline).
6. Video encoder and transformer pre-loaded at module level via pipeline cache.
7. @spaces.GPU() on encode_prompt — encodes text, returns .detach().cpu() tensors.
8. @spaces.GPU(duration=callable) on generate_video — runs pipeline with pre-encoded
contexts passed as video_context/audio_context kwargs.
9. FP8 quantization fits the 22B transformer on ZeroGPU's A100 (40GB VRAM).
Based on the official LTX-2 codebase: https://github.com/Lightricks/LTX-2
Architecture inspired by alexnasa/ltx-2-TURBO.
"""
# ───────────────────────────────────────────────────────────────────────────
# 0) Add vendored packages to sys.path BEFORE any ltx imports
# ───────────────────────────────────────────────────────────────────────────
import sys
from pathlib import Path
_here = Path(__file__).parent
sys.path.insert(0, str(_here / "packages" / "ltx-pipelines" / "src"))
sys.path.insert(0, str(_here / "packages" / "ltx-core" / "src"))
# ───────────────────────────────────────────────────────────────────────────
# Standard library & third-party imports
# ───────────────────────────────────────────────────────────────────────────
import json
import logging
import os
import random
import subprocess
import tempfile
import time
import traceback
import uuid
from typing import Any
import gradio as gr
import numpy as np
import spaces
import torch
import torch.nn.functional as F
import torchaudio
from huggingface_hub import hf_hub_download, snapshot_download
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ───────────────────────────────────────────────────────────────────────────
# LTX imports (from vendored packages)
# ───────────────────────────────────────────────────────────────────────────
from ltx_core.model.video_vae import TilingConfig
from ltx_core.quantization import QuantizationPolicy
from ltx_pipelines.distilled import DistilledPipeline
from ltx_pipelines.utils import ModelLedger
from ltx_pipelines.utils.args import ImageConditioningInput
from ltx_pipelines.utils.helpers import generate_enhanced_prompt
# ───────────────────────────────────────────────────────────────────────────
# Constants
# ───────────────────────────────────────────────────────────────────────────
MAX_SEED = np.iinfo(np.int32).max  # upper bound for randomized seeds (2**31 - 1)
LTX_REPO = "Lightricks/LTX-2.3"  # HF repo with the video model checkpoints
GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized"  # Gemma text-encoder repo
CKPT_DISTILLED = "ltx-2.3-22b-distilled.safetensors"  # distilled transformer weights
CKPT_UPSCALER = "ltx-2.3-spatial-upscaler-x2-1.0.safetensors"  # 2x spatial upscaler
# Aspect-ratio label -> (width, height) in pixels, used by the resolution selector.
RESOLUTION_MAP = {
    "16:9": (768, 512),
    "1:1": (512, 512),
    "9:16": (512, 768),
}
# ───────────────────────────────────────────────────────────────────────────
# Audio helper functions (ported from alexnasa/ltx-2-TURBO)
# ───────────────────────────────────────────────────────────────────────────
def _coerce_audio_path(audio_path: Any) -> str:
"""Handle Gradio's various audio path formats (tuple, dict, string)."""
if isinstance(audio_path, tuple) and len(audio_path) > 0:
audio_path = audio_path[0]
if isinstance(audio_path, dict):
audio_path = audio_path.get("name") or audio_path.get("path")
if not isinstance(audio_path, (str, bytes, os.PathLike)):
raise TypeError(
f"audio_path must be a path-like, got {type(audio_path)}: {audio_path}"
)
return os.fspath(audio_path)
def match_audio_to_duration(
    audio_path: str,
    target_seconds: float,
    target_sr: int = 48000,
    to_mono: bool = True,
    pad_mode: str = "silence",
    device: str = "cuda",
):
    """Load an audio file and force it to exactly ``target_seconds`` long.

    The clip is resampled to ``target_sr``, optionally mixed down to mono,
    then trimmed or padded. Padding is silence by default; ``pad_mode ==
    "repeat"`` loops the clip instead.

    Returns:
        Tuple of (waveform tensor moved to ``device``, sample rate).
    """
    resolved = _coerce_audio_path(audio_path)
    waveform, sample_rate = torchaudio.load(resolved)  # [C, T] float32 on CPU
    if sample_rate != target_sr:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_sr)
        sample_rate = target_sr
    if to_mono and waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    wanted = int(round(target_seconds * sample_rate))
    have = waveform.shape[-1]
    if have > wanted:
        # Too long: keep only the leading segment.
        waveform = waveform[..., :wanted]
    elif have < wanted:
        if pad_mode == "repeat" and have > 0:
            # Loop the clip enough times to cover the target, then trim.
            copies = -(-wanted // have)  # ceil(wanted / have)
            waveform = waveform.repeat(1, copies)[..., :wanted]
        else:
            # Default: pad the tail with silence.
            waveform = F.pad(waveform, (0, wanted - have))
    waveform = waveform.to(device, non_blocking=True)
    return waveform, sample_rate
# ───────────────────────────────────────────────────────────────────────────
# 1) Download model files at module startup (CPU, no GPU lease)
# ───────────────────────────────────────────────────────────────────────────
logger.info("Downloading LTX model files...")
checkpoint_path = hf_hub_download(repo_id=LTX_REPO, filename=CKPT_DISTILLED)
logger.info(f" Distilled checkpoint: {checkpoint_path}")
spatial_upsampler_path = hf_hub_download(repo_id=LTX_REPO, filename=CKPT_UPSCALER)
logger.info(f" Upscaler: {spatial_upsampler_path}")
logger.info("Downloading Gemma text encoder...")
HF_TOKEN = os.environ.get("HF_TOKEN")
gemma_root = snapshot_download(repo_id=GEMMA_REPO, token=HF_TOKEN)
logger.info(f" Gemma root: {gemma_root}")
logger.info("All model files ready on disk.")
# ───────────────────────────────────────────────────────────────────────────
# 2) Construct ModelLedger (CPU — no model weights loaded to GPU)
# ───────────────────────────────────────────────────────────────────────────
logger.info("Constructing ModelLedger (with Gemma for text encoding)...")
fp8_quantization = QuantizationPolicy.fp8_cast()
model_ledger = ModelLedger(
dtype=torch.bfloat16,
device="cuda",
checkpoint_path=checkpoint_path,
gemma_root_path=gemma_root,
spatial_upsampler_path=spatial_upsampler_path,
loras=(),
quantization=fp8_quantization,
)
logger.info("ModelLedger constructed.")
# ───────────────────────────────────────────────────────────────────────────
# 3) Load text encoder at module level (kept in memory for reuse)
# ───────────────────────────────────────────────────────────────────────────
logger.info("Loading Gemma text encoder...")
text_encoder = model_ledger.text_encoder()
logger.info("Text encoder loaded and ready!")
# ───────────────────────────────────────────────────────────────────────────
# 4) Construct DistilledPipeline WITHOUT text encoder (gemma_root=None)
# ───────────────────────────────────────────────────────────────────────────
logger.info("Constructing DistilledPipeline (gemma_root=None)...")
pipeline = DistilledPipeline(
device=torch.device("cuda"),
checkpoint_path=checkpoint_path,
spatial_upsampler_path=spatial_upsampler_path,
gemma_root=None,
loras=[],
quantization=fp8_quantization,
)
# ───────────────────────────────────────────────────────────────────────────
# 5) Pre-load video encoder and transformer at module level
# ───────────────────────────────────────────────────────────────────────────
logger.info("Pre-loading video encoder and transformer...")
pipeline._video_encoder = pipeline.model_ledger.video_encoder()
pipeline._transformer = pipeline.model_ledger.transformer()
logger.info("=" * 60)
logger.info("Pipeline fully loaded and ready!")
logger.info("=" * 60)
# ───────────────────────────────────────────────────────────────────────────
# Helpers
# ───────────────────────────────────────────────────────────────────────────
def calc_frames(duration: float, fps: float) -> int:
    """Round ``duration * fps`` up to the nearest valid frame count.

    The model requires frame counts of the form 8k + 1, with a floor of 9.
    """
    requested = max(int(duration * fps) + 1, 9)
    groups = -(-(requested - 1) // 8)  # ceil((requested - 1) / 8)
    return 8 * groups + 1
def encode_text_simple(te, prompt: str):
    """Encode a prompt with the given text encoder, bypassing pipeline_utils.

    Returns the (video_encoding, audio_encoding) pair produced by the Gemma
    embeddings processor.
    """
    hidden, mask = te.encode(prompt)
    processor = model_ledger.gemma_embeddings_processor()
    processed = processor.process_hidden_states(hidden, mask)
    # Release the processor promptly; it is cheap to re-create per call.
    del processor
    return processed.video_encoding, processed.audio_encoding
def apply_resolution(resolution: str):
    """Map an aspect-ratio label to (width, height), defaulting to 768x512."""
    pair = RESOLUTION_MAP.get(resolution, (768, 512))
    return int(pair[0]), int(pair[1])
def apply_duration(duration_str: str) -> int:
    """Parse a duration preset label like "5s" into an integer second count.

    The original ``duration_str[:-1]`` silently dropped the last character
    even when no trailing "s" was present (turning "8" into ""), so we strip
    the suffix explicitly instead. Plain numeric strings are now accepted too.
    """
    return int(duration_str.strip().removesuffix("s"))
def on_mode_change(selected: str):
    """Show the end-frame input only while 'Interpolate' mode is selected."""
    return gr.update(visible=(selected == "Interpolate"))
def get_duration(
    first_frame,
    end_frame,
    prompt,
    duration,
    generation_mode,
    enhance_prompt,
    seed,
    randomize_seed,
    height,
    width,
    audio_path,
    *args,
    **kwargs,
):
    """Estimate the GPU lease (seconds) for @spaces.GPU(duration=...).

    The estimate steps up with requested video duration, plus a flat 10s
    overhead when an input audio file must be processed.
    """
    audio_overhead = 10 if audio_path is not None else 0
    seconds = float(duration)
    # Smallest bracket whose upper bound covers the request wins.
    for upper_bound, lease in ((6, 200), (8, 250), (10, 300)):
        if seconds <= upper_bound:
            return lease + audio_overhead
    return 350 + audio_overhead
# ───────────────────────────────────────────────────────────────────────────
# Phase 1: Text Encoding (separate GPU lease)
# ───────────────────────────────────────────────────────────────────────────
@spaces.GPU()
def encode_prompt(
    prompt: str,
    enhance_prompt: bool = True,
    input_image=None,
    seed: int = 42,
):
    """Encode a prompt on a short GPU lease.

    Optionally runs prompt enhancement first (the input image, when given, is
    passed along as context). Returns ``(embedding_data, final_prompt)`` where
    ``embedding_data`` carries CPU copies of the video/audio context tensors so
    they survive the ZeroGPU process boundary.
    """
    logger.info(f"[encode_prompt] prompt='{prompt[:80]}...', enhance={enhance_prompt}")
    if enhance_prompt:
        final_prompt = generate_enhanced_prompt(
            text_encoder=text_encoder,
            prompt=prompt,
            image_path=input_image,
            seed=seed,
        )
        logger.info(f"[encode_prompt] Enhanced prompt: '{final_prompt[:120]}...'")
    else:
        final_prompt = prompt
    with torch.inference_mode():
        vid_ctx, aud_ctx = encode_text_simple(text_encoder, final_prompt)
    payload = {
        # .detach().cpu() so the tensors can be returned out of the GPU worker.
        "video_context": vid_ctx.detach().cpu(),
        "audio_context": aud_ctx.detach().cpu(),
        "prompt": final_prompt,
    }
    logger.info("[encode_prompt] Done.")
    return payload, final_prompt
# ───────────────────────────────────────────────────────────────────────────
# Phase 2: Video Generation (separate GPU lease, dynamic duration)
# ───────────────────────────────────────────────────────────────────────────
@spaces.GPU(duration=get_duration)
def generate_video(
    first_frame,
    end_frame,
    prompt: str,
    duration: float,
    generation_mode: str = "Image-to-Video",
    enhance_prompt: bool = True,
    seed: int = 42,
    randomize_seed: bool = True,
    height: int = 512,
    width: int = 768,
    audio_path=None,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Full generation: encode prompt then run pipeline with pre-encoded contexts.
    Supports Image-to-Video, Interpolate, and audio input modes.

    Returns:
        (output video path as str, metadata HTML string for the UI).

    Raises:
        gr.Error: on empty prompt, GPU OOM, or any pipeline failure.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    # Video is generated at a fixed 24 fps; frame count must be 8k + 1.
    num_frames = calc_frames(duration, 24.0)
    frame_rate = 24.0
    logger.info(
        f"[generate_video] mode={generation_mode}, seed={current_seed}, {width}x{height}, "
        f"frames={num_frames}, duration={duration}s, enhance={enhance_prompt}, "
        f"audio={'yes' if audio_path else 'no'}"
    )
    # --- Handle input images ---
    # Each entry is (path, frame_index, conditioning_strength).
    images = []
    image_path_for_enhance = None
    if first_frame is not None:
        # first_frame is filepath from gr.Image(type="filepath")
        if isinstance(first_frame, str):
            img_path = first_frame
        else:
            # Non-string input (e.g. PIL image): persist it to a temp PNG.
            tmp_dir = tempfile.mkdtemp()
            img_path = os.path.join(tmp_dir, f"input_{int(time.time())}.png")
            if hasattr(first_frame, "save"):
                first_frame.save(img_path)
            else:
                from PIL import Image as PILImage
                PILImage.open(first_frame).save(img_path)
        # First frame conditions at index 0 with full strength.
        images.append((img_path, 0, 1.0))
        image_path_for_enhance = img_path
    # Interpolation: add end frame as guiding latent
    if generation_mode == "Interpolate" and end_frame is not None:
        if isinstance(end_frame, str):
            end_path = end_frame
        else:
            tmp_dir = tempfile.mkdtemp()
            end_path = os.path.join(tmp_dir, f"end_{int(time.time())}.png")
            if hasattr(end_frame, "save"):
                end_frame.save(end_path)
            else:
                from PIL import Image as PILImage
                PILImage.open(end_frame).save(end_path)
        # End frame conditions at the last frame index with half strength.
        end_idx = max(0, num_frames - 1)
        images.append((end_path, end_idx, 0.5))
    t0 = time.time()
    try:
        # Phase 1: Encode prompt
        embeddings, final_prompt = encode_prompt(
            prompt=prompt,
            enhance_prompt=enhance_prompt,
            input_image=image_path_for_enhance,
            seed=current_seed,
        )
        # Contexts come back on CPU; move to GPU for the pipeline.
        video_context = embeddings["video_context"].to("cuda", non_blocking=True)
        audio_context = embeddings["audio_context"].to("cuda", non_blocking=True)
        del embeddings
        torch.cuda.empty_cache()
        # If user provided audio, use a neutral audio_context (encode empty prompt)
        if audio_path is not None:
            with torch.inference_mode():
                _, neutral_audio_context = encode_text_simple(text_encoder, "")
            del audio_context
            audio_context = neutral_audio_context
        # Prepare audio waveform if provided
        input_waveform = None
        input_waveform_sample_rate = None
        if audio_path is not None:
            # Match the user audio length to the exact video duration.
            video_seconds = (num_frames - 1) / frame_rate
            input_waveform, input_waveform_sample_rate = match_audio_to_duration(
                audio_path=audio_path,
                target_seconds=video_seconds,
                target_sr=48000,
                to_mono=True,
                pad_mode="silence",
                device="cuda",
            )
            torch.cuda.empty_cache()
        # Phase 2: Run pipeline with pre-encoded contexts
        # delete=False: the pipeline writes to this path after the handle closes.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
            output_path = tmpfile.name
        with torch.inference_mode():
            pipeline(
                prompt=prompt,
                output_path=output_path,
                seed=current_seed,
                height=height,
                width=width,
                num_frames=num_frames,
                frame_rate=frame_rate,
                images=images,
                tiling_config=TilingConfig.default(),
                video_context=video_context,
                audio_context=audio_context,
                input_waveform=input_waveform,
                input_waveform_sample_rate=input_waveform_sample_rate,
            )
        # Free GPU tensors before building the response.
        del video_context, audio_context
        if input_waveform is not None:
            del input_waveform
        torch.cuda.empty_cache()
        elapsed = time.time() - t0
        logger.info(f"[generate_video] Done in {elapsed:.1f}s")
    except torch.cuda.OutOfMemoryError:
        elapsed = time.time() - t0
        logger.error(f"OOM after {elapsed:.1f}s")
        raise gr.Error("Out of GPU memory. Try a shorter duration or lower resolution.")
    except Exception as e:
        elapsed = time.time() - t0
        tb = traceback.format_exc()
        logger.error(f"Generation failed after {elapsed:.1f}s:\n{tb}")
        raise gr.Error(f"Generation failed: {type(e).__name__}: {e}")
    # Build metadata HTML
    meta_parts = [
        f'<span class="meta-chip"><b>Seed</b> {current_seed}</span>',
        f'<span class="meta-chip"><b>Resolution</b> {width}&times;{height}</span>',
        f'<span class="meta-chip"><b>Duration</b> {duration}s</span>',
        f'<span class="meta-chip"><b>Time</b> {elapsed:.1f}s</span>',
    ]
    meta_html = '<div class="gen-meta-card">'
    meta_html += '<div class="meta-chips">' + "".join(meta_parts) + "</div>"
    # Show the enhanced prompt only when it actually differs from the input.
    if enhance_prompt and final_prompt and final_prompt != prompt:
        # Minimal HTML escaping before embedding user-derived text.
        escaped = (
            final_prompt.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
        )
        meta_html += (
            '<div class="meta-enhanced-prompt">'
            '<span class="meta-ep-label">Enhanced Prompt</span>'
            f'<p class="meta-ep-text">{escaped}</p>'
            "</div>"
        )
    meta_html += "</div>"
    return str(output_path), meta_html
# ───────────────────────────────────────────────────────────────────────────
# Custom UI Components (ported from alexnasa/ltx-2-TURBO)
# ───────────────────────────────────────────────────────────────────────────
class RadioAnimated(gr.HTML):
    """Animated segmented radio (like iOS pill selector).

    Rendered as native radio inputs plus a sliding highlight; the JS keeps
    ``props.value`` in sync both ways and fires a 'change' event on user clicks.
    """

    def __init__(self, choices, value=None, **kwargs):
        if not choices or len(choices) < 2:
            raise ValueError("RadioAnimated requires at least 2 choices.")
        if value is None:
            value = choices[0]
        # Unique suffix so multiple instances on one page don't share radio groups.
        uid = uuid.uuid4().hex[:8]
        group_name = f"ra-{uid}"
        inputs_html = "\n".join(
            f'<input class="ra-input" type="radio" name="{group_name}" '
            f'id="{group_name}-{i}" value="{c}">'
            f'<label class="ra-label" for="{group_name}-{i}">{c}</label>'
            for i, c in enumerate(choices)
        )
        html_template = f"""
        <div class="ra-wrap" data-ra="{uid}">
          <div class="ra-inner">
            <div class="ra-highlight"></div>
            {inputs_html}
          </div>
        </div>
        """
        js_on_load = r"""
        (() => {
          const wrap = element.querySelector('.ra-wrap');
          const inner = element.querySelector('.ra-inner');
          const highlight = element.querySelector('.ra-highlight');
          const inputs = Array.from(element.querySelectorAll('.ra-input'));
          const labels = Array.from(element.querySelectorAll('.ra-label'));
          if (!inputs.length || !labels.length) return;
          const choices = inputs.map(i => i.value);
          const PAD = 6;
          let currentIdx = 0;
          function setHighlightByIndex(idx) {
            currentIdx = idx;
            const lbl = labels[idx];
            if (!lbl) return;
            const innerRect = inner.getBoundingClientRect();
            const lblRect = lbl.getBoundingClientRect();
            highlight.style.width = `${lblRect.width}px`;
            const x = (lblRect.left - innerRect.left - PAD);
            highlight.style.transform = `translateX(${x}px)`;
          }
          function setCheckedByValue(val, shouldTrigger=false) {
            const idx = Math.max(0, choices.indexOf(val));
            inputs.forEach((inp, i) => { inp.checked = (i === idx); });
            requestAnimationFrame(() => setHighlightByIndex(idx));
            props.value = choices[idx];
            if (shouldTrigger) trigger('change', props.value);
          }
          setCheckedByValue(props.value ?? choices[0], false);
          inputs.forEach((inp) => {
            inp.addEventListener('change', () => setCheckedByValue(inp.value, true));
          });
          window.addEventListener('resize', () => setHighlightByIndex(currentIdx));
          let last = props.value;
          const syncFromProps = () => {
            if (props.value !== last) {
              last = props.value;
              setCheckedByValue(last, false);
            }
            requestAnimationFrame(syncFromProps);
          };
          requestAnimationFrame(syncFromProps);
        })();
        """
        # NOTE(review): html_template/js_on_load are nonstandard gr.HTML kwargs —
        # presumably supported by a patched/custom Gradio build; verify.
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
class PromptBox(gr.HTML):
    """Prompt textarea with an internal footer slot for embedding dropdowns.

    The JS auto-sizes the textarea, mirrors its text into ``props.value``
    (firing 'change'), and attempts a polite desktop-only autofocus.
    """

    def __init__(self, value="", placeholder="Describe what you want...", **kwargs):
        # Unique suffix so multiple instances can coexist.
        uid = uuid.uuid4().hex[:8]
        html_template = f"""
        <div class="ds-card" data-ds="{uid}">
          <div class="ds-top">
            <textarea class="ds-textarea" rows="3" placeholder="{placeholder}"></textarea>
            <div class="ds-footer" aria-label="prompt-footer"></div>
          </div>
        </div>
        """
        js_on_load = r"""
        (() => {
          const textarea = element.querySelector(".ds-textarea");
          if (!textarea) return;
          const autosize = () => {
            textarea.style.height = "0px";
            textarea.style.height = Math.min(textarea.scrollHeight, 240) + "px";
          };
          const setValue = (v, triggerChange=false) => {
            const val = (v ?? "");
            if (textarea.value !== val) textarea.value = val;
            autosize();
            props.value = textarea.value;
            if (triggerChange) trigger("change", props.value);
          };
          setValue(props.value, false);
          textarea.addEventListener("input", () => {
            autosize();
            props.value = textarea.value;
            trigger("change", props.value);
          });
          const shouldAutoFocus = () => {
            const ae = document.activeElement;
            if (ae && ae !== document.body && ae !== document.documentElement) return false;
            if (window.matchMedia && window.matchMedia("(max-width: 768px)").matches) return false;
            return true;
          };
          const focusWithRetry = (tries = 30) => {
            if (!shouldAutoFocus()) return;
            if (document.activeElement !== textarea) textarea.focus({ preventScroll: true });
            if (document.activeElement === textarea) return;
            if (tries > 0) requestAnimationFrame(() => focusWithRetry(tries - 1));
          };
          requestAnimationFrame(() => focusWithRetry());
          let last = props.value;
          const syncFromProps = () => {
            if (props.value !== last) {
              last = props.value;
              setValue(last, false);
            }
            requestAnimationFrame(syncFromProps);
          };
          requestAnimationFrame(syncFromProps);
        })();
        """
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
class CameraDropdown(gr.HTML):
    """Custom dropdown with optional icons per item.

    Choices may be plain strings or dicts with ``label``/``value``/``icon``
    keys. The JS keeps ``props.value`` in sync both ways and fires 'change'
    when the user picks an item.
    """

    def __init__(self, choices, value="None", title="Dropdown", **kwargs):
        if not choices:
            raise ValueError("CameraDropdown requires choices.")
        # Normalize every choice into a {"label", "value", "icon"} dict.
        norm = []
        for c in choices:
            if isinstance(c, dict):
                label = str(c.get("label", c.get("value", "")))
                val = str(c.get("value", label))
                icon = c.get("icon", None)
                norm.append({"label": label, "value": val, "icon": icon})
            else:
                s = str(c)
                norm.append({"label": s, "value": s, "icon": None})
        uid = uuid.uuid4().hex[:8]

        def render_item(item):
            # One menu entry: optional icon span + label, carrying its value.
            icon_html = ""
            if item["icon"]:
                icon_html = f'<span class="cd-icn">{item["icon"]}</span>'
            return (
                f'<button type="button" class="cd-item" '
                f'data-value="{item["value"]}">'
                f'{icon_html}<span class="cd-label">{item["label"]}</span>'
                f"</button>"
            )

        items_html = "\n".join(render_item(item) for item in norm)
        html_template = f"""
        <div class="cd-wrap" data-cd="{uid}">
          <button type="button" class="cd-trigger" aria-haspopup="menu" aria-expanded="false">
            <span class="cd-trigger-icon"></span>
            <span class="cd-trigger-text"></span>
            <span class="cd-caret">&#x25BE;</span>
          </button>
          <div class="cd-menu" role="menu" aria-hidden="true">
            <div class="cd-title">{title}</div>
            <div class="cd-items">
              {items_html}
            </div>
          </div>
        </div>
        """
        value_to_label = {it["value"]: it["label"] for it in norm}
        value_to_icon = {it["value"]: (it["icon"] or "") for it in norm}
        # BUG FIX: the original declared `const trigger = ...` for the trigger
        # <button>, shadowing the environment-provided `trigger(event, value)`
        # function that RadioAnimated/PromptBox call to emit change events — so
        # `trigger("change", props.value)` inside setValue() tried to invoke a
        # DOM element and threw. The element is now named `triggerBtn`.
        js_on_load = r"""
        (() => {
          const wrap = element.querySelector(".cd-wrap");
          const triggerBtn = element.querySelector(".cd-trigger");
          const triggerIcon = element.querySelector(".cd-trigger-icon");
          const triggerText = element.querySelector(".cd-trigger-text");
          const menu = element.querySelector(".cd-menu");
          const items = Array.from(element.querySelectorAll(".cd-item"));
          if (!wrap || !triggerBtn || !menu || !items.length) return;
          const valueToLabel = __VALUE_TO_LABEL__;
          const valueToIcon = __VALUE_TO_ICON__;
          const safeLabel = (v) => (valueToLabel && valueToLabel[v]) ? valueToLabel[v] : (v ?? "None");
          const safeIcon = (v) => (valueToIcon && valueToIcon[v]) ? valueToIcon[v] : "";
          function closeMenu() {
            menu.classList.remove("open");
            triggerBtn.setAttribute("aria-expanded", "false");
            menu.setAttribute("aria-hidden", "true");
          }
          function openMenu() {
            menu.classList.add("open");
            triggerBtn.setAttribute("aria-expanded", "true");
            menu.setAttribute("aria-hidden", "false");
          }
          function setValue(val, shouldTrigger = false) {
            const v = (val ?? "None");
            props.value = v;
            triggerText.textContent = safeLabel(v);
            if (triggerIcon) {
              triggerIcon.innerHTML = safeIcon(v);
              triggerIcon.style.display = safeIcon(v) ? "inline-flex" : "none";
            }
            items.forEach(btn => {
              btn.dataset.selected = (btn.dataset.value === v) ? "true" : "false";
            });
            if (shouldTrigger) trigger("change", props.value);
          }
          triggerBtn.addEventListener("pointerdown", (e) => {
            e.preventDefault();
            e.stopPropagation();
            if (menu.classList.contains("open")) closeMenu();
            else openMenu();
          });
          document.addEventListener("pointerdown", (e) => {
            if (!wrap.contains(e.target)) closeMenu();
          }, true);
          document.addEventListener("keydown", (e) => {
            if (e.key === "Escape") closeMenu();
          });
          wrap.addEventListener("focusout", (e) => {
            if (!wrap.contains(e.relatedTarget)) closeMenu();
          });
          items.forEach((btn) => {
            btn.addEventListener("pointerdown", (e) => {
              e.preventDefault();
              e.stopPropagation();
              closeMenu();
              setValue(btn.dataset.value, true);
            });
          });
          setValue((props.value ?? "None"), false);
          let last = props.value;
          const syncFromProps = () => {
            if (props.value !== last) {
              last = props.value;
              setValue(last, false);
            }
            requestAnimationFrame(syncFromProps);
          };
          requestAnimationFrame(syncFromProps);
        })();
        """
        # Inject the value->label/icon maps as JSON literals into the JS.
        js_on_load = js_on_load.replace(
            "__VALUE_TO_LABEL__", json.dumps(value_to_label)
        )
        js_on_load = js_on_load.replace("__VALUE_TO_ICON__", json.dumps(value_to_icon))
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
class AudioDropUpload(gr.HTML):
    """Custom audio drop/click UI that proxies file into a hidden gr.File component.

    The JS previews the chosen file locally and forwards it into the hidden
    file input identified by ``target_audio_elem_id`` via a DataTransfer, so
    Gradio's normal upload path still runs.
    """

    def __init__(self, target_audio_elem_id: str, value=None, **kwargs):
        # Unique suffix so multiple instances can coexist.
        uid = uuid.uuid4().hex[:8]
        html_template = f"""
        <div class="aud-wrap" data-aud="{uid}">
          <div class="aud-drop" role="button" tabindex="0" aria-label="Upload audio">
            <div><strong>(Optional) Drag &amp; drop an audio file here</strong></div>
            <div class="aud-hint">...or click to browse</div>
          </div>
          <div class="aud-row" aria-live="polite">
            <audio class="aud-player" controls></audio>
            <button class="aud-remove" type="button" aria-label="Remove audio">
              <svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false">
                <path d="M18 6L6 18M6 6l12 12"
                      stroke="currentColor" stroke-width="2.25" stroke-linecap="round"/>
              </svg>
            </button>
          </div>
          <div class="aud-filelabel"></div>
        </div>
        """
        js_on_load = r"""
        (() => {
          function grRoot() {
            const ga = document.querySelector("gradio-app");
            return (ga && ga.shadowRoot) ? ga.shadowRoot : document;
          }
          const root = grRoot();
          const wrap = element.querySelector(".aud-wrap");
          const drop = element.querySelector(".aud-drop");
          const row = element.querySelector(".aud-row");
          const player = element.querySelector(".aud-player");
          const removeBtn = element.querySelector(".aud-remove");
          const label = element.querySelector(".aud-filelabel");
          const TARGET_ID = "__TARGET_ID__";
          let currentUrl = null;
          function findHiddenAudioFileInput() {
            const host = root.querySelector("#" + CSS.escape(TARGET_ID));
            if (!host) return null;
            const inp = host.querySelector('input[type="file"]');
            return inp;
          }
          function showDrop() {
            drop.style.display = "";
            row.style.display = "none";
            label.style.display = "none";
            label.textContent = "";
          }
          function showPlayer(filename) {
            drop.style.display = "none";
            row.style.display = "flex";
            if (filename) {
              label.textContent = "Loaded: " + filename;
              label.style.display = "block";
            }
          }
          function clearPreview() {
            player.pause();
            player.removeAttribute("src");
            player.load();
            if (currentUrl) {
              URL.revokeObjectURL(currentUrl);
              currentUrl = null;
            }
          }
          function clearHiddenGradioAudio() {
            const fileInput = findHiddenAudioFileInput();
            if (!fileInput) return;
            fileInput.value = "";
            const dt = new DataTransfer();
            fileInput.files = dt.files;
            fileInput.dispatchEvent(new Event("input", { bubbles: true }));
            fileInput.dispatchEvent(new Event("change", { bubbles: true }));
          }
          function clearAll() {
            clearPreview();
            clearHiddenGradioAudio();
            props.value = "__CLEAR__";
            trigger("change", props.value);
            showDrop();
          }
          function loadFileToPreview(file) {
            if (!file) return;
            if (!file.type || !file.type.startsWith("audio/")) {
              alert("Please choose an audio file.");
              return;
            }
            clearPreview();
            currentUrl = URL.createObjectURL(file);
            player.src = currentUrl;
            showPlayer(file.name);
          }
          function pushFileIntoHiddenGradioAudio(file) {
            const fileInput = findHiddenAudioFileInput();
            if (!fileInput) {
              console.warn("Could not find hidden gr.File input. Check elem_id:", TARGET_ID);
              return;
            }
            fileInput.value = "";
            const dt = new DataTransfer();
            dt.items.add(file);
            fileInput.files = dt.files;
            fileInput.dispatchEvent(new Event("input", { bubbles: true }));
            fileInput.dispatchEvent(new Event("change", { bubbles: true }));
          }
          function handleFile(file) {
            loadFileToPreview(file);
            pushFileIntoHiddenGradioAudio(file);
          }
          const localPicker = document.createElement("input");
          localPicker.type = "file";
          localPicker.accept = "audio/*";
          localPicker.style.display = "none";
          wrap.appendChild(localPicker);
          localPicker.addEventListener("change", () => {
            const f = localPicker.files && localPicker.files[0];
            if (f) handleFile(f);
            localPicker.value = "";
          });
          drop.addEventListener("click", () => localPicker.click());
          drop.addEventListener("keydown", (e) => {
            if (e.key === "Enter" || e.key === " ") {
              e.preventDefault();
              localPicker.click();
            }
          });
          removeBtn.addEventListener("click", clearAll);
          ["dragenter","dragover","dragleave","drop"].forEach(evt => {
            drop.addEventListener(evt, (e) => {
              e.preventDefault();
              e.stopPropagation();
            });
          });
          drop.addEventListener("dragover", () => drop.classList.add("dragover"));
          drop.addEventListener("dragleave", () => drop.classList.remove("dragover"));
          drop.addEventListener("drop", (e) => {
            drop.classList.remove("dragover");
            const f = e.dataTransfer.files && e.dataTransfer.files[0];
            if (f) handleFile(f);
          });
          showDrop();
          function setPreviewFromPath(path) {
            if (path === "__CLEAR__") path = null;
            if (!path) {
              clearPreview();
              showDrop();
              return;
            }
            let url = path;
            if (!/^https?:\/\//.test(path) && !path.startsWith("gradio_api/file=") && !path.startsWith("/file=")) {
              url = "gradio_api/file=" + path;
            }
            clearPreview();
            player.src = url;
            showPlayer(path.split("/").pop());
          }
          let last = props.value;
          const syncFromProps = () => {
            const v = props.value;
            if (v !== last) {
              last = v;
              if (!v || v === "__CLEAR__") setPreviewFromPath(null);
              else setPreviewFromPath(String(v));
            }
            requestAnimationFrame(syncFromProps);
          };
          requestAnimationFrame(syncFromProps);
        })();
        """
        # Point the JS at the hidden gr.File component's elem_id.
        js_on_load = js_on_load.replace("__TARGET_ID__", target_audio_elem_id)
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
# ───────────────────────────────────────────────────────────────────────────
# CSS (dark theme, ported from alexnasa/ltx-2-TURBO)
# ───────────────────────────────────────────────────────────────────────────
CSS = """
/* ---- layout ---- */
#controls-row {
display: none !important;
align-items: center;
gap: 12px;
flex-wrap: nowrap;
}
#controls-row > * {
flex: 0 0 auto !important;
width: auto !important;
min-width: 0 !important;
}
#col-container {
margin: 0 auto;
max-width: 1600px;
}
#step-column {
padding: 10px;
border-radius: 8px;
box-shadow: var(--card-shadow);
margin: 10px;
}
/* ---- generation metadata card ---- */
.gen-meta-card {
margin-top: 10px;
padding: 12px 16px;
border-radius: 10px;
background: rgba(255, 255, 255, 0.04);
border: 1px solid rgba(255, 255, 255, 0.08);
}
.meta-chips {
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.meta-chip {
display: inline-flex;
align-items: center;
gap: 5px;
padding: 4px 10px;
border-radius: 6px;
background: rgba(255, 255, 255, 0.06);
font-size: 13px;
color: rgba(255, 255, 255, 0.75);
font-family: monospace;
}
.meta-chip b {
color: rgba(255, 255, 255, 0.45);
font-weight: 500;
font-family: sans-serif;
font-size: 11px;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.meta-enhanced-prompt {
margin-top: 10px;
padding-top: 10px;
border-top: 1px solid rgba(255, 255, 255, 0.06);
}
.meta-ep-label {
font-size: 11px;
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.5px;
color: rgba(255, 255, 255, 0.4);
}
.meta-ep-text {
margin: 6px 0 0 0;
font-size: 13px;
line-height: 1.5;
color: rgba(255, 255, 255, 0.65);
max-height: 120px;
overflow-y: auto;
white-space: pre-wrap;
word-break: break-word;
}
/* ---- generate button ---- */
.button-gradient {
background: linear-gradient(45deg, rgb(255, 65, 108), rgb(255, 75, 43), rgb(255, 155, 0), rgb(255, 65, 108)) 0% 0% / 400% 400%;
border: none;
padding: 14px 28px;
font-size: 16px;
font-weight: bold;
color: white;
border-radius: 10px;
cursor: pointer;
transition: 0.3s ease-in-out;
animation: 2s linear 0s infinite normal none running gradientAnimation;
box-shadow: rgba(255, 65, 108, 0.6) 0px 4px 10px;
}
@keyframes gradientAnimation {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
/* ---- mode row ---- */
#mode-row {
display: flex !important;
justify-content: center !important;
align-items: center !important;
width: 100% !important;
}
#mode-row > * {
flex: 0 0 auto !important;
width: auto !important;
min-width: 0 !important;
}
#mode-row .gr-html,
#mode-row .gradio-html,
#mode-row .prose,
#mode-row .block {
width: auto !important;
flex: 0 0 auto !important;
display: inline-block !important;
}
#radioanimated_mode {
display: inline-flex !important;
justify-content: center !important;
width: auto !important;
}
/* ---- radioanimated ---- */
.ra-wrap { width: fit-content; }
.ra-inner {
position: relative;
display: inline-flex;
align-items: center;
gap: 0;
padding: 6px;
background: #0b0b0b;
border-radius: 9999px;
overflow: hidden;
user-select: none;
}
.ra-input { display: none; }
.ra-label {
position: relative;
z-index: 2;
padding: 10px 18px;
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
font-size: 14px;
font-weight: 600;
color: rgba(255,255,255,0.7);
cursor: pointer;
transition: color 180ms ease;
white-space: nowrap;
}
.ra-highlight {
position: absolute;
z-index: 1;
top: 6px;
left: 6px;
height: calc(100% - 12px);
border-radius: 9999px;
background: #8bff97;
transition: transform 200ms ease, width 200ms ease;
}
.ra-input:checked + .ra-label { color: rgba(0,0,0,0.75); }
/* ---- prompt box ---- */
.ds-card {
width: 100%;
max-width: 720px;
margin: 0 auto;
position: relative;
z-index: 50;
}
.ds-top {
position: relative;
background: #2b2b2b;
border: 1px solid rgba(255,255,255,0.12);
border-radius: 14px;
overflow: visible !important;
}
.ds-textarea {
width: 100%;
box-sizing: border-box;
background: transparent !important;
border: none !important;
border-radius: 0 !important;
color: rgba(255,255,255,0.9);
padding: 14px 16px;
padding-bottom: 72px;
outline: none;
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
font-size: 15px;
line-height: 1.35;
resize: none;
min-height: 210px;
max-height: 210px;
overflow-y: auto;
scrollbar-width: none;
position: relative;
z-index: 1;
}
.ds-textarea::-webkit-scrollbar { width: 0; height: 0; }
.ds-textarea:focus,
.ds-textarea:focus-visible { outline: none !important; box-shadow: none !important; }
.ds-textarea { outline: none !important; }
.ds-top:focus-within {
border-color: rgba(255,255,255,0.22) !important;
box-shadow: 0 0 0 3px rgba(255,255,255,0.06) !important;
border-radius: 14px !important;
}
.ds-top { border-radius: 14px !important; }
.ds-top::after {
content: "";
position: absolute;
left: 0; right: 0; bottom: 0;
height: 56px;
background: #2b2b2b;
border-bottom-left-radius: 14px !important;
border-bottom-right-radius: 14px !important;
pointer-events: none;
z-index: 2;
}
.ds-footer {
position: absolute;
right: 12px;
bottom: 10px;
display: flex;
gap: 8px;
align-items: center;
justify-content: flex-end;
z-index: 20 !important;
}
.ds-footer .cd-trigger {
min-height: 32px;
padding: 6px 10px;
font-size: 12px;
gap: 6px;
border-radius: 9999px;
}
.ds-footer .cd-trigger-icon,
.ds-footer .cd-icn { width: 14px; height: 14px; }
.ds-footer .cd-trigger-icon svg,
.ds-footer .cd-icn svg { width: 14px; height: 14px; }
.ds-footer .cd-caret { font-size: 11px; }
.ds-footer .cd-menu { z-index: 999999 !important; }
/* ---- camera dropdown ---- */
.cd-wrap { position: relative; display: inline-block; }
.cd-trigger {
margin-top: 2px;
display: inline-flex;
align-items: center;
justify-content: center;
gap: 10px;
border: none;
box-sizing: border-box;
padding: 10px 18px;
min-height: 52px;
line-height: 1.2;
border-radius: 9999px;
background: #0b0b0b;
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
font-size: 14px;
color: rgba(255,255,255,0.7) !important;
font-weight: 600 !important;
cursor: pointer;
user-select: none;
white-space: nowrap;
}
.cd-trigger .cd-trigger-text,
.cd-trigger .cd-caret { color: rgba(255,255,255,0.7) !important; }
.cd-caret { opacity: 0.8; font-weight: 900; }
.cd-trigger-icon {
color: rgba(255,255,255,0.9);
display: inline-flex;
align-items: center;
justify-content: center;
width: 18px; height: 18px;
}
.cd-trigger-icon svg { width: 18px; height: 18px; display: block; }
.cd-menu {
position: absolute;
top: calc(100% + 4px);
left: 0;
min-width: 240px;
background: #2b2b2b !important;
border: 1px solid rgba(255,255,255,0.14) !important;
border-radius: 14px;
box-shadow: 0 18px 40px rgba(0,0,0,0.35);
padding: 10px;
opacity: 0;
transform: translateY(-6px);
pointer-events: none;
transition: opacity 160ms ease, transform 160ms ease;
z-index: 9999;
}
.cd-menu.open {
opacity: 1;
transform: translateY(0);
pointer-events: auto;
}
.cd-title {
font-size: 12px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.04em;
color: rgba(255,255,255,0.55) !important;
margin-bottom: 6px;
padding: 0 6px;
pointer-events: none;
}
.cd-items { display: flex; flex-direction: column; gap: 0px; }
.cd-item {
width: 100%;
text-align: left;
border: none;
background: transparent;
color: rgba(255,255,255,0.92) !important;
padding: 8px 34px 8px 12px;
border-radius: 10px;
cursor: pointer;
font-size: 14px;
font-weight: 700;
position: relative;
transition: background 120ms ease;
display: flex;
align-items: center;
gap: 10px;
}
.cd-item * { color: rgba(255,255,255,0.92) !important; }
.cd-item:hover { background: rgba(255,255,255,0.10) !important; }
.cd-item::after {
content: "\\2713";
position: absolute;
right: 12px;
top: 50%;
transform: translateY(-50%);
opacity: 0;
transition: opacity 120ms ease;
color: rgba(255,255,255,0.92) !important;
font-weight: 900;
}
.cd-item[data-selected="true"]::after { opacity: 1; }
.cd-item.selected {
background: transparent !important;
border: none !important;
}
.cd-icn {
display: inline-flex;
align-items: center;
justify-content: center;
width: 18px; height: 18px;
flex: 0 0 18px;
}
.cd-icn svg { width: 18px; height: 18px; display: block; }
.cd-icn svg * { stroke: rgba(255,255,255,0.9); }
.cd-label { flex: 1; }
.cd-trigger, .cd-trigger * { color: rgba(255,255,255,0.75) !important; }
/* ---- AudioDropUpload ---- */
.aud-wrap { width: 100%; max-width: 720px; }
.aud-drop {
border: 2px dashed var(--body-text-color-subdued);
border-radius: 16px;
padding: 18px;
text-align: center;
cursor: pointer;
user-select: none;
color: var(--body-text-color);
background: var(--block-background-fill);
}
.aud-drop.dragover {
border-color: rgba(255,255,255,0.35);
background: rgba(255,255,255,0.06);
}
.aud-hint {
color: var(--body-text-color-subdued);
font-size: 0.95rem;
margin-top: 6px;
}
.aud-row {
display: none;
align-items: center;
gap: 10px;
background: #0b0b0b;
border-radius: 9999px;
padding: 8px 10px;
}
.aud-player {
flex: 1;
width: 100%;
height: 34px;
border-radius: 9999px;
}
.aud-remove {
appearance: none;
border: none;
background: transparent;
color: rgba(255,255,255);
cursor: pointer;
width: 36px; height: 36px;
border-radius: 9999px;
display: inline-flex;
align-items: center;
justify-content: center;
padding: 0;
transition: background 120ms ease, color 120ms ease, opacity 120ms ease;
opacity: 0.9;
flex: 0 0 auto;
}
.aud-remove:hover {
background: rgba(255,255,255,0.08);
color: rgb(255,255,255);
opacity: 1;
}
.aud-filelabel {
margin: 10px 6px 0;
color: var(--body-text-color-subdued);
font-size: 0.95rem;
display: none;
}
#audio_input_hidden { display: none !important; }
"""
# ───────────────────────────────────────────────────────────────────────────
# SVG icons for resolution dropdown
# ───────────────────────────────────────────────────────────────────────────
# 24x24 stroke-only rectangle glyphs, one per aspect ratio, passed as the
# "icon" field of the resolution CameraDropdown choices (rendered inline as
# raw SVG; `currentColor` picks up the dropdown's text color).
# Wide rectangle: landscape 16:9.
ICON_16_9 = """<svg viewBox="0 0 24 24" fill="none" aria-hidden="true">
<rect x="3" y="7" width="18" height="10" rx="2" stroke="currentColor" stroke-width="2"/>
</svg>"""
# Square: 1:1.
ICON_1_1 = """<svg viewBox="0 0 24 24" fill="none" aria-hidden="true">
<rect x="6" y="6" width="12" height="12" rx="2" stroke="currentColor" stroke-width="2"/>
</svg>"""
# Tall rectangle: portrait 9:16.
ICON_9_16 = """<svg viewBox="0 0 24 24" fill="none" aria-hidden="true">
<rect x="7" y="3" width="10" height="18" rx="2" stroke="currentColor" stroke-width="2"/>
</svg>"""
# ───────────────────────────────────────────────────────────────────────────
# Gradio UI
# ───────────────────────────────────────────────────────────────────────────
# Build the Gradio app. Component creation order matters: it determines the
# on-page layout, and the JS relocator + custom components (RadioAnimated,
# PromptBox, CameraDropdown, AudioDropUpload) rely on the elem_ids set here.
with gr.Blocks(title="LTX-2.3 Video [Turbo]", css=CSS) as demo:
    # Header: title, model/github links, and author follow badges.
    gr.HTML(
        """
<div style="text-align: center;">
<p style="font-size:16px; display: inline; margin: 0;">
<strong>LTX-2.3 Video [Turbo]</strong> &mdash; 22B DiT audio-video model on free ZeroGPU
</p>
<a href="https://huggingface.co/Lightricks/LTX-2.3"
target="_blank" rel="noopener noreferrer"
style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
[model]
</a>
<a href="https://github.com/Lightricks/LTX-2"
target="_blank" rel="noopener noreferrer"
style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
[github]
</a>
</div>
<div style="text-align: center; margin-top: 4px;">
<strong>HF Space by:</strong>
<a href="https://huggingface.co/ZeroCollabs" target="_blank" rel="noopener noreferrer"
style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
<img src="https://img.shields.io/badge/%F0%9F%A4%97-Follow%20on%20HF-green.svg" alt="Follow on HF">
</a>
<a href="https://github.com/ZeroHackz" target="_blank" rel="noopener noreferrer"
style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
<img src="https://img.shields.io/badge/GitHub-Follow-181717?logo=github" alt="Follow on GitHub">
</a>
</div>
"""
    )
    with gr.Column(elem_id="col-container"):
        # ---- Mode selector ----
        # Custom animated pill toggle; its value drives end_frame visibility
        # (see radioanimated_mode.change wiring below).
        with gr.Row(elem_id="mode-row"):
            radioanimated_mode = RadioAnimated(
                choices=["Image-to-Video", "Interpolate"],
                value="Image-to-Video",
                elem_id="radioanimated_mode",
            )
        with gr.Row():
            # ---- Left column: controls ----
            with gr.Column(elem_id="step-column"):
                with gr.Row():
                    # First-frame conditioning image (used in both modes).
                    first_frame = gr.Image(
                        label="First Frame (Optional)",
                        type="filepath",
                        height=256,
                    )
                    # Last-frame image; hidden by default, shown only in
                    # "Interpolate" mode via on_mode_change.
                    end_frame = gr.Image(
                        label="Last Frame (Optional)",
                        type="filepath",
                        height=256,
                        visible=False,
                    )
                # JS relocator: moves duration & resolution dropdowns into prompt footer.
                # The script polls with requestAnimationFrame until all three
                # target nodes exist, then re-parents the dropdowns once.
                # NOTE(review): `html_template` / `js_on_load` are not kwargs of
                # stock gr.HTML — presumably a patched/forked Gradio build here;
                # confirm against the pinned gradio version.
                relocate = gr.HTML(
                    value="",
                    html_template="<div></div>",
                    js_on_load=r"""
(() => {
function moveIntoFooter() {
const promptRoot = document.querySelector("#prompt_ui");
if (!promptRoot) return false;
const footer = promptRoot.querySelector(".ds-footer");
if (!footer) return false;
const dur = document.querySelector("#duration_ui .cd-wrap");
const res = document.querySelector("#resolution_ui .cd-wrap");
if (!dur || !res) return false;
footer.appendChild(dur);
footer.appendChild(res);
return true;
}
const tick = () => {
if (!moveIntoFooter()) requestAnimationFrame(tick);
};
requestAnimationFrame(tick);
})();
""",
                )
                # Visible custom prompt editor; mirrored into the hidden
                # `prompt` textbox via prompt_ui.change below.
                prompt_ui = PromptBox(
                    value="make this image come alive, cinematic motion, smooth animation",
                    elem_id="prompt_ui",
                )
                # Hidden real audio input (backend value)
                audio_input = gr.File(
                    label="Audio (Optional)",
                    file_types=["audio"],
                    type="filepath",
                    elem_id="audio_input_hidden",
                )
                # Custom audio UI that feeds the hidden gr.File
                audio_ui = AudioDropUpload(
                    target_audio_elem_id="audio_input_hidden",
                    elem_id="audio_ui",
                )
                # Hidden prompt textbox (synced from PromptBox) — this, not
                # prompt_ui, is what generate_btn.click reads.
                prompt = gr.Textbox(
                    label="Prompt",
                    value="make this image come alive, cinematic motion, smooth animation",
                    lines=3,
                    max_lines=3,
                    visible=False,
                )
                enhance_prompt = gr.Checkbox(
                    label="Enhance Prompt",
                    value=True,
                )
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        value=42,
                        step=1,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            # ---- Right column: output + hidden controls ----
            with gr.Column(elem_id="step-column"):
                output_video = gr.Video(
                    label="Generated Video", autoplay=True, loop=True, height=512
                )
                # Filled with generation metadata HTML by generate_video.
                metadata_display = gr.HTML(value="", elem_id="generation-metadata")
                with gr.Row(elem_id="controls-row"):
                    # Visible duration dropdown; mirrored into the hidden
                    # `duration` slider by apply_duration.
                    duration_ui = CameraDropdown(
                        choices=["6s", "8s", "10s", "12s"],
                        value="6s",
                        title="Clip Duration",
                        elem_id="duration_ui",
                    )
                    # Hidden backend value actually passed to generate_video
                    # (range/step match the dropdown choices: 6/8/10/12).
                    duration = gr.Slider(
                        label="Duration (seconds)",
                        minimum=6.0,
                        maximum=12.0,
                        value=6.0,
                        step=2.0,
                        visible=False,
                    )
                    # Aspect-ratio dropdown with inline SVG icons; mirrored
                    # into the hidden width/height numbers by apply_resolution.
                    resolution_ui = CameraDropdown(
                        choices=[
                            {"label": "16:9", "value": "16:9", "icon": ICON_16_9},
                            {"label": "1:1", "value": "1:1", "icon": ICON_1_1},
                            {"label": "9:16", "value": "9:16", "icon": ICON_9_16},
                        ],
                        value="9:16",
                        title="Resolution",
                        elem_id="resolution_ui",
                    )
                    width = gr.Number(
                        label="Width", value=512, precision=0, visible=False
                    )
                    height = gr.Number(
                        label="Height", value=768, precision=0, visible=False
                    )
                generate_btn = gr.Button(
                    "Generate Video",
                    variant="primary",
                    elem_classes="button-gradient",
                )
    # ────────────────────────────────────────────────────────────────────
    # Event wiring
    # ────────────────────────────────────────────────────────────────────
    # NOTE(review): `api_visibility="private"` on the listeners below — confirm
    # this kwarg exists in the pinned Gradio version (stock releases expose
    # api_name/show_api instead).
    # Mode selector -> show/hide end_frame
    radioanimated_mode.change(
        fn=on_mode_change,
        inputs=radioanimated_mode,
        outputs=[end_frame],
        api_visibility="private",
    )
    # Duration dropdown -> hidden slider
    duration_ui.change(
        fn=apply_duration,
        inputs=duration_ui,
        outputs=[duration],
        api_visibility="private",
    )
    # Resolution dropdown -> hidden width/height
    resolution_ui.change(
        fn=apply_resolution,
        inputs=resolution_ui,
        outputs=[width, height],
        api_visibility="private",
    )
    # PromptBox -> hidden textbox
    prompt_ui.change(
        fn=lambda x: x,
        inputs=prompt_ui,
        outputs=prompt,
        api_visibility="private",
    )

    # Audio UI clear handler
    def on_audio_ui_change(v):
        """Clear the hidden audio gr.File when the custom UI signals removal.

        Returns None (clears the file value) on the "__CLEAR__" sentinel or an
        empty value; otherwise returns gr.update() to leave the value alone —
        uploads are written into the hidden input by AudioDropUpload's own JS,
        not through this handler.
        """
        if v == "__CLEAR__" or v is None or v == "":
            return None
        return gr.update()

    audio_ui.change(
        fn=on_audio_ui_change,
        inputs=audio_ui,
        outputs=audio_input,
        api_visibility="private",
    )
    # Generate button — the main pipeline entry; note `height` precedes
    # `width` in this positional input list (must match generate_video's
    # parameter order).
    generate_btn.click(
        fn=generate_video,
        inputs=[
            first_frame,
            end_frame,
            prompt,
            duration,
            radioanimated_mode,
            enhance_prompt,
            seed,
            randomize_seed,
            height,
            width,
            audio_input,
        ],
        outputs=[output_video, metadata_display],
    )
    # ---- Footer ----
    gr.Markdown(
        """
---
**Notes:**
- ZeroGPU provides limited GPU time per request. Shorter durations are more reliable.
- Max duration is 12 seconds. Longer clips need more GPU time and may be slower.
- FP8 quantization reduces VRAM usage by ~50% with minimal quality impact.
- The 2x spatial upscaler doubles the initial generation resolution.
- This Space uses [google/gemma-3-12b-it-qat-q4_0-unquantized](https://huggingface.co/google/gemma-3-12b-it-qat-q4_0-unquantized) as the text encoder.
If you duplicate this Space, you must first accept the [Gemma license](https://huggingface.co/google/gemma-3-12b-it-qat-q4_0-unquantized) on your HuggingFace account.
Built with [Lightricks/LTX-2.3](https://huggingface.co/Lightricks/LTX-2.3)
| [GitHub](https://github.com/Lightricks/LTX-2)
| Space by [ZeroCollabs](https://huggingface.co/ZeroCollabs)
| [GitHub](https://github.com/ZeroHackz)
<sub>UI inspired by [alexnasa](https://huggingface.co/alexnasa)</sub>
"""
    )
def _main() -> None:
    """Entry point: start the Gradio server with error display enabled."""
    demo.launch(show_error=True)


if __name__ == "__main__":
    _main()