Spaces:
Running on Zero
Running on Zero
| """ | |
| LTX-2.3 Turbo — ZeroGPU Edition | |
| Generates synchronized audio-video content using Lightricks/LTX-2.3 on | |
| free ZeroGPU hardware via Hugging Face Spaces. | |
| UI inspired by alexnasa/ltx-2-TURBO with full feature parity for LTX-2.3: | |
| - Image-to-Video mode (first frame conditioning) | |
| - Interpolate mode (first + last frame) | |
| - Audio input (user provides audio for lip-sync/soundtrack) | |
| - Custom UI components (RadioAnimated, PromptBox, CameraDropdown, AudioDropUpload) | |
| - Duration presets (2s, 3s, 5s, 6s, 8s, 10s, 12s) and resolution selector with SVG icons | |
| Architecture (following alexnasa/ltx-2-TURBO's proven ZeroGPU pattern): | |
| 1. Vendored ltx-core and ltx-pipelines added to sys.path before any imports. | |
| 2. Model files downloaded at module startup (CPU, no GPU lease). | |
| 3. ModelLedger constructed at module level (CPU-only dataclass, no CUDA init). | |
| 4. Text encoder loaded at module level (kept in memory for reuse). | |
| 5. DistilledPipeline constructed with gemma_root=None (no text encoder in pipeline). | |
| 6. Video encoder and transformer pre-loaded at module level via pipeline cache. | |
| 7. @spaces.GPU() on encode_prompt — encodes text, returns .detach().cpu() tensors. | |
| 8. @spaces.GPU(duration=callable) on generate_video — runs pipeline with pre-encoded | |
| contexts passed as video_context/audio_context kwargs. | |
| 9. FP8 quantization fits the 22B transformer within ZeroGPU's ~40GB of VRAM. | |
| Based on the official LTX-2 codebase: https://github.com/Lightricks/LTX-2 | |
| Architecture inspired by alexnasa/ltx-2-TURBO. | |
| """ | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| # 0) Add vendored packages to sys.path BEFORE any ltx imports | |
| # ─────────────────────────────────────────────────────────────────────────── | |
import sys
from pathlib import Path

# Vendored LTX packages ship inside this Space under ./packages; prepend them
# so they win import resolution before any `ltx_core` / `ltx_pipelines` import.
_here = Path(__file__).parent
sys.path.insert(0, str(_here / "packages" / "ltx-pipelines" / "src"))
sys.path.insert(0, str(_here / "packages" / "ltx-core" / "src"))
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Standard library & third-party imports | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| import json | |
| import logging | |
| import os | |
| import random | |
| import subprocess | |
| import tempfile | |
| import time | |
| import traceback | |
| import uuid | |
| from typing import Any | |
| import gradio as gr | |
| import numpy as np | |
| import spaces | |
| import torch | |
| import torch.nn.functional as F | |
| import torchaudio | |
| from huggingface_hub import hf_hub_download, snapshot_download | |
# Module-level logger; basicConfig is acceptable here because this file is the
# Space's entry point (no other code configures logging first).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
| # ─────────────────────────────────────────────────────────────────────────── | |
| # LTX imports (from vendored packages) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| from ltx_core.model.video_vae import TilingConfig | |
| from ltx_core.quantization import QuantizationPolicy | |
| from ltx_pipelines.distilled import DistilledPipeline | |
| from ltx_pipelines.utils import ModelLedger | |
| from ltx_pipelines.utils.args import ImageConditioningInput | |
| from ltx_pipelines.utils.helpers import generate_enhanced_prompt | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Constants | |
| # ─────────────────────────────────────────────────────────────────────────── | |
# Upper bound for randomized seeds (kept in int32 range for the pipeline).
MAX_SEED = np.iinfo(np.int32).max
LTX_REPO = "Lightricks/LTX-2.3"
GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized"
CKPT_DISTILLED = "ltx-2.3-22b-distilled.safetensors"
CKPT_UPSCALER = "ltx-2.3-spatial-upscaler-x2-1.0.safetensors"
# Aspect-ratio label -> (width, height) in pixels.
RESOLUTION_MAP = {
    "16:9": (768, 512),
    "1:1": (512, 512),
    "9:16": (512, 768),
}
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Audio helper functions (ported from alexnasa/ltx-2-TURBO) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| def _coerce_audio_path(audio_path: Any) -> str: | |
| """Handle Gradio's various audio path formats (tuple, dict, string).""" | |
| if isinstance(audio_path, tuple) and len(audio_path) > 0: | |
| audio_path = audio_path[0] | |
| if isinstance(audio_path, dict): | |
| audio_path = audio_path.get("name") or audio_path.get("path") | |
| if not isinstance(audio_path, (str, bytes, os.PathLike)): | |
| raise TypeError( | |
| f"audio_path must be a path-like, got {type(audio_path)}: {audio_path}" | |
| ) | |
| return os.fspath(audio_path) | |
def match_audio_to_duration(
    audio_path: str,
    target_seconds: float,
    target_sr: int = 48000,
    to_mono: bool = True,
    pad_mode: str = "silence",
    device: str = "cuda",
):
    """Load an audio file and force it to exactly ``target_seconds`` long.

    The waveform is resampled to ``target_sr``, optionally averaged down to
    mono, then truncated or padded — with trailing silence by default, or by
    looping the clip when ``pad_mode == "repeat"``.

    Returns:
        (waveform tensor moved to ``device``, sample_rate)
    """
    path = _coerce_audio_path(audio_path)
    waveform, sample_rate = torchaudio.load(path)  # [channels, samples], float32 on CPU
    if sample_rate != target_sr:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_sr)
        sample_rate = target_sr
    if to_mono and waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    wanted = int(round(target_seconds * sample_rate))
    have = waveform.shape[-1]
    if have > wanted:
        waveform = waveform[..., :wanted]
    elif have < wanted:
        if pad_mode == "repeat" and have > 0:
            # Tile the clip enough times to cover the target, then trim.
            copies = (wanted + have - 1) // have
            waveform = waveform.repeat(1, copies)[..., :wanted]
        else:
            waveform = F.pad(waveform, (0, wanted - have))
    return waveform.to(device, non_blocking=True), sample_rate
| # ─────────────────────────────────────────────────────────────────────────── | |
| # 1) Download model files at module startup (CPU, no GPU lease) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
# ───────────────────────────────────────────────────────────────────────────
# 1) Download model files at module startup (CPU, no GPU lease)
# ───────────────────────────────────────────────────────────────────────────
# Done at import time so the ZeroGPU lease is never spent on downloads; the
# hub helpers cache to disk, so subsequent restarts are fast.
logger.info("Downloading LTX model files...")
checkpoint_path = hf_hub_download(repo_id=LTX_REPO, filename=CKPT_DISTILLED)
logger.info(f" Distilled checkpoint: {checkpoint_path}")
spatial_upsampler_path = hf_hub_download(repo_id=LTX_REPO, filename=CKPT_UPSCALER)
logger.info(f" Upscaler: {spatial_upsampler_path}")
logger.info("Downloading Gemma text encoder...")
# Optional token for gated/private repo access (Gemma repos may require it).
HF_TOKEN = os.environ.get("HF_TOKEN")
gemma_root = snapshot_download(repo_id=GEMMA_REPO, token=HF_TOKEN)
logger.info(f" Gemma root: {gemma_root}")
logger.info("All model files ready on disk.")
# ───────────────────────────────────────────────────────────────────────────
# 2) Construct ModelLedger (CPU — no model weights loaded to GPU)
# ───────────────────────────────────────────────────────────────────────────
# NOTE(review): device="cuda" is recorded here at import time; per the module
# docstring ModelLedger is presumably a lazy dataclass that performs no CUDA
# init on construction — confirm against ltx_pipelines.utils.ModelLedger.
logger.info("Constructing ModelLedger (with Gemma for text encoding)...")
fp8_quantization = QuantizationPolicy.fp8_cast()
model_ledger = ModelLedger(
    dtype=torch.bfloat16,
    device="cuda",
    checkpoint_path=checkpoint_path,
    gemma_root_path=gemma_root,
    spatial_upsampler_path=spatial_upsampler_path,
    loras=(),
    quantization=fp8_quantization,
)
logger.info("ModelLedger constructed.")
# ───────────────────────────────────────────────────────────────────────────
# 3) Load text encoder at module level (kept in memory for reuse)
# ───────────────────────────────────────────────────────────────────────────
# Kept resident so both prompt enhancement and encoding reuse one instance.
logger.info("Loading Gemma text encoder...")
text_encoder = model_ledger.text_encoder()
logger.info("Text encoder loaded and ready!")
# ───────────────────────────────────────────────────────────────────────────
# 4) Construct DistilledPipeline WITHOUT text encoder (gemma_root=None)
# ───────────────────────────────────────────────────────────────────────────
# gemma_root=None keeps the large text encoder out of the pipeline; prompts
# are encoded separately via the module-level text_encoder above.
logger.info("Constructing DistilledPipeline (gemma_root=None)...")
pipeline = DistilledPipeline(
    device=torch.device("cuda"),
    checkpoint_path=checkpoint_path,
    spatial_upsampler_path=spatial_upsampler_path,
    gemma_root=None,
    loras=[],
    quantization=fp8_quantization,
)
# ───────────────────────────────────────────────────────────────────────────
# 5) Pre-load video encoder and transformer at module level
# ───────────────────────────────────────────────────────────────────────────
# NOTE(review): pokes the pipeline's private cache attributes so the first
# GPU lease does not pay model-load latency — fragile against ltx_pipelines
# internals; verify the attribute names when bumping the vendored packages.
logger.info("Pre-loading video encoder and transformer...")
pipeline._video_encoder = pipeline.model_ledger.video_encoder()
pipeline._transformer = pipeline.model_ledger.transformer()
logger.info("=" * 60)
logger.info("Pipeline fully loaded and ready!")
logger.info("=" * 60)
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Helpers | |
| # ─────────────────────────────────────────────────────────────────────────── | |
def calc_frames(duration: float, fps: float) -> int:
    """Return a frame count of the form 8*k + 1 (minimum 9) covering duration."""
    requested = max(int(duration * fps) + 1, 9)
    # Round the frame budget up to the next multiple of 8, plus the +1 frame.
    groups = -(-(requested - 1) // 8)
    return groups * 8 + 1
def encode_text_simple(te, prompt: str):
    """Encode ``prompt`` with encoder ``te`` and split the result into the
    video and audio context tensors expected by the pipeline."""
    hidden, mask = te.encode(prompt)
    processor = model_ledger.gemma_embeddings_processor()
    processed = processor.process_hidden_states(hidden, mask)
    # Drop the processor reference immediately; it is rebuilt on each call.
    del processor
    return processed.video_encoding, processed.audio_encoding
def apply_resolution(resolution: str):
    """Map an aspect-ratio label to (width, height); unknown labels fall back
    to the 16:9 default of 768x512."""
    width, height = RESOLUTION_MAP.get(resolution, (768, 512))
    return int(width), int(height)
def apply_duration(duration_str: str):
    """Parse a duration preset such as "5s" into its integer seconds."""
    seconds_text = duration_str[:-1]  # drop the trailing unit character
    return int(seconds_text)
def on_mode_change(selected: str):
    """Show the end-frame input only when "Interpolate" mode is selected."""
    return gr.update(visible=(selected == "Interpolate"))
def get_duration(
    first_frame,
    end_frame,
    prompt,
    duration,
    generation_mode,
    enhance_prompt,
    seed,
    randomize_seed,
    height,
    width,
    audio_path,
    *args,
    **kwargs,
):
    """Estimate GPU lease duration for @spaces.GPU(duration=...).

    The signature mirrors generate_video's inputs (spaces passes them all),
    but only ``duration`` and ``audio_path`` influence the estimate.
    """
    # Audio conditioning adds roughly ten seconds of preprocessing.
    overhead = 10 if audio_path is not None else 0
    seconds = float(duration)
    for limit, lease in ((6, 200), (8, 250), (10, 300)):
        if seconds <= limit:
            return lease + overhead
    return 350 + overhead
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Phase 1: Text Encoding (separate GPU lease) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
@spaces.GPU()
def encode_prompt(
    prompt: str,
    enhance_prompt: bool = True,
    input_image=None,
    seed: int = 42,
):
    """
    Encode prompt using the module-level text_encoder + embeddings_processor.

    Decorated with @spaces.GPU() per the module architecture (item 7): the
    decorator was missing, so text encoding would run without a GPU lease on
    ZeroGPU. (Nested calls from an already-leased @spaces.GPU function
    presumably reuse the caller's lease — confirm against the spaces docs.)

    Args:
        prompt: user prompt text.
        enhance_prompt: when True, rewrite the prompt via
            generate_enhanced_prompt (which may inspect ``input_image``).
        input_image: optional image path used only by prompt enhancement.
        seed: forwarded to the prompt enhancer.

    Returns:
        (embedding_data, final_prompt) — embedding_data holds
        "video_context"/"audio_context" tensors detached to CPU so they can
        cross the ZeroGPU worker boundary, plus the final prompt string.
    """
    logger.info(f"[encode_prompt] prompt='{prompt[:80]}...', enhance={enhance_prompt}")
    final_prompt = prompt
    if enhance_prompt:
        final_prompt = generate_enhanced_prompt(
            text_encoder=text_encoder,
            prompt=prompt,
            image_path=input_image,
            seed=seed,
        )
        logger.info(f"[encode_prompt] Enhanced prompt: '{final_prompt[:120]}...'")
    with torch.inference_mode():
        video_context, audio_context = encode_text_simple(text_encoder, final_prompt)
    embedding_data = {
        "video_context": video_context.detach().cpu(),
        "audio_context": audio_context.detach().cpu(),
        "prompt": final_prompt,
    }
    logger.info("[encode_prompt] Done.")
    return embedding_data, final_prompt
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Phase 2: Video Generation (separate GPU lease, dynamic duration) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
def _frame_to_path(frame, prefix: str) -> str:
    """Persist a Gradio image value to a PNG file if needed; return its path."""
    if isinstance(frame, str):
        # gr.Image(type="filepath") already hands us a filesystem path.
        return frame
    tmp_dir = tempfile.mkdtemp()
    img_path = os.path.join(tmp_dir, f"{prefix}_{int(time.time())}.png")
    if hasattr(frame, "save"):
        # PIL.Image (or anything save-able) — write directly.
        frame.save(img_path)
    else:
        from PIL import Image as PILImage

        PILImage.open(frame).save(img_path)
    return img_path


@spaces.GPU(duration=get_duration)
def generate_video(
    first_frame,
    end_frame,
    prompt: str,
    duration: float,
    generation_mode: str = "Image-to-Video",
    enhance_prompt: bool = True,
    seed: int = 42,
    randomize_seed: bool = True,
    height: int = 512,
    width: int = 768,
    audio_path=None,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Full generation: encode prompt then run pipeline with pre-encoded contexts.

    Supports Image-to-Video (first-frame conditioning), Interpolate
    (first + last frame), and an optional user-supplied audio track.
    Now decorated with @spaces.GPU(duration=get_duration) as the module
    architecture (item 8) requires — the decorator was missing, leaving
    get_duration unused and the pipeline without a GPU lease on ZeroGPU.

    Returns:
        (output video file path, metadata HTML string).

    Raises:
        gr.Error: for an empty prompt, CUDA OOM, or any pipeline failure.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    frame_rate = 24.0
    num_frames = calc_frames(duration, frame_rate)
    logger.info(
        f"[generate_video] mode={generation_mode}, seed={current_seed}, {width}x{height}, "
        f"frames={num_frames}, duration={duration}s, enhance={enhance_prompt}, "
        f"audio={'yes' if audio_path else 'no'}"
    )
    # --- Conditioning images: (path, frame_index, strength) tuples ---
    images = []
    image_path_for_enhance = None
    if first_frame is not None:
        img_path = _frame_to_path(first_frame, "input")
        images.append((img_path, 0, 1.0))
        image_path_for_enhance = img_path
    # Interpolate mode: also condition the final frame, at weaker strength
    # (0.5) so the model is guided toward — not clamped to — the end image.
    if generation_mode == "Interpolate" and end_frame is not None:
        end_path = _frame_to_path(end_frame, "end")
        images.append((end_path, max(0, num_frames - 1), 0.5))
    t0 = time.time()
    try:
        # Phase 1: encode the prompt with the module-level text encoder.
        embeddings, final_prompt = encode_prompt(
            prompt=prompt,
            enhance_prompt=enhance_prompt,
            input_image=image_path_for_enhance,
            seed=current_seed,
        )
        video_context = embeddings["video_context"].to("cuda", non_blocking=True)
        audio_context = embeddings["audio_context"].to("cuda", non_blocking=True)
        del embeddings
        torch.cuda.empty_cache()
        if audio_path is not None:
            # User supplied a soundtrack: swap in a neutral audio context
            # (empty-prompt encoding) so the text prompt does not fight it.
            with torch.inference_mode():
                _, neutral_audio_context = encode_text_simple(text_encoder, "")
            del audio_context
            audio_context = neutral_audio_context
        # Trim/pad the user audio to exactly the video length.
        input_waveform = None
        input_waveform_sample_rate = None
        if audio_path is not None:
            video_seconds = (num_frames - 1) / frame_rate
            input_waveform, input_waveform_sample_rate = match_audio_to_duration(
                audio_path=audio_path,
                target_seconds=video_seconds,
                target_sr=48000,
                to_mono=True,
                pad_mode="silence",
                device="cuda",
            )
        torch.cuda.empty_cache()
        # Phase 2: run the pipeline with the pre-encoded contexts.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
            output_path = tmpfile.name
        with torch.inference_mode():
            pipeline(
                prompt=prompt,
                output_path=output_path,
                seed=current_seed,
                height=height,
                width=width,
                num_frames=num_frames,
                frame_rate=frame_rate,
                images=images,
                tiling_config=TilingConfig.default(),
                video_context=video_context,
                audio_context=audio_context,
                input_waveform=input_waveform,
                input_waveform_sample_rate=input_waveform_sample_rate,
            )
        del video_context, audio_context
        if input_waveform is not None:
            del input_waveform
        torch.cuda.empty_cache()
        elapsed = time.time() - t0
        logger.info(f"[generate_video] Done in {elapsed:.1f}s")
    except torch.cuda.OutOfMemoryError:
        elapsed = time.time() - t0
        logger.error(f"OOM after {elapsed:.1f}s")
        raise gr.Error("Out of GPU memory. Try a shorter duration or lower resolution.")
    except Exception as e:
        elapsed = time.time() - t0
        tb = traceback.format_exc()
        logger.error(f"Generation failed after {elapsed:.1f}s:\n{tb}")
        raise gr.Error(f"Generation failed: {type(e).__name__}: {e}")
    # --- Build the metadata HTML card shown under the player ---
    meta_parts = [
        f'<span class="meta-chip"><b>Seed</b> {current_seed}</span>',
        f'<span class="meta-chip"><b>Resolution</b> {width}×{height}</span>',
        f'<span class="meta-chip"><b>Duration</b> {duration}s</span>',
        f'<span class="meta-chip"><b>Time</b> {elapsed:.1f}s</span>',
    ]
    meta_html = '<div class="gen-meta-card">'
    meta_html += '<div class="meta-chips">' + "".join(meta_parts) + "</div>"
    if enhance_prompt and final_prompt and final_prompt != prompt:
        # Escape the enhanced prompt for HTML embedding. The previous chain
        # had lost its entity strings (e.g. replacing "&" with "&"), making
        # it a no-op; restore proper &amp;/&lt;/&gt;/&quot; escaping.
        escaped = (
            final_prompt.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
        )
        meta_html += (
            '<div class="meta-enhanced-prompt">'
            '<span class="meta-ep-label">Enhanced Prompt</span>'
            f'<p class="meta-ep-text">{escaped}</p>'
            "</div>"
        )
    meta_html += "</div>"
    return str(output_path), meta_html
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Custom UI Components (ported from alexnasa/ltx-2-TURBO) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
class RadioAnimated(gr.HTML):
    """Animated segmented radio (like iOS pill selector).

    Renders native radio inputs with a sliding highlight pill. The load
    script keeps ``props.value`` in sync both ways: user clicks update
    props.value and fire a Gradio "change" event, while external value
    changes are picked up by a requestAnimationFrame polling loop.

    NOTE(review): relies on a Gradio build whose gr.HTML accepts
    ``html_template``/``js_on_load`` and injects ``element``/``props``/
    ``trigger`` into the load script — confirm against the pinned version.
    """

    def __init__(self, choices, value=None, **kwargs):
        # A segmented control needs at least two options to be meaningful.
        if not choices or len(choices) < 2:
            raise ValueError("RadioAnimated requires at least 2 choices.")
        if value is None:
            value = choices[0]
        # Unique suffix so multiple instances on one page do not share a
        # radio group name (which would make them mutually exclusive).
        uid = uuid.uuid4().hex[:8]
        group_name = f"ra-{uid}"
        inputs_html = "\n".join(
            f'<input class="ra-input" type="radio" name="{group_name}" '
            f'id="{group_name}-{i}" value="{c}">'
            f'<label class="ra-label" for="{group_name}-{i}">{c}</label>'
            for i, c in enumerate(choices)
        )
        html_template = f"""
        <div class="ra-wrap" data-ra="{uid}">
        <div class="ra-inner">
        <div class="ra-highlight"></div>
        {inputs_html}
        </div>
        </div>
        """
        js_on_load = r"""
        (() => {
        const wrap = element.querySelector('.ra-wrap');
        const inner = element.querySelector('.ra-inner');
        const highlight = element.querySelector('.ra-highlight');
        const inputs = Array.from(element.querySelectorAll('.ra-input'));
        const labels = Array.from(element.querySelectorAll('.ra-label'));
        if (!inputs.length || !labels.length) return;
        const choices = inputs.map(i => i.value);
        const PAD = 6;
        let currentIdx = 0;
        function setHighlightByIndex(idx) {
        currentIdx = idx;
        const lbl = labels[idx];
        if (!lbl) return;
        const innerRect = inner.getBoundingClientRect();
        const lblRect = lbl.getBoundingClientRect();
        highlight.style.width = `${lblRect.width}px`;
        const x = (lblRect.left - innerRect.left - PAD);
        highlight.style.transform = `translateX(${x}px)`;
        }
        function setCheckedByValue(val, shouldTrigger=false) {
        const idx = Math.max(0, choices.indexOf(val));
        inputs.forEach((inp, i) => { inp.checked = (i === idx); });
        requestAnimationFrame(() => setHighlightByIndex(idx));
        props.value = choices[idx];
        if (shouldTrigger) trigger('change', props.value);
        }
        setCheckedByValue(props.value ?? choices[0], false);
        inputs.forEach((inp) => {
        inp.addEventListener('change', () => setCheckedByValue(inp.value, true));
        });
        window.addEventListener('resize', () => setHighlightByIndex(currentIdx));
        let last = props.value;
        const syncFromProps = () => {
        if (props.value !== last) {
        last = props.value;
        setCheckedByValue(last, false);
        }
        requestAnimationFrame(syncFromProps);
        };
        requestAnimationFrame(syncFromProps);
        })();
        """
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
class PromptBox(gr.HTML):
    """Prompt textarea with an internal footer slot for embedding dropdowns.

    The textarea auto-resizes up to 240px, autofocuses on desktop when no
    other element has focus, and mirrors its content into ``props.value``
    (firing a Gradio "change" event on user input). External value changes
    are picked up by a requestAnimationFrame polling loop.

    NOTE(review): like RadioAnimated, depends on a gr.HTML variant that
    accepts ``html_template``/``js_on_load`` — confirm against the pinned
    Gradio version.
    """

    def __init__(self, value="", placeholder="Describe what you want...", **kwargs):
        # Unique marker so multiple prompt boxes can coexist on one page.
        uid = uuid.uuid4().hex[:8]
        html_template = f"""
        <div class="ds-card" data-ds="{uid}">
        <div class="ds-top">
        <textarea class="ds-textarea" rows="3" placeholder="{placeholder}"></textarea>
        <div class="ds-footer" aria-label="prompt-footer"></div>
        </div>
        </div>
        """
        js_on_load = r"""
        (() => {
        const textarea = element.querySelector(".ds-textarea");
        if (!textarea) return;
        const autosize = () => {
        textarea.style.height = "0px";
        textarea.style.height = Math.min(textarea.scrollHeight, 240) + "px";
        };
        const setValue = (v, triggerChange=false) => {
        const val = (v ?? "");
        if (textarea.value !== val) textarea.value = val;
        autosize();
        props.value = textarea.value;
        if (triggerChange) trigger("change", props.value);
        };
        setValue(props.value, false);
        textarea.addEventListener("input", () => {
        autosize();
        props.value = textarea.value;
        trigger("change", props.value);
        });
        const shouldAutoFocus = () => {
        const ae = document.activeElement;
        if (ae && ae !== document.body && ae !== document.documentElement) return false;
        if (window.matchMedia && window.matchMedia("(max-width: 768px)").matches) return false;
        return true;
        };
        const focusWithRetry = (tries = 30) => {
        if (!shouldAutoFocus()) return;
        if (document.activeElement !== textarea) textarea.focus({ preventScroll: true });
        if (document.activeElement === textarea) return;
        if (tries > 0) requestAnimationFrame(() => focusWithRetry(tries - 1));
        };
        requestAnimationFrame(() => focusWithRetry());
        let last = props.value;
        const syncFromProps = () => {
        if (props.value !== last) {
        last = props.value;
        setValue(last, false);
        }
        requestAnimationFrame(syncFromProps);
        };
        requestAnimationFrame(syncFromProps);
        })();
        """
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
class CameraDropdown(gr.HTML):
    """Custom dropdown with optional icons per item.

    Choices may be plain strings or {"label", "value", "icon"} dicts.
    The selected value is mirrored into ``props.value`` and a Gradio
    "change" event fires on selection.

    Bug fix: the load script previously bound the trigger <button> to a
    const named ``trigger``, shadowing the injected ``trigger(event, value)``
    callback (used un-shadowed by the sibling components), so
    ``setValue(..., true)`` called a DOM element and threw instead of firing
    the change event. The element is now named ``triggerBtn``.
    """

    def __init__(self, choices, value="None", title="Dropdown", **kwargs):
        if not choices:
            raise ValueError("CameraDropdown requires choices.")
        # Normalize every choice to a {"label", "value", "icon"} dict.
        norm = []
        for c in choices:
            if isinstance(c, dict):
                label = str(c.get("label", c.get("value", "")))
                val = str(c.get("value", label))
                icon = c.get("icon", None)
                norm.append({"label": label, "value": val, "icon": icon})
            else:
                s = str(c)
                norm.append({"label": s, "value": s, "icon": None})
        uid = uuid.uuid4().hex[:8]

        def render_item(item):
            # Each choice becomes a <button> carrying its value in a data attr.
            icon_html = ""
            if item["icon"]:
                icon_html = f'<span class="cd-icn">{item["icon"]}</span>'
            return (
                f'<button type="button" class="cd-item" '
                f'data-value="{item["value"]}">'
                f'{icon_html}<span class="cd-label">{item["label"]}</span>'
                f"</button>"
            )

        items_html = "\n".join(render_item(item) for item in norm)
        html_template = f"""
        <div class="cd-wrap" data-cd="{uid}">
        <button type="button" class="cd-trigger" aria-haspopup="menu" aria-expanded="false">
        <span class="cd-trigger-icon"></span>
        <span class="cd-trigger-text"></span>
        <span class="cd-caret">▾</span>
        </button>
        <div class="cd-menu" role="menu" aria-hidden="true">
        <div class="cd-title">{title}</div>
        <div class="cd-items">
        {items_html}
        </div>
        </div>
        </div>
        """
        value_to_label = {it["value"]: it["label"] for it in norm}
        value_to_icon = {it["value"]: (it["icon"] or "") for it in norm}
        js_on_load = r"""
        (() => {
        const wrap = element.querySelector(".cd-wrap");
        const triggerBtn = element.querySelector(".cd-trigger");
        const triggerIcon = element.querySelector(".cd-trigger-icon");
        const triggerText = element.querySelector(".cd-trigger-text");
        const menu = element.querySelector(".cd-menu");
        const items = Array.from(element.querySelectorAll(".cd-item"));
        if (!wrap || !triggerBtn || !menu || !items.length) return;
        const valueToLabel = __VALUE_TO_LABEL__;
        const valueToIcon = __VALUE_TO_ICON__;
        const safeLabel = (v) => (valueToLabel && valueToLabel[v]) ? valueToLabel[v] : (v ?? "None");
        const safeIcon = (v) => (valueToIcon && valueToIcon[v]) ? valueToIcon[v] : "";
        function closeMenu() {
        menu.classList.remove("open");
        triggerBtn.setAttribute("aria-expanded", "false");
        menu.setAttribute("aria-hidden", "true");
        }
        function openMenu() {
        menu.classList.add("open");
        triggerBtn.setAttribute("aria-expanded", "true");
        menu.setAttribute("aria-hidden", "false");
        }
        function setValue(val, shouldTrigger = false) {
        const v = (val ?? "None");
        props.value = v;
        triggerText.textContent = safeLabel(v);
        if (triggerIcon) {
        triggerIcon.innerHTML = safeIcon(v);
        triggerIcon.style.display = safeIcon(v) ? "inline-flex" : "none";
        }
        items.forEach(btn => {
        btn.dataset.selected = (btn.dataset.value === v) ? "true" : "false";
        });
        if (shouldTrigger) trigger("change", props.value);
        }
        triggerBtn.addEventListener("pointerdown", (e) => {
        e.preventDefault();
        e.stopPropagation();
        if (menu.classList.contains("open")) closeMenu();
        else openMenu();
        });
        document.addEventListener("pointerdown", (e) => {
        if (!wrap.contains(e.target)) closeMenu();
        }, true);
        document.addEventListener("keydown", (e) => {
        if (e.key === "Escape") closeMenu();
        });
        wrap.addEventListener("focusout", (e) => {
        if (!wrap.contains(e.relatedTarget)) closeMenu();
        });
        items.forEach((btn) => {
        btn.addEventListener("pointerdown", (e) => {
        e.preventDefault();
        e.stopPropagation();
        closeMenu();
        setValue(btn.dataset.value, true);
        });
        });
        setValue((props.value ?? "None"), false);
        let last = props.value;
        const syncFromProps = () => {
        if (props.value !== last) {
        last = props.value;
        setValue(last, false);
        }
        requestAnimationFrame(syncFromProps);
        };
        requestAnimationFrame(syncFromProps);
        })();
        """
        # Inject the value->label/icon maps as JSON literals into the script.
        js_on_load = js_on_load.replace(
            "__VALUE_TO_LABEL__", json.dumps(value_to_label)
        )
        js_on_load = js_on_load.replace("__VALUE_TO_ICON__", json.dumps(value_to_icon))
        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs,
        )
| class AudioDropUpload(gr.HTML): | |
| """Custom audio drop/click UI that proxies file into a hidden gr.File component.""" | |
| def __init__(self, target_audio_elem_id: str, value=None, **kwargs): | |
| uid = uuid.uuid4().hex[:8] | |
| html_template = f""" | |
| <div class="aud-wrap" data-aud="{uid}"> | |
| <div class="aud-drop" role="button" tabindex="0" aria-label="Upload audio"> | |
| <div><strong>(Optional) Drag & drop an audio file here</strong></div> | |
| <div class="aud-hint">...or click to browse</div> | |
| </div> | |
| <div class="aud-row" aria-live="polite"> | |
| <audio class="aud-player" controls></audio> | |
| <button class="aud-remove" type="button" aria-label="Remove audio"> | |
| <svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false"> | |
| <path d="M18 6L6 18M6 6l12 12" | |
| stroke="currentColor" stroke-width="2.25" stroke-linecap="round"/> | |
| </svg> | |
| </button> | |
| </div> | |
| <div class="aud-filelabel"></div> | |
| </div> | |
| """ | |
| js_on_load = r""" | |
| (() => { | |
| function grRoot() { | |
| const ga = document.querySelector("gradio-app"); | |
| return (ga && ga.shadowRoot) ? ga.shadowRoot : document; | |
| } | |
| const root = grRoot(); | |
| const wrap = element.querySelector(".aud-wrap"); | |
| const drop = element.querySelector(".aud-drop"); | |
| const row = element.querySelector(".aud-row"); | |
| const player = element.querySelector(".aud-player"); | |
| const removeBtn = element.querySelector(".aud-remove"); | |
| const label = element.querySelector(".aud-filelabel"); | |
| const TARGET_ID = "__TARGET_ID__"; | |
| let currentUrl = null; | |
| function findHiddenAudioFileInput() { | |
| const host = root.querySelector("#" + CSS.escape(TARGET_ID)); | |
| if (!host) return null; | |
| const inp = host.querySelector('input[type="file"]'); | |
| return inp; | |
| } | |
| function showDrop() { | |
| drop.style.display = ""; | |
| row.style.display = "none"; | |
| label.style.display = "none"; | |
| label.textContent = ""; | |
| } | |
| function showPlayer(filename) { | |
| drop.style.display = "none"; | |
| row.style.display = "flex"; | |
| if (filename) { | |
| label.textContent = "Loaded: " + filename; | |
| label.style.display = "block"; | |
| } | |
| } | |
| function clearPreview() { | |
| player.pause(); | |
| player.removeAttribute("src"); | |
| player.load(); | |
| if (currentUrl) { | |
| URL.revokeObjectURL(currentUrl); | |
| currentUrl = null; | |
| } | |
| } | |
| function clearHiddenGradioAudio() { | |
| const fileInput = findHiddenAudioFileInput(); | |
| if (!fileInput) return; | |
| fileInput.value = ""; | |
| const dt = new DataTransfer(); | |
| fileInput.files = dt.files; | |
| fileInput.dispatchEvent(new Event("input", { bubbles: true })); | |
| fileInput.dispatchEvent(new Event("change", { bubbles: true })); | |
| } | |
| function clearAll() { | |
| clearPreview(); | |
| clearHiddenGradioAudio(); | |
| props.value = "__CLEAR__"; | |
| trigger("change", props.value); | |
| showDrop(); | |
| } | |
| function loadFileToPreview(file) { | |
| if (!file) return; | |
| if (!file.type || !file.type.startsWith("audio/")) { | |
| alert("Please choose an audio file."); | |
| return; | |
| } | |
| clearPreview(); | |
| currentUrl = URL.createObjectURL(file); | |
| player.src = currentUrl; | |
| showPlayer(file.name); | |
| } | |
| function pushFileIntoHiddenGradioAudio(file) { | |
| const fileInput = findHiddenAudioFileInput(); | |
| if (!fileInput) { | |
| console.warn("Could not find hidden gr.File input. Check elem_id:", TARGET_ID); | |
| return; | |
| } | |
| fileInput.value = ""; | |
| const dt = new DataTransfer(); | |
| dt.items.add(file); | |
| fileInput.files = dt.files; | |
| fileInput.dispatchEvent(new Event("input", { bubbles: true })); | |
| fileInput.dispatchEvent(new Event("change", { bubbles: true })); | |
| } | |
| function handleFile(file) { | |
| loadFileToPreview(file); | |
| pushFileIntoHiddenGradioAudio(file); | |
| } | |
| const localPicker = document.createElement("input"); | |
| localPicker.type = "file"; | |
| localPicker.accept = "audio/*"; | |
| localPicker.style.display = "none"; | |
| wrap.appendChild(localPicker); | |
| localPicker.addEventListener("change", () => { | |
| const f = localPicker.files && localPicker.files[0]; | |
| if (f) handleFile(f); | |
| localPicker.value = ""; | |
| }); | |
| drop.addEventListener("click", () => localPicker.click()); | |
| drop.addEventListener("keydown", (e) => { | |
| if (e.key === "Enter" || e.key === " ") { | |
| e.preventDefault(); | |
| localPicker.click(); | |
| } | |
| }); | |
| removeBtn.addEventListener("click", clearAll); | |
| ["dragenter","dragover","dragleave","drop"].forEach(evt => { | |
| drop.addEventListener(evt, (e) => { | |
| e.preventDefault(); | |
| e.stopPropagation(); | |
| }); | |
| }); | |
| drop.addEventListener("dragover", () => drop.classList.add("dragover")); | |
| drop.addEventListener("dragleave", () => drop.classList.remove("dragover")); | |
| drop.addEventListener("drop", (e) => { | |
| drop.classList.remove("dragover"); | |
| const f = e.dataTransfer.files && e.dataTransfer.files[0]; | |
| if (f) handleFile(f); | |
| }); | |
| showDrop(); | |
| function setPreviewFromPath(path) { | |
| if (path === "__CLEAR__") path = null; | |
| if (!path) { | |
| clearPreview(); | |
| showDrop(); | |
| return; | |
| } | |
| let url = path; | |
| if (!/^https?:\/\//.test(path) && !path.startsWith("gradio_api/file=") && !path.startsWith("/file=")) { | |
| url = "gradio_api/file=" + path; | |
| } | |
| clearPreview(); | |
| player.src = url; | |
| showPlayer(path.split("/").pop()); | |
| } | |
| let last = props.value; | |
| const syncFromProps = () => { | |
| const v = props.value; | |
| if (v !== last) { | |
| last = v; | |
| if (!v || v === "__CLEAR__") setPreviewFromPath(null); | |
| else setPreviewFromPath(String(v)); | |
| } | |
| requestAnimationFrame(syncFromProps); | |
| }; | |
| requestAnimationFrame(syncFromProps); | |
| })(); | |
| """ | |
| js_on_load = js_on_load.replace("__TARGET_ID__", target_audio_elem_id) | |
| super().__init__( | |
| value=value, | |
| html_template=html_template, | |
| js_on_load=js_on_load, | |
| **kwargs, | |
| ) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| # CSS (dark theme, ported from alexnasa/ltx-2-TURBO) | |
| # ─────────────────────────────────────────────────────────────────────────── | |
# Stylesheet injected app-wide via gr.Blocks(css=CSS).
# Sections (marked by /* ---- */ banners inside the sheet):
#   - layout:      page columns; #controls-row is display:none because its
#     dropdowns are relocated into the prompt footer by the JS relocator.
#   - metadata:    .gen-meta-* / .meta-* cards shown under the output video.
#   - generate button: animated gradient (.button-gradient + keyframes).
#   - custom widgets: RadioAnimated (.ra-*), PromptBox (.ds-*),
#     CameraDropdown (.cd-*), AudioDropUpload (.aud-*).
# #audio_input_hidden hides the real gr.File that AudioDropUpload mirrors into.
CSS = """
/* ---- layout ---- */
#controls-row {
  display: none !important;
  align-items: center;
  gap: 12px;
  flex-wrap: nowrap;
}
#controls-row > * {
  flex: 0 0 auto !important;
  width: auto !important;
  min-width: 0 !important;
}
#col-container {
  margin: 0 auto;
  max-width: 1600px;
}
#step-column {
  padding: 10px;
  border-radius: 8px;
  box-shadow: var(--card-shadow);
  margin: 10px;
}
/* ---- generation metadata card ---- */
.gen-meta-card {
  margin-top: 10px;
  padding: 12px 16px;
  border-radius: 10px;
  background: rgba(255, 255, 255, 0.04);
  border: 1px solid rgba(255, 255, 255, 0.08);
}
.meta-chips {
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
}
.meta-chip {
  display: inline-flex;
  align-items: center;
  gap: 5px;
  padding: 4px 10px;
  border-radius: 6px;
  background: rgba(255, 255, 255, 0.06);
  font-size: 13px;
  color: rgba(255, 255, 255, 0.75);
  font-family: monospace;
}
.meta-chip b {
  color: rgba(255, 255, 255, 0.45);
  font-weight: 500;
  font-family: sans-serif;
  font-size: 11px;
  text-transform: uppercase;
  letter-spacing: 0.5px;
}
.meta-enhanced-prompt {
  margin-top: 10px;
  padding-top: 10px;
  border-top: 1px solid rgba(255, 255, 255, 0.06);
}
.meta-ep-label {
  font-size: 11px;
  font-weight: 500;
  text-transform: uppercase;
  letter-spacing: 0.5px;
  color: rgba(255, 255, 255, 0.4);
}
.meta-ep-text {
  margin: 6px 0 0 0;
  font-size: 13px;
  line-height: 1.5;
  color: rgba(255, 255, 255, 0.65);
  max-height: 120px;
  overflow-y: auto;
  white-space: pre-wrap;
  word-break: break-word;
}
/* ---- generate button ---- */
.button-gradient {
  background: linear-gradient(45deg, rgb(255, 65, 108), rgb(255, 75, 43), rgb(255, 155, 0), rgb(255, 65, 108)) 0% 0% / 400% 400%;
  border: none;
  padding: 14px 28px;
  font-size: 16px;
  font-weight: bold;
  color: white;
  border-radius: 10px;
  cursor: pointer;
  transition: 0.3s ease-in-out;
  animation: 2s linear 0s infinite normal none running gradientAnimation;
  box-shadow: rgba(255, 65, 108, 0.6) 0px 4px 10px;
}
@keyframes gradientAnimation {
  0% { background-position: 0% 50%; }
  50% { background-position: 100% 50%; }
  100% { background-position: 0% 50%; }
}
/* ---- mode row ---- */
#mode-row {
  display: flex !important;
  justify-content: center !important;
  align-items: center !important;
  width: 100% !important;
}
#mode-row > * {
  flex: 0 0 auto !important;
  width: auto !important;
  min-width: 0 !important;
}
#mode-row .gr-html,
#mode-row .gradio-html,
#mode-row .prose,
#mode-row .block {
  width: auto !important;
  flex: 0 0 auto !important;
  display: inline-block !important;
}
#radioanimated_mode {
  display: inline-flex !important;
  justify-content: center !important;
  width: auto !important;
}
/* ---- radioanimated ---- */
.ra-wrap { width: fit-content; }
.ra-inner {
  position: relative;
  display: inline-flex;
  align-items: center;
  gap: 0;
  padding: 6px;
  background: #0b0b0b;
  border-radius: 9999px;
  overflow: hidden;
  user-select: none;
}
.ra-input { display: none; }
.ra-label {
  position: relative;
  z-index: 2;
  padding: 10px 18px;
  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
  font-size: 14px;
  font-weight: 600;
  color: rgba(255,255,255,0.7);
  cursor: pointer;
  transition: color 180ms ease;
  white-space: nowrap;
}
.ra-highlight {
  position: absolute;
  z-index: 1;
  top: 6px;
  left: 6px;
  height: calc(100% - 12px);
  border-radius: 9999px;
  background: #8bff97;
  transition: transform 200ms ease, width 200ms ease;
}
.ra-input:checked + .ra-label { color: rgba(0,0,0,0.75); }
/* ---- prompt box ---- */
.ds-card {
  width: 100%;
  max-width: 720px;
  margin: 0 auto;
  position: relative;
  z-index: 50;
}
.ds-top {
  position: relative;
  background: #2b2b2b;
  border: 1px solid rgba(255,255,255,0.12);
  border-radius: 14px;
  overflow: visible !important;
}
.ds-textarea {
  width: 100%;
  box-sizing: border-box;
  background: transparent !important;
  border: none !important;
  border-radius: 0 !important;
  color: rgba(255,255,255,0.9);
  padding: 14px 16px;
  padding-bottom: 72px;
  outline: none;
  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
  font-size: 15px;
  line-height: 1.35;
  resize: none;
  min-height: 210px;
  max-height: 210px;
  overflow-y: auto;
  scrollbar-width: none;
  position: relative;
  z-index: 1;
}
.ds-textarea::-webkit-scrollbar { width: 0; height: 0; }
.ds-textarea:focus,
.ds-textarea:focus-visible { outline: none !important; box-shadow: none !important; }
.ds-textarea { outline: none !important; }
.ds-top:focus-within {
  border-color: rgba(255,255,255,0.22) !important;
  box-shadow: 0 0 0 3px rgba(255,255,255,0.06) !important;
  border-radius: 14px !important;
}
.ds-top { border-radius: 14px !important; }
.ds-top::after {
  content: "";
  position: absolute;
  left: 0; right: 0; bottom: 0;
  height: 56px;
  background: #2b2b2b;
  border-bottom-left-radius: 14px !important;
  border-bottom-right-radius: 14px !important;
  pointer-events: none;
  z-index: 2;
}
.ds-footer {
  position: absolute;
  right: 12px;
  bottom: 10px;
  display: flex;
  gap: 8px;
  align-items: center;
  justify-content: flex-end;
  z-index: 20 !important;
}
.ds-footer .cd-trigger {
  min-height: 32px;
  padding: 6px 10px;
  font-size: 12px;
  gap: 6px;
  border-radius: 9999px;
}
.ds-footer .cd-trigger-icon,
.ds-footer .cd-icn { width: 14px; height: 14px; }
.ds-footer .cd-trigger-icon svg,
.ds-footer .cd-icn svg { width: 14px; height: 14px; }
.ds-footer .cd-caret { font-size: 11px; }
.ds-footer .cd-menu { z-index: 999999 !important; }
/* ---- camera dropdown ---- */
.cd-wrap { position: relative; display: inline-block; }
.cd-trigger {
  margin-top: 2px;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  gap: 10px;
  border: none;
  box-sizing: border-box;
  padding: 10px 18px;
  min-height: 52px;
  line-height: 1.2;
  border-radius: 9999px;
  background: #0b0b0b;
  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
  font-size: 14px;
  color: rgba(255,255,255,0.7) !important;
  font-weight: 600 !important;
  cursor: pointer;
  user-select: none;
  white-space: nowrap;
}
.cd-trigger .cd-trigger-text,
.cd-trigger .cd-caret { color: rgba(255,255,255,0.7) !important; }
.cd-caret { opacity: 0.8; font-weight: 900; }
.cd-trigger-icon {
  color: rgba(255,255,255,0.9);
  display: inline-flex;
  align-items: center;
  justify-content: center;
  width: 18px; height: 18px;
}
.cd-trigger-icon svg { width: 18px; height: 18px; display: block; }
.cd-menu {
  position: absolute;
  top: calc(100% + 4px);
  left: 0;
  min-width: 240px;
  background: #2b2b2b !important;
  border: 1px solid rgba(255,255,255,0.14) !important;
  border-radius: 14px;
  box-shadow: 0 18px 40px rgba(0,0,0,0.35);
  padding: 10px;
  opacity: 0;
  transform: translateY(-6px);
  pointer-events: none;
  transition: opacity 160ms ease, transform 160ms ease;
  z-index: 9999;
}
.cd-menu.open {
  opacity: 1;
  transform: translateY(0);
  pointer-events: auto;
}
.cd-title {
  font-size: 12px;
  font-weight: 600;
  text-transform: uppercase;
  letter-spacing: 0.04em;
  color: rgba(255,255,255,0.55) !important;
  margin-bottom: 6px;
  padding: 0 6px;
  pointer-events: none;
}
.cd-items { display: flex; flex-direction: column; gap: 0px; }
.cd-item {
  width: 100%;
  text-align: left;
  border: none;
  background: transparent;
  color: rgba(255,255,255,0.92) !important;
  padding: 8px 34px 8px 12px;
  border-radius: 10px;
  cursor: pointer;
  font-size: 14px;
  font-weight: 700;
  position: relative;
  transition: background 120ms ease;
  display: flex;
  align-items: center;
  gap: 10px;
}
.cd-item * { color: rgba(255,255,255,0.92) !important; }
.cd-item:hover { background: rgba(255,255,255,0.10) !important; }
.cd-item::after {
  content: "\\2713";
  position: absolute;
  right: 12px;
  top: 50%;
  transform: translateY(-50%);
  opacity: 0;
  transition: opacity 120ms ease;
  color: rgba(255,255,255,0.92) !important;
  font-weight: 900;
}
.cd-item[data-selected="true"]::after { opacity: 1; }
.cd-item.selected {
  background: transparent !important;
  border: none !important;
}
.cd-icn {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  width: 18px; height: 18px;
  flex: 0 0 18px;
}
.cd-icn svg { width: 18px; height: 18px; display: block; }
.cd-icn svg * { stroke: rgba(255,255,255,0.9); }
.cd-label { flex: 1; }
.cd-trigger, .cd-trigger * { color: rgba(255,255,255,0.75) !important; }
/* ---- AudioDropUpload ---- */
.aud-wrap { width: 100%; max-width: 720px; }
.aud-drop {
  border: 2px dashed var(--body-text-color-subdued);
  border-radius: 16px;
  padding: 18px;
  text-align: center;
  cursor: pointer;
  user-select: none;
  color: var(--body-text-color);
  background: var(--block-background-fill);
}
.aud-drop.dragover {
  border-color: rgba(255,255,255,0.35);
  background: rgba(255,255,255,0.06);
}
.aud-hint {
  color: var(--body-text-color-subdued);
  font-size: 0.95rem;
  margin-top: 6px;
}
.aud-row {
  display: none;
  align-items: center;
  gap: 10px;
  background: #0b0b0b;
  border-radius: 9999px;
  padding: 8px 10px;
}
.aud-player {
  flex: 1;
  width: 100%;
  height: 34px;
  border-radius: 9999px;
}
.aud-remove {
  appearance: none;
  border: none;
  background: transparent;
  color: rgba(255,255,255);
  cursor: pointer;
  width: 36px; height: 36px;
  border-radius: 9999px;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  padding: 0;
  transition: background 120ms ease, color 120ms ease, opacity 120ms ease;
  opacity: 0.9;
  flex: 0 0 auto;
}
.aud-remove:hover {
  background: rgba(255,255,255,0.08);
  color: rgb(255,255,255);
  opacity: 1;
}
.aud-filelabel {
  margin: 10px 6px 0;
  color: var(--body-text-color-subdued);
  font-size: 0.95rem;
  display: none;
}
#audio_input_hidden { display: none !important; }
"""
| # ─────────────────────────────────────────────────────────────────────────── | |
| # SVG icons for resolution dropdown | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| ICON_16_9 = """<svg viewBox="0 0 24 24" fill="none" aria-hidden="true"> | |
| <rect x="3" y="7" width="18" height="10" rx="2" stroke="currentColor" stroke-width="2"/> | |
| </svg>""" | |
| ICON_1_1 = """<svg viewBox="0 0 24 24" fill="none" aria-hidden="true"> | |
| <rect x="6" y="6" width="12" height="12" rx="2" stroke="currentColor" stroke-width="2"/> | |
| </svg>""" | |
| ICON_9_16 = """<svg viewBox="0 0 24 24" fill="none" aria-hidden="true"> | |
| <rect x="7" y="3" width="10" height="18" rx="2" stroke="currentColor" stroke-width="2"/> | |
| </svg>""" | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Gradio UI | |
| # ─────────────────────────────────────────────────────────────────────────── | |
# UI construction. Pattern: custom HTML/JS components (RadioAnimated,
# PromptBox, CameraDropdown, AudioDropUpload) are mirrored into hidden
# native Gradio components (Textbox / Slider / Number / File) via .change
# handlers, so the backend only ever sees standard component values.
with gr.Blocks(title="LTX-2.3 Video [Turbo]", css=CSS) as demo:
    # ---- Page header: title, model/repo links, author badges ----
    gr.HTML(
        """
        <div style="text-align: center;">
            <p style="font-size:16px; display: inline; margin: 0;">
                <strong>LTX-2.3 Video [Turbo]</strong> — 22B DiT audio-video model on free ZeroGPU
            </p>
            <a href="https://huggingface.co/Lightricks/LTX-2.3"
               target="_blank" rel="noopener noreferrer"
               style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
                [model]
            </a>
            <a href="https://github.com/Lightricks/LTX-2"
               target="_blank" rel="noopener noreferrer"
               style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
                [github]
            </a>
        </div>
        <div style="text-align: center; margin-top: 4px;">
            <strong>HF Space by:</strong>
            <a href="https://huggingface.co/ZeroCollabs" target="_blank" rel="noopener noreferrer"
               style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
                <img src="https://img.shields.io/badge/%F0%9F%A4%97-Follow%20on%20HF-green.svg" alt="Follow on HF">
            </a>
            <a href="https://github.com/ZeroHackz" target="_blank" rel="noopener noreferrer"
               style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
                <img src="https://img.shields.io/badge/GitHub-Follow-181717?logo=github" alt="Follow on GitHub">
            </a>
        </div>
        """
    )
    with gr.Column(elem_id="col-container"):
        # ---- Mode selector (custom animated radio) ----
        with gr.Row(elem_id="mode-row"):
            radioanimated_mode = RadioAnimated(
                choices=["Image-to-Video", "Interpolate"],
                value="Image-to-Video",
                elem_id="radioanimated_mode",
            )
        with gr.Row():
            # ---- Left column: controls ----
            with gr.Column(elem_id="step-column"):
                with gr.Row():
                    first_frame = gr.Image(
                        label="First Frame (Optional)",
                        type="filepath",
                        height=256,
                    )
                    # Hidden unless mode == "Interpolate" (see on_mode_change).
                    end_frame = gr.Image(
                        label="Last Frame (Optional)",
                        type="filepath",
                        height=256,
                        visible=False,
                    )
                # JS relocator: moves duration & resolution dropdowns into the
                # prompt footer. Retries once per animation frame until the
                # prompt footer and both dropdowns exist in the DOM.
                relocate = gr.HTML(
                    value="",
                    html_template="<div></div>",
                    js_on_load=r"""
                    (() => {
                      function moveIntoFooter() {
                        const promptRoot = document.querySelector("#prompt_ui");
                        if (!promptRoot) return false;
                        const footer = promptRoot.querySelector(".ds-footer");
                        if (!footer) return false;
                        const dur = document.querySelector("#duration_ui .cd-wrap");
                        const res = document.querySelector("#resolution_ui .cd-wrap");
                        if (!dur || !res) return false;
                        footer.appendChild(dur);
                        footer.appendChild(res);
                        return true;
                      }
                      const tick = () => {
                        if (!moveIntoFooter()) requestAnimationFrame(tick);
                      };
                      requestAnimationFrame(tick);
                    })();
                    """,
                )
                prompt_ui = PromptBox(
                    value="make this image come alive, cinematic motion, smooth animation",
                    elem_id="prompt_ui",
                )
                # Hidden real audio input (backend value); hidden via CSS rule
                # on #audio_input_hidden.
                audio_input = gr.File(
                    label="Audio (Optional)",
                    file_types=["audio"],
                    type="filepath",
                    elem_id="audio_input_hidden",
                )
                # Custom audio UI that feeds the hidden gr.File
                audio_ui = AudioDropUpload(
                    target_audio_elem_id="audio_input_hidden",
                    elem_id="audio_ui",
                )
                # Hidden prompt textbox (synced from PromptBox)
                prompt = gr.Textbox(
                    label="Prompt",
                    value="make this image come alive, cinematic motion, smooth animation",
                    lines=3,
                    max_lines=3,
                    visible=False,
                )
                enhance_prompt = gr.Checkbox(
                    label="Enhance Prompt",
                    value=True,
                )
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        value=42,
                        step=1,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            # ---- Right column: output + hidden controls ----
            with gr.Column(elem_id="step-column"):
                output_video = gr.Video(
                    label="Generated Video", autoplay=True, loop=True, height=512
                )
                metadata_display = gr.HTML(value="", elem_id="generation-metadata")
                # #controls-row is display:none in CSS; the two CameraDropdowns
                # inside it are re-parented into the prompt footer by the JS
                # relocator above, while the hidden Slider/Numbers carry the
                # actual backend values.
                with gr.Row(elem_id="controls-row"):
                    duration_ui = CameraDropdown(
                        choices=["6s", "8s", "10s", "12s"],
                        value="6s",
                        title="Clip Duration",
                        elem_id="duration_ui",
                    )
                    duration = gr.Slider(
                        label="Duration (seconds)",
                        minimum=6.0,
                        maximum=12.0,
                        value=6.0,
                        step=2.0,
                        visible=False,
                    )
                    resolution_ui = CameraDropdown(
                        choices=[
                            {"label": "16:9", "value": "16:9", "icon": ICON_16_9},
                            {"label": "1:1", "value": "1:1", "icon": ICON_1_1},
                            {"label": "9:16", "value": "9:16", "icon": ICON_9_16},
                        ],
                        value="9:16",
                        title="Resolution",
                        elem_id="resolution_ui",
                    )
                    width = gr.Number(
                        label="Width", value=512, precision=0, visible=False
                    )
                    height = gr.Number(
                        label="Height", value=768, precision=0, visible=False
                    )
                generate_btn = gr.Button(
                    "Generate Video",
                    variant="primary",
                    elem_classes="button-gradient",
                )

    # ────────────────────────────────────────────────────────────────────
    # Event wiring
    # ────────────────────────────────────────────────────────────────────
    # Mode selector -> show/hide end_frame
    radioanimated_mode.change(
        fn=on_mode_change,
        inputs=radioanimated_mode,
        outputs=[end_frame],
        api_visibility="private",
    )
    # Duration dropdown -> hidden slider
    duration_ui.change(
        fn=apply_duration,
        inputs=duration_ui,
        outputs=[duration],
        api_visibility="private",
    )
    # Resolution dropdown -> hidden width/height
    resolution_ui.change(
        fn=apply_resolution,
        inputs=resolution_ui,
        outputs=[width, height],
        api_visibility="private",
    )
    # PromptBox -> hidden textbox
    prompt_ui.change(
        fn=lambda x: x,
        inputs=prompt_ui,
        outputs=prompt,
        api_visibility="private",
    )

    # Audio UI clear handler
    def on_audio_ui_change(v):
        """Map the custom audio widget's state onto the hidden gr.File.

        Returns None (clearing the hidden file input) when the widget reports
        a cleared/empty state ("__CLEAR__" sentinel, None, or empty string);
        otherwise returns a no-op update so real uploads — which the widget's
        JS pushes into the hidden input directly — are left untouched.
        """
        if v == "__CLEAR__" or v is None or v == "":
            return None
        return gr.update()

    audio_ui.change(
        fn=on_audio_ui_change,
        inputs=audio_ui,
        outputs=audio_input,
        api_visibility="private",
    )
    # Generate button — input order must match generate_video's signature
    # (note: height precedes width).
    generate_btn.click(
        fn=generate_video,
        inputs=[
            first_frame,
            end_frame,
            prompt,
            duration,
            radioanimated_mode,
            enhance_prompt,
            seed,
            randomize_seed,
            height,
            width,
            audio_input,
        ],
        outputs=[output_video, metadata_display],
    )

    # ---- Footer ----
    gr.Markdown(
        """
---
**Notes:**
- ZeroGPU provides limited GPU time per request. Shorter durations are more reliable.
- Max duration is 12 seconds. Longer clips need more GPU time and may be slower.
- FP8 quantization reduces VRAM usage by ~50% with minimal quality impact.
- The 2x spatial upscaler doubles the initial generation resolution.
- This Space uses [google/gemma-3-12b-it-qat-q4_0-unquantized](https://huggingface.co/google/gemma-3-12b-it-qat-q4_0-unquantized) as the text encoder.
If you duplicate this Space, you must first accept the [Gemma license](https://huggingface.co/google/gemma-3-12b-it-qat-q4_0-unquantized) on your HuggingFace account.
Built with [Lightricks/LTX-2.3](https://huggingface.co/Lightricks/LTX-2.3)
| [GitHub](https://github.com/Lightricks/LTX-2)
| Space by [ZeroCollabs](https://huggingface.co/ZeroCollabs)
| [GitHub](https://github.com/ZeroHackz)
<sub>UI inspired by [alexnasa](https://huggingface.co/alexnasa)</sub>
        """
    )
# Script entry point (Hugging Face Spaces runs this module as __main__).
if __name__ == "__main__":
    # show_error=True surfaces backend exceptions in the browser UI, which is
    # useful on Spaces where server logs are harder to reach.
    demo.launch(show_error=True)