Spaces:
Running on Zero
Running on Zero
| import os | |
| import subprocess | |
| import sys | |
| import logging | |
| import random | |
| import tempfile | |
| import uuid | |
| import shutil | |
| from pathlib import Path | |
# Disable torch.compile / dynamo before any torch import
os.environ["TORCH_COMPILE_DISABLE"] = "1"
os.environ["TORCHDYNAMO_DISABLE"] = "1"

# Runtime dependencies
# Every pip call below uses check=False, so a failed install is ignored here
# and only surfaces later if the corresponding import fails.
subprocess.run([sys.executable, "-m", "pip", "install", "xformers==0.0.32.post2", "--no-build-isolation"], check=False)
subprocess.run([
    sys.executable, "-m", "pip", "install",
    "dwpose", "onnxruntime-gpu", "imageio[ffmpeg]", "scikit-image",
    "opencv-python-headless", "decord", "num2words"
], check=False)
# NOTE(review): num2words is already part of the install list above; this
# second call looks redundant.
subprocess.run([sys.executable, "-m", "pip", "install", "num2words"], check=False)

# Read the installed torch version in a child interpreter so that torch is not
# imported into this process at this point.
_tv = subprocess.run([sys.executable, "-c", "import torch; print(torch.__version__)"], capture_output=True, text=True)
if _tv.returncode == 0:
    _full_ver = _tv.stdout.strip()
    # The text after "+" in the version string (e.g. a CUDA tag) selects the
    # wheel index; "cu124" is the fallback when there is no "+" part.
    _cuda_suffix = _full_ver.split("+")[-1] if "+" in _full_ver else "cu124"
    _base_ver = _full_ver.split("+")[0]
    print(f"Detected torch {_full_ver}, reinstalling matching torchaudio...")
    # --no-deps keeps pip from touching the already installed torch.
    subprocess.run([
        sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps",
        f"torchaudio=={_base_ver}",
        "--index-url", f"https://download.pytorch.org/whl/{_cuda_suffix}",
    ], check=False)

# LTX-2 sources are cloned next to this file and pinned to a fixed commit.
LTX_REPO_URL = "https://github.com/Lightricks/LTX-2.git"
LTX_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LTX-2")
LTX_COMMIT = "ae855f8538843825f9015a419cf4ba5edaf5eec2"

# Always start from a fresh clone: any existing checkout is removed first.
if os.path.exists(LTX_REPO_DIR):
    print(f"Removing existing repo at {LTX_REPO_DIR}...")
    shutil.rmtree(LTX_REPO_DIR, ignore_errors=True)

# Unlike the pip calls above, clone/checkout/install use check=True and abort
# startup with CalledProcessError on failure.
print(f"Cloning {LTX_REPO_URL}...")
subprocess.run(["git", "clone", LTX_REPO_URL, LTX_REPO_DIR], check=True)
print(f"Checking out commit {LTX_COMMIT}...")
subprocess.run(["git", "-C", LTX_REPO_DIR, "checkout", LTX_COMMIT], check=True)

print("Installing ltx-core and ltx-pipelines from pinned repo commit...")
subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "--force-reinstall", "--no-deps",
        "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-core"),
        "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines"),
    ],
    check=True,
)

# Put both package source trees at the front of sys.path so the imports that
# follow resolve to the pinned checkout.
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
| import cv2 | |
| import imageio | |
| import numpy as np | |
| import spaces | |
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| from huggingface_hub import hf_hub_download, snapshot_download | |
| from safetensors import safe_open | |
| torch._dynamo.config.suppress_errors = True | |
| torch._dynamo.config.disable = True | |
| from ltx_core.components.diffusion_steps import EulerDiffusionStep | |
| from ltx_core.components.noisers import GaussianNoiser | |
| from ltx_core.conditioning import ( | |
| ConditioningItem, | |
| ConditioningItemAttentionStrengthWrapper, | |
| VideoConditionByReferenceLatent, | |
| ) | |
| from ltx_core.loader import LTXV_LORA_COMFY_RENAMING_MAP, LoraPathStrengthAndSDOps | |
| from ltx_core.model.audio_vae import decode_audio as vae_decode_audio | |
| from ltx_core.model.audio_vae import encode_audio as vae_encode_audio | |
| from ltx_core.model.upsampler import upsample_video | |
| from ltx_core.model.video_vae import TilingConfig, VideoEncoder, get_video_chunks_number | |
| from ltx_core.model.video_vae import decode_video as vae_decode_video | |
| from ltx_core.quantization import QuantizationPolicy | |
| from ltx_core.types import Audio, AudioLatentShape, VideoPixelShape | |
| from ltx_pipelines.utils import ModelLedger, euler_denoising_loop | |
| from ltx_pipelines.utils.args import ImageConditioningInput | |
| from ltx_pipelines.utils.constants import DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES | |
| from ltx_pipelines.utils.helpers import ( | |
| assert_resolution, | |
| cleanup_memory, | |
| combined_image_conditionings, | |
| denoise_audio_video, | |
| denoise_video_only, | |
| encode_prompts, | |
| generate_enhanced_prompt, | |
| get_device, | |
| simple_denoising_func, | |
| ) | |
| from ltx_pipelines.utils.media_io import decode_audio_from_file, encode_video, load_video_conditioning | |
| from ltx_pipelines.utils.types import PipelineComponents | |
| from ltx_core.model.transformer import attention as _attn_mod | |
# Report which attention callable the LTX attention module exposes before patching.
print(f"[ATTN] Before patch: memory_efficient_attention={_attn_mod.memory_efficient_attention}")
try:
    # Point the LTX attention module at xformers' memory_efficient_attention.
    from xformers.ops import memory_efficient_attention as _mea
    _attn_mod.memory_efficient_attention = _mea
    print(f"[ATTN] After patch: memory_efficient_attention={_attn_mod.memory_efficient_attention}")
except Exception as e:
    # Best effort: if xformers cannot be imported or assigned, keep whatever
    # the module had and only report the failure.
    print(f"[ATTN] xformers patch FAILED: {type(e).__name__}: {e}")

# Set the root logger level to INFO.
logging.getLogger().setLevel(logging.INFO)
# Largest value of a signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
DEFAULT_FRAME_RATE = 24.0
DEFAULT_PROMPT = "Make this image come alive with cinematic motion, smooth animation"

# Output sizes keyed by quality tier, then by aspect-ratio label.
# update_resolution unpacks each tuple as (width, height).
RESOLUTIONS = {
    "high": {"16:9": (1536, 1024), "9:16": (1024, 1536), "1:1": (1024, 1024)},
    "low": {"16:9": (768, 512), "9:16": (512, 768), "1:1": (768, 768)},
}

# IC-LoRA weights by display label: Hugging Face repo id and file name.
IC_LORA_OPTIONS = {
    "Union Control (Depth + Edge)": {
        "repo": "Lightricks/LTX-2.3-22b-IC-LoRA-Union-Control",
        "filename": "ltx-2.3-22b-ic-lora-union-control-ref0.5.safetensors",
    },
    "Motion Track Control": {
        "repo": "Lightricks/LTX-2.3-22b-IC-LoRA-Motion-Track-Control",
        "filename": "ltx-2.3-22b-ic-lora-motion-track-control-ref0.5.safetensors",
    },
}
# Key into IC_LORA_OPTIONS used for the startup download.
DEFAULT_IC_LORA = "Union Control (Depth + Edge)"
| # Motion presets - appended to user prompt | |
| MOTION_PRESETS = { | |
| "None": "", | |
| "πΈ Anime Float": "anime style, character floating gently in air with soft flowing hair and glowing particles, dreamy atmospheric motion", | |
| "β‘ Dynamic Action": "high energy action sequence, fast dynamic movement, speed lines, dramatic anime-style motion blur", | |
| "π Fluid Wave": "smooth fluid motion like waves, gentle swaying movement, flowing fabric and hair, tranquil atmospheric", | |
| "π₯ Dramatic Zoom": "slow dramatic zoom in, cinematic depth of field, film grain, epic atmospheric lighting reveal", | |
| "β¨ Magic Sparkle": "magical sparkle effects, glowing particles swirling, enchanted fantasy atmosphere, soft luminous motion", | |
| "π Emotional Pan": "slow cinematic pan across scene, emotional depth, soft bokeh background, contemplative mood", | |
| "π« Orbit Rotate": "smooth orbital camera movement around subject, 360-degree reveal, depth parallax effect", | |
| "π Moonlit Drift": "peaceful moonlit scene with gentle drifting motion, soft silver light, calm ethereal atmosphere", | |
| "βοΈ Battle Scene": "intense battle sequence with rapid movement, dramatic poses, energy blasts and impact effects", | |
| "πΊ Nature Bloom": "flowers blooming in fast motion, nature coming alive, organic growth animation, vibrant colors", | |
| "π Explosion Burst": "dramatic explosion with shockwave, debris flying, cinematic slow-motion impact, epic scale", | |
| "Custom": "__custom__", | |
| } | |
| PROMPT_PRESETS = { | |
| "π Sci-Fi": "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement.", | |
| "π Ocean": "Crystal clear ocean waves crash onto a pristine white sand beach at golden hour. Foam swirls in slow motion around smooth pebbles as the tide retreats.", | |
| "π Urban": "A rain-soaked neon-lit street in a futuristic city at night. Reflections of glowing signs shimmer on the wet pavement as a lone figure walks through pools of violet and cyan light.", | |
| "π Abstract": "Swirling ink clouds dissolve in slow motion through crystal clear water, forming intricate fractal patterns of deep indigo, violet, and gold.", | |
| "πΏ Fantasy": "A glowing forest at twilight where bioluminescent plants pulse softly with ethereal blue and green light. Tiny glowing particles drift through misty air like fireflies.", | |
| "π¬ Animate": "Make this image come alive with smooth, natural cinematic motion. Animate with subtle camera movement, realistic physics, and atmospheric depth.", | |
| } | |
| STYLE_MODIFIERS = { | |
| "None": "", | |
| "π Cinematic 35mm": ", shot on 35mm film, cinematic bokeh, anamorphic lens flare, filmic grain", | |
| "πΈ 8K Ultra Real": ", photorealistic, 8K resolution, ultra-detailed, sharp focus", | |
| "πΈ Anime / Ghibli": ", anime style, Studio Ghibli aesthetic, hand-drawn, warm soft colors", | |
| "π Dark & Moody": ", dark atmospheric lighting, high contrast shadows, noir mood", | |
| "π Dreamy / Surreal": ", dreamy surreal atmosphere, soft glow, painterly, ethereal light", | |
| "πΌ Vintage VHS": ", VHS aesthetic, scanlines, retro 80s color grading, film grain, lo-fi", | |
| } | |
# Lazily created DWPose detector; filled in on first use by _get_pose_processor().
_pose_processor = None
# NOTE(review): not referenced in the visible part of this file — possibly unused.
_depth_processor = None
def log_memory(tag: str):
    """Print a one-line CUDA memory summary labelled with *tag*.

    Does nothing when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return

    gib = 1024**3
    allocated_gb = torch.cuda.memory_allocated() / gib
    peak_gb = torch.cuda.max_memory_allocated() / gib
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    print(
        f"[VRAM {tag}] allocated={allocated_gb:.2f}GB peak={peak_gb:.2f}GB "
        f"free={free_bytes / gib:.2f}GB total={total_bytes / gib:.2f}GB"
    )
def _get_pose_processor():
    """Return the shared DWPose detector, creating it on the first call."""
    global _pose_processor
    if _pose_processor is not None:
        return _pose_processor

    # Imported lazily so the dependency is only needed when pose mode is used.
    from dwpose import DwposeDetector

    _pose_processor = DwposeDetector.from_pretrained_default()
    print("[Preprocess] DWPose processor loaded")
    return _pose_processor
def load_video_frames(video_path: str) -> list[np.ndarray]:
    """Decode every frame of the video at *video_path* into a list."""
    with imageio.get_reader(video_path) as reader:
        # Deliberately a comprehension rather than list(reader): it iterates
        # the reader exactly like the original loop, without consulting the
        # reader's reported length.
        decoded = [frame for frame in reader]
    return decoded
def write_video_mp4(frames_float_01: list[np.ndarray], fps: float, out_path: str) -> str:
    """Write float frames to *out_path* as a video and return *out_path*.

    Each frame is clipped to [0, 1] and scaled to uint8 before being written.
    """

    def to_uint8(frame: np.ndarray) -> np.ndarray:
        return (np.clip(frame, 0.0, 1.0) * 255).astype(np.uint8)

    # Convert everything first so the writer is only opened once all frames
    # have been converted successfully.
    quantized = list(map(to_uint8, frames_float_01))
    with imageio.get_writer(out_path, fps=fps, macro_block_size=1) as sink:
        for image in quantized:
            sink.append_data(image)
    return out_path
def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
    """Run the DWPose detector on every frame.

    Returns one float32 RGB array per input frame, resized to
    (width, height) and scaled to [0, 1].
    """
    detector = _get_pose_processor()

    def render(frame: np.ndarray) -> np.ndarray:
        source = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
        skeleton = detector(source, include_body=True, include_hand=True, include_face=True)
        # The detector result is converted to a PIL image when it is not one already.
        if not isinstance(skeleton, Image.Image):
            skeleton = Image.fromarray(np.array(skeleton).astype(np.uint8))
        skeleton = skeleton.convert("RGB").resize((width, height), Image.BILINEAR)
        return np.array(skeleton).astype(np.float32) / 255.0

    return [render(frame) for frame in frames]
def preprocess_video_canny(frames: list[np.ndarray], width: int, height: int, low_threshold: int = 50, high_threshold: int = 100) -> list[np.ndarray]:
    """Turn every frame into a 3-channel Canny edge map scaled to [0, 1].

    Frames are resized to (width, height) and converted to grayscale before
    edge detection with the given hysteresis thresholds.
    """

    def edge_map(frame: np.ndarray) -> np.ndarray:
        scaled = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
        mono = cv2.cvtColor(scaled, cv2.COLOR_RGB2GRAY)
        edges = cv2.Canny(mono, low_threshold, high_threshold)
        # Replicate the single edge channel three times along the last axis.
        return np.stack([edges] * 3, axis=-1).astype(np.float32) / 255.0

    return [edge_map(frame) for frame in frames]
def preprocess_video_depth(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
    """Build a 3-channel map from the Laplacian magnitude of every frame.

    Each frame is resized to (width, height) and converted to grayscale; the
    absolute Laplacian response is then divided by its per-frame maximum.
    """

    def laplacian_map(frame: np.ndarray) -> np.ndarray:
        scaled = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
        mono = cv2.cvtColor(scaled, cv2.COLOR_RGB2GRAY).astype(np.float32)
        response = np.abs(cv2.Laplacian(mono, cv2.CV_32F, ksize=5))
        # The small epsilon avoids division by zero on a completely flat frame.
        response = response / (response.max() + 1e-8)
        return np.stack([response] * 3, axis=-1)

    return [laplacian_map(frame) for frame in frames]
def preprocess_conditioning_video(video_path: str, mode: str, width: int, height: int, num_frames: int, fps: float) -> tuple[str, str]:
    """Create the conditioning video and first-frame image for a source clip.

    Args:
        video_path: Video file to decode.
        mode: "Pose (DWPose)", "Canny Edge" or "Depth (Laplacian)"; any other
            value passes the frames through unprocessed (only scaled to [0, 1]).
        width: Target width handed to the selected preprocessor.
        height: Target height handed to the selected preprocessor.
        num_frames: Maximum number of leading frames to keep.
        fps: Frame rate of the written conditioning video.

    Returns:
        ``(cond_mp4, first_png)``: paths of the conditioning video and of a
        PNG holding the first decoded frame. Both are temporary files that the
        caller owns.

    Raises:
        ValueError: If no frame could be decoded from ``video_path``.
    """
    frames = load_video_frames(video_path)
    if not frames:
        raise ValueError("No frames decoded from video")
    frames = frames[:num_frames]

    # mkstemp creates the file atomically; the descriptor is closed right away
    # so no handle leaks and the path can be reopened by the writers below.
    png_fd, first_png = tempfile.mkstemp(suffix=".png")
    os.close(png_fd)
    Image.fromarray(frames[0]).save(first_png)

    if mode == "Pose (DWPose)":
        processed = preprocess_video_pose(frames, width, height)
    elif mode == "Canny Edge":
        processed = preprocess_video_canny(frames, width, height)
    elif mode == "Depth (Laplacian)":
        processed = preprocess_video_depth(frames, width, height)
    else:
        processed = [f.astype(np.float32) / 255.0 for f in frames]

    # Same pattern for the video: reserve a unique path instead of relying on
    # the deprecated, race-prone tempfile.mktemp.
    mp4_fd, cond_mp4 = tempfile.mkstemp(suffix=".mp4")
    os.close(mp4_fd)
    write_video_mp4(processed, fps=fps, out_path=cond_mp4)
    return cond_mp4, first_png
def _read_lora_reference_downscale_factor(lora_path: str) -> int:
    """Read ``reference_downscale_factor`` from a LoRA file's metadata.

    Returns 1 when the key is absent, and also (after logging a warning) when
    the file or its metadata cannot be read or parsed.
    """
    try:
        with safe_open(lora_path, framework="pt") as handle:
            meta = handle.metadata()
            if not meta:
                meta = {}
            raw_factor = meta.get("reference_downscale_factor", 1)
            return int(raw_factor)
    except Exception as err:
        logging.warning(f"Failed to read metadata from LoRA file '{lora_path}': {err}")
        return 1
class LTX23UnifiedPipeline:
    """Two-stage LTX generation pipeline with optional audio input and IC video conditioning.

    Stage 1 denoises at half the requested width and height, the resulting
    latent is spatially upsampled, and stage 2 denoises again at the full
    resolution before the latents are decoded.
    """

    def __init__(
        self,
        distilled_checkpoint_path: str,
        spatial_upsampler_path: str,
        gemma_root: str,
        ic_loras: list[LoraPathStrengthAndSDOps] | None = None,
        device: torch.device | None = None,
        quantization: QuantizationPolicy | None = None,
        reference_downscale_factor: int | None = None,
    ):
        """Create the model ledgers and resolve the reference downscale factor.

        Args:
            distilled_checkpoint_path: Checkpoint passed to every ModelLedger.
            spatial_upsampler_path: Spatial upsampler weights passed to every ModelLedger.
            gemma_root: Passed to the ledgers as ``gemma_root_path``.
            ic_loras: LoRAs applied to the stage 1 ledger only; None means none.
            device: Target device; defaults to ``get_device()``.
            quantization: Quantization policy forwarded to the ledgers.
            reference_downscale_factor: Explicit override. When None, the
                factor is read from the metadata of the given LoRA files.

        Raises:
            ValueError: If two LoRA files declare different non-1 factors.
        """
        self.device = device or get_device()
        self.dtype = torch.bfloat16
        ic_loras = ic_loras or []
        self.has_ic_lora = len(ic_loras) > 0
        self.stage_1_model_ledger = ModelLedger(
            dtype=self.dtype,
            device=self.device,
            checkpoint_path=distilled_checkpoint_path,
            spatial_upsampler_path=spatial_upsampler_path,
            gemma_root_path=gemma_root,
            loras=ic_loras,
            quantization=quantization,
        )
        if self.has_ic_lora:
            # Stage 2 gets its own ledger without LoRAs.
            self.stage_2_model_ledger = ModelLedger(
                dtype=self.dtype,
                device=self.device,
                checkpoint_path=distilled_checkpoint_path,
                spatial_upsampler_path=spatial_upsampler_path,
                gemma_root_path=gemma_root,
                loras=[],
                quantization=quantization,
            )
        else:
            # Without LoRAs both stages share one ledger object.
            self.stage_2_model_ledger = self.stage_1_model_ledger
        self.pipeline_components = PipelineComponents(dtype=self.dtype, device=self.device)
        if reference_downscale_factor is not None:
            self.reference_downscale_factor = reference_downscale_factor
        else:
            self.reference_downscale_factor = 1
            for lora in ic_loras:
                scale = _read_lora_reference_downscale_factor(lora.path)
                if scale != 1:
                    # All LoRAs that declare a factor must agree on it.
                    if self.reference_downscale_factor not in (1, scale):
                        raise ValueError(
                            f"Conflicting reference_downscale_factor: already {self.reference_downscale_factor}, got {scale}"
                        )
                    self.reference_downscale_factor = scale
        logging.info(f"[Pipeline] reference_downscale_factor={self.reference_downscale_factor}")

    def _create_ic_conditionings(
        self,
        video_conditioning: list[tuple[str, float]],
        height: int,
        width: int,
        num_frames: int,
        video_encoder: VideoEncoder,
        conditioning_strength: float = 1.0,
    ) -> list[ConditioningItem]:
        """Encode reference videos into conditioning items.

        Args:
            video_conditioning: ``(video_path, strength)`` pairs.
            height: Height of the stage output the references condition.
            width: Width of the stage output the references condition.
            num_frames: Passed to the loader as ``frame_cap``.
            video_encoder: Encoder used to turn each reference video into a latent.
            conditioning_strength: Values below 1.0 wrap each item in an
                attention-strength wrapper with this value as ``attention_mask``.

        Returns:
            One conditioning item per entry of ``video_conditioning``.
        """
        conditionings: list[ConditioningItem] = []
        scale = self.reference_downscale_factor
        # References are loaded at the stage size divided by the downscale factor.
        ref_height = height // scale
        ref_width = width // scale
        for video_path, strength in video_conditioning:
            video = load_video_conditioning(
                video_path=video_path,
                height=ref_height,
                width=ref_width,
                frame_cap=num_frames,
                dtype=self.dtype,
                device=self.device,
            )
            encoded_video = video_encoder(video)
            cond = VideoConditionByReferenceLatent(latent=encoded_video, downscale_factor=scale, strength=strength)
            if conditioning_strength < 1.0:
                cond = ConditioningItemAttentionStrengthWrapper(cond, attention_mask=conditioning_strength)
            conditionings.append(cond)
        if conditionings:
            logging.info(f"[IC-LoRA] Added {len(conditionings)} video conditioning(s)")
        return conditionings

    def __call__(
        self,
        prompt: str,
        seed: int,
        height: int,
        width: int,
        num_frames: int,
        frame_rate: float,
        images: list[ImageConditioningInput],
        audio_path: str | None = None,
        video_conditioning: list[tuple[str, float]] | None = None,
        tiling_config: TilingConfig | None = None,
        enhance_prompt: bool = False,
        conditioning_strength: float = 1.0,
    ):
        """Generate a video (and audio) for *prompt* in two denoising stages.

        Args:
            prompt: Text prompt; " synchronized lipsync" is appended when
                ``audio_path`` is given.
            seed: Seed for the torch generator driving the noiser.
            height: Final output height (stage 1 runs at ``height // 2``).
            width: Final output width (stage 1 runs at ``width // 2``).
            num_frames: Number of frames to generate.
            frame_rate: Frames per second of the output.
            images: Image conditionings used in both stages; the first one's
                path is also passed to prompt encoding.
            audio_path: Optional audio file. When given, its encoded latent is
                fed to the video-only denoiser and the decoded input audio is
                returned as the output audio.
            video_conditioning: Optional ``(video_path, strength)`` pairs,
                applied in stage 1 only.
            tiling_config: Forwarded to the video decoder call.
            enhance_prompt: Forwarded to ``encode_prompts`` as ``enhance_first_prompt``.
            conditioning_strength: Forwarded to ``_create_ic_conditionings``.

        Returns:
            ``(decoded_video, output_audio)``.

        Raises:
            ValueError: If no audio could be decoded from ``audio_path``.
        """
        assert_resolution(height=height, width=width, is_two_stage=True)
        has_audio = audio_path is not None
        has_video_cond = bool(video_conditioning)
        generator = torch.Generator(device=self.device).manual_seed(seed)
        noiser = GaussianNoiser(generator=generator)
        stepper = EulerDiffusionStep()
        dtype = torch.bfloat16
        prompt_for_model = prompt
        if has_audio:
            prompt_for_model = (prompt_for_model + " synchronized lipsync").strip()
        (ctx_p,) = encode_prompts(
            [prompt_for_model],
            self.stage_1_model_ledger,
            enhance_first_prompt=enhance_prompt,
            enhance_prompt_image=images[0].path if len(images) > 0 else None,
        )
        video_context, audio_context = ctx_p.video_encoding, ctx_p.audio_encoding
        encoded_audio_latent = None
        decoded_audio_for_output = None
        if has_audio:
            # Only the part of the audio covering the clip duration is decoded.
            video_duration = num_frames / frame_rate
            decoded_audio = decode_audio_from_file(audio_path, self.device, 0.0, video_duration)
            if decoded_audio is None:
                raise ValueError(f"Could not extract audio stream from {audio_path}")
            encoded_audio_latent = vae_encode_audio(decoded_audio, self.stage_1_model_ledger.audio_encoder())
            audio_shape = AudioLatentShape.from_duration(batch=1, duration=video_duration, channels=8, mel_bins=16)
            expected_frames = audio_shape.frames
            actual_frames = encoded_audio_latent.shape[2]
            # Trim or zero-pad along dim 2 so the latent has exactly the
            # number of frames expected for this duration.
            if actual_frames > expected_frames:
                encoded_audio_latent = encoded_audio_latent[:, :, :expected_frames, :]
            elif actual_frames < expected_frames:
                pad = torch.zeros(
                    encoded_audio_latent.shape[0],
                    encoded_audio_latent.shape[1],
                    expected_frames - actual_frames,
                    encoded_audio_latent.shape[3],
                    device=encoded_audio_latent.device,
                    dtype=encoded_audio_latent.dtype,
                )
                encoded_audio_latent = torch.cat([encoded_audio_latent, pad], dim=2)
            # The input audio itself is what gets returned at the end.
            decoded_audio_for_output = Audio(
                waveform=decoded_audio.waveform.squeeze(0),
                sampling_rate=decoded_audio.sampling_rate,
            )
        video_encoder = self.stage_1_model_ledger.video_encoder()
        # Stage 1 works at half the requested width and height.
        stage_1_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width // 2, height=height // 2, fps=frame_rate)
        stage_1_conditionings = combined_image_conditionings(
            images=images,
            height=stage_1_output_shape.height,
            width=stage_1_output_shape.width,
            video_encoder=video_encoder,
            dtype=dtype,
            device=self.device,
        )
        if has_video_cond:
            stage_1_conditionings.extend(
                self._create_ic_conditionings(
                    video_conditioning=video_conditioning,
                    height=stage_1_output_shape.height,
                    width=stage_1_output_shape.width,
                    num_frames=num_frames,
                    video_encoder=video_encoder,
                    conditioning_strength=conditioning_strength,
                )
            )
        transformer = self.stage_1_model_ledger.transformer()
        stage_1_sigmas = torch.tensor(DISTILLED_SIGMA_VALUES, device=self.device)

        # Stage 1 loop: closes over the prompt contexts and the stage 1 transformer.
        def denoising_loop(sigmas, video_state, audio_state, stepper):
            return euler_denoising_loop(
                sigmas=sigmas,
                video_state=video_state,
                audio_state=audio_state,
                stepper=stepper,
                denoise_fn=simple_denoising_func(
                    video_context=video_context,
                    audio_context=audio_context,
                    transformer=transformer,
                ),
            )

        if has_audio:
            # With input audio only the video is denoised; the encoded audio
            # latent is supplied as the initial audio latent.
            video_state = denoise_video_only(
                output_shape=stage_1_output_shape,
                conditionings=stage_1_conditionings,
                noiser=noiser,
                sigmas=stage_1_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
                initial_audio_latent=encoded_audio_latent,
            )
            audio_state = None
        else:
            video_state, audio_state = denoise_audio_video(
                output_shape=stage_1_output_shape,
                conditionings=stage_1_conditionings,
                noiser=noiser,
                sigmas=stage_1_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
            )
        # NOTE(review): torch.cuda.synchronize() is called unconditionally;
        # this assumes self.device is a CUDA device — confirm for other devices.
        torch.cuda.synchronize()
        cleanup_memory()
        # Upsample the stage 1 latent (first batch entry) before stage 2.
        upscaled_video_latent = upsample_video(
            latent=video_state.latent[:1],
            video_encoder=video_encoder,
            upsampler=self.stage_2_model_ledger.spatial_upsampler(),
        )
        torch.cuda.synchronize()
        cleanup_memory()
        transformer_s2 = self.stage_2_model_ledger.transformer()
        stage_2_sigmas = torch.tensor(STAGE_2_DISTILLED_SIGMA_VALUES, device=self.device)

        # Stage 2 loop: same as stage 1 but bound to the stage 2 transformer.
        def denoising_loop_s2(sigmas, video_state, audio_state, stepper):
            return euler_denoising_loop(
                sigmas=sigmas,
                video_state=video_state,
                audio_state=audio_state,
                stepper=stepper,
                denoise_fn=simple_denoising_func(
                    video_context=video_context,
                    audio_context=audio_context,
                    transformer=transformer_s2,
                ),
            )

        stage_2_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
        # Stage 2 is conditioned on the images only; the reference-video
        # conditionings from stage 1 are not added here.
        stage_2_conditionings = combined_image_conditionings(
            images=images,
            height=stage_2_output_shape.height,
            width=stage_2_output_shape.width,
            video_encoder=video_encoder,
            dtype=dtype,
            device=self.device,
        )
        if has_audio:
            video_state = denoise_video_only(
                output_shape=stage_2_output_shape,
                conditionings=stage_2_conditionings,
                noiser=noiser,
                sigmas=stage_2_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop_s2,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
                noise_scale=stage_2_sigmas[0],
                initial_video_latent=upscaled_video_latent,
                initial_audio_latent=encoded_audio_latent,
            )
            audio_state = None
        else:
            # Stage 2 starts from the upsampled video latent and the audio
            # latent produced by stage 1.
            video_state, audio_state = denoise_audio_video(
                output_shape=stage_2_output_shape,
                conditionings=stage_2_conditionings,
                noiser=noiser,
                sigmas=stage_2_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop_s2,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
                noise_scale=stage_2_sigmas[0],
                initial_video_latent=upscaled_video_latent,
                initial_audio_latent=audio_state.latent,
            )
        torch.cuda.synchronize()
        # Drop local references to the large modules before decoding.
        del transformer, transformer_s2, video_encoder
        cleanup_memory()
        decoded_video = vae_decode_video(
            video_state.latent,
            self.stage_2_model_ledger.video_decoder(),
            tiling_config,
            generator,
        )
        if has_audio:
            # Return the decoded input audio rather than a generated track.
            output_audio = decoded_audio_for_output
        else:
            output_audio = vae_decode_audio(
                audio_state.latent,
                self.stage_2_model_ledger.audio_decoder(),
                self.stage_2_model_ledger.vocoder(),
            )
        return decoded_video, output_audio
# Hugging Face repositories the model files are downloaded from.
LTX_MODEL_REPO = "Lightricks/LTX-2.3"
CHECKPOINT_REPO = "linoyts/ltx-2.3-22b-distilled-1.1-fused-union-control"
GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized"

print("=" * 80)
print("Downloading LTX-2.3 distilled model + Gemma + IC-LoRA...")
print("=" * 80)

checkpoint_path = hf_hub_download(
    repo_id=CHECKPOINT_REPO,
    filename="ltx-2.3-22b-distilled-1.1-fused-union-control.safetensors",
)
spatial_upsampler_path = hf_hub_download(
    repo_id=LTX_MODEL_REPO,
    filename="ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
)
gemma_root = snapshot_download(repo_id=GEMMA_REPO)

default_lora_info = IC_LORA_OPTIONS[DEFAULT_IC_LORA]
default_ic_lora_path = hf_hub_download(
    repo_id=default_lora_info["repo"],
    filename=default_lora_info["filename"],
)

print(f"Checkpoint: {checkpoint_path}")
print(f"Spatial upsampler: {spatial_upsampler_path}")
print(f"Gemma root: {gemma_root}")
print(f"IC-LoRA: {default_ic_lora_path}")

# The downloaded IC-LoRA path is only printed: no ic_loras are passed to the
# pipeline, and the reference downscale factor is set explicitly to 2.
# NOTE(review): presumably the "fused" checkpoint already contains the LoRA — confirm.
pipeline = LTX23UnifiedPipeline(
    distilled_checkpoint_path=checkpoint_path,
    spatial_upsampler_path=spatial_upsampler_path,
    gemma_root=gemma_root,
    quantization=QuantizationPolicy.fp8_cast(),
    reference_downscale_factor=2,
)

print("Preloading all models...")
_ledger_1 = pipeline.stage_1_model_ledger
_ledger_2 = pipeline.stage_2_model_ledger
# True when the pipeline was built without IC-LoRAs (both stages share one ledger).
_shared = _ledger_1 is _ledger_2

# Build each stage 1 component once ...
_s1_transformer = _ledger_1.transformer()
_s1_video_encoder = _ledger_1.video_encoder()
_s1_text_encoder = _ledger_1.text_encoder()
_s1_embeddings = _ledger_1.gemma_embeddings_processor()
_s1_audio_encoder = _ledger_1.audio_encoder()

# ... then replace the ledger's factory methods with closures that hand back
# the already-built instances on every later call.
_ledger_1.transformer = lambda: _s1_transformer
_ledger_1.video_encoder = lambda: _s1_video_encoder
_ledger_1.text_encoder = lambda: _s1_text_encoder
_ledger_1.gemma_embeddings_processor = lambda: _s1_embeddings
_ledger_1.audio_encoder = lambda: _s1_audio_encoder

if _shared:
    # Single ledger: preload the remaining components on it as well.
    _video_decoder = _ledger_1.video_decoder()
    _audio_decoder = _ledger_1.audio_decoder()
    _vocoder = _ledger_1.vocoder()
    _spatial_upsampler = _ledger_1.spatial_upsampler()
    _ledger_1.video_decoder = lambda: _video_decoder
    _ledger_1.audio_decoder = lambda: _audio_decoder
    _ledger_1.vocoder = lambda: _vocoder
    _ledger_1.spatial_upsampler = lambda: _spatial_upsampler
else:
    # Separate stage 2 ledger: preload and pin every component on it.
    _s2_transformer = _ledger_2.transformer()
    _s2_video_encoder = _ledger_2.video_encoder()
    _s2_video_decoder = _ledger_2.video_decoder()
    _s2_audio_decoder = _ledger_2.audio_decoder()
    _s2_vocoder = _ledger_2.vocoder()
    _s2_spatial_upsampler = _ledger_2.spatial_upsampler()
    _s2_text_encoder = _ledger_2.text_encoder()
    _s2_embeddings = _ledger_2.gemma_embeddings_processor()
    _s2_audio_encoder = _ledger_2.audio_encoder()
    _ledger_2.transformer = lambda: _s2_transformer
    _ledger_2.video_encoder = lambda: _s2_video_encoder
    _ledger_2.video_decoder = lambda: _s2_video_decoder
    _ledger_2.audio_decoder = lambda: _s2_audio_decoder
    _ledger_2.vocoder = lambda: _s2_vocoder
    _ledger_2.spatial_upsampler = lambda: _s2_spatial_upsampler
    _ledger_2.text_encoder = lambda: _s2_text_encoder
    _ledger_2.gemma_embeddings_processor = lambda: _s2_embeddings
    _ledger_2.audio_encoder = lambda: _s2_audio_encoder

print("All models preloaded!")
print("=" * 80)
| def detect_aspect_ratio_from_media(*media_candidates) -> str: | |
| for media in media_candidates: | |
| if media is None: | |
| continue | |
| if hasattr(media, "size") and not isinstance(media, (str, Path)): | |
| try: | |
| w, h = media.size | |
| ratio = w / h | |
| candidates = {"16:9": 16 / 9, "9:16": 9 / 16, "1:1": 1.0} | |
| return min(candidates, key=lambda k: abs(ratio - candidates[k])) | |
| except Exception: | |
| pass | |
| media_str = str(media) | |
| ext = media_str.lower().rsplit(".", 1)[-1] if "." in media_str else "" | |
| if ext in ("jpg", "jpeg", "png", "bmp", "webp", "gif", "tiff"): | |
| try: | |
| with Image.open(media_str) as img: | |
| w, h = img.size | |
| except Exception: | |
| continue | |
| else: | |
| try: | |
| import av | |
| with av.open(media_str) as container: | |
| stream = container.streams.video[0] | |
| w, h = stream.codec_context.width, stream.codec_context.height | |
| except Exception: | |
| continue | |
| ratio = w / h | |
| candidates = {"16:9": 16 / 9, "9:16": 9 / 16, "1:1": 1.0} | |
| return min(candidates, key=lambda k: abs(ratio - candidates[k])) | |
| return "16:9" | |
def update_resolution(first_image, last_image, input_video, high_res):
    """Return Gradio updates for the width and height fields.

    The aspect ratio is detected from the first usable media input and the
    size is looked up in RESOLUTIONS under the "high" or "low" tier.
    """
    aspect = detect_aspect_ratio_from_media(first_image, last_image, input_video)
    if high_res:
        tier = "high"
    else:
        tier = "low"
    width, height = RESOLUTIONS[tier][aspect]
    return gr.update(value=width), gr.update(value=height)
def update_mode_visibility(ui_mode: str):
    """Return four visibility updates for the given UI mode.

    The first is shown only in "Interpolate" mode, the second and third only
    in "Motion Control" mode, and the fourth in every mode except
    "Motion Control".
    """
    motion = ui_mode == "Motion Control"
    flags = (ui_mode == "Interpolate", motion, motion, not motion)
    return tuple(gr.update(visible=flag) for flag in flags)
def apply_duration_choice(duration_value: str):
    """Translate a duration label such as "5s" into seconds; unknown labels give 3.0."""
    seconds_by_label = {"3s": 3.0, "5s": 5.0, "10s": 10.0, "15s": 15.0}
    if duration_value in seconds_by_label:
        return seconds_by_label[duration_value]
    return 3.0
def choose_duration_option(seconds: float | None):
    """Return a Gradio update selecting the smallest duration label that covers *seconds*.

    None selects "3s"; anything above 10 seconds selects "15s".
    """
    label = "15s"
    if seconds is None:
        label = "3s"
    else:
        for limit, name in ((3, "3s"), (5, "5s"), (10, "10s")):
            if seconds <= limit:
                label = name
                break
    return gr.update(value=label)
| def _get_video_duration(video_path) -> float | None: | |
| if video_path is None: | |
| return None | |
| try: | |
| result = subprocess.run( | |
| [ | |
| "ffprobe", "-v", "error", "-select_streams", "v:0", | |
| "-show_entries", "format=duration", "-of", "default=nw=1:nk=1", str(video_path), | |
| ], | |
| capture_output=True, | |
| text=True, | |
| ) | |
| return float(result.stdout.strip()) | |
| except Exception: | |
| return None | |
def on_video_upload(input_video, first_image, last_image, high_res):
    """Handle a video upload: refresh the resolution fields and the duration controls.

    Returns the width update, the height update, the duration-option update
    and the duration value (capped at 15.0 and rounded to one decimal; 3.0
    when the duration is unknown).
    """
    width_update, height_update = update_resolution(first_image, last_image, input_video, high_res)
    probed = _get_video_duration(input_video)
    if probed is None:
        seconds = 3.0
    else:
        seconds = round(min(probed, 15.0), 1)
    return width_update, height_update, choose_duration_option(seconds), seconds
def apply_prompt_preset(preset_name: str) -> str:
    """Look up the prompt text for *preset_name*; unknown names give an empty string."""
    if preset_name in PROMPT_PRESETS:
        return PROMPT_PRESETS[preset_name]
    return ""
def apply_style_modifier(style_name: str, current_prompt: str) -> str:
    """Append the style modifier for *style_name* to the current prompt.

    Args:
        style_name: Key into STYLE_MODIFIERS; unknown keys and keys with an
            empty modifier leave the prompt unchanged (apart from stripping).
        current_prompt: Prompt text; None is treated as an empty prompt.

    Returns:
        The stripped prompt with any previously appended known style suffix
        removed and the new suffix appended.
    """
    suffix = STYLE_MODIFIERS.get(style_name, "")
    # Tolerate None the same way apply_motion_preset does.
    base = (current_prompt or "").strip()
    if not suffix:
        return base
    # Remove any existing style suffix before appending new one
    for known in STYLE_MODIFIERS.values():
        tail = known.strip(",")
        # An empty tail must be skipped: base[:-0] would wipe the whole prompt.
        if tail and base.endswith(tail):
            base = base[: -len(tail)].rstrip(", ")
    return base + suffix
def on_motion_preset_to_prompt(preset_name: str, custom_motion: str) -> tuple:
    """Return updates for the custom-motion box and the prompt box.

    "None" hides the custom box and leaves the prompt alone. "Custom" shows
    the custom box and writes the stripped custom text into the prompt. Any
    other preset hides the box and writes the preset text. An empty text
    never overwrites the prompt.
    """
    if preset_name == "None":
        return gr.update(visible=False), gr.update()

    is_custom = preset_name == "Custom"
    if is_custom:
        text = custom_motion.strip() if custom_motion else ""
    else:
        text = MOTION_PRESETS.get(preset_name, "")

    box_update = gr.update(visible=is_custom)
    if text:
        prompt_update = gr.update(value=text)
    else:
        prompt_update = gr.update()
    return box_update, prompt_update
def on_custom_motion_change(custom_text: str, preset_name: str) -> object:
    """Mirror the custom motion text into the prompt while the "Custom" preset is active."""
    if preset_name != "Custom":
        # Any other preset: leave the prompt untouched.
        return gr.update()
    return gr.update(value=custom_text.strip())
def apply_motion_preset(preset_name: str, current_prompt: str, custom_motion: str = "") -> str:
    """Combine the current prompt with the text of the selected motion preset.

    "None" returns the stripped prompt. "Custom" uses *custom_motion*; any
    other name is looked up in MOTION_PRESETS. Non-empty parts are joined
    with ", ".
    """
    base = (current_prompt or "").strip()
    if preset_name == "None":
        return base

    if preset_name == "Custom":
        raw_extra = custom_motion
    else:
        raw_extra = MOTION_PRESETS.get(preset_name, "")
    extra = (raw_extra or "").strip()

    return ", ".join(part for part in (base, extra) if part)
| def _extract_audio_from_video(video_path: str) -> str | None: | |
| out_path = tempfile.mktemp(suffix=".wav") | |
| try: | |
| probe = subprocess.run( | |
| [ | |
| "ffprobe", "-v", "error", "-select_streams", "a:0", | |
| "-show_entries", "stream=codec_type", "-of", "default=nw=1:nk=1", video_path, | |
| ], | |
| capture_output=True, | |
| text=True, | |
| ) | |
| if not probe.stdout.strip(): | |
| return None | |
| subprocess.run( | |
| [ | |
| "ffmpeg", "-y", "-v", "error", "-i", video_path, | |
| "-vn", "-ac", "2", "-ar", "48000", "-c:a", "pcm_s16le", out_path, | |
| ], | |
| check=True, | |
| ) | |
| return out_path | |
| except (subprocess.CalledProcessError, FileNotFoundError): | |
| return None | |
def get_duration(
    prompt, first_image, last_image, input_video, input_audio,
    ui_mode, clip_duration, video_preprocess, conditioning_strength,
    enhance_prompt, use_video_audio, seed, randomize_seed, height, width,
    motion_preset, custom_motion, progress,
):
    """Return a time budget (an integer) for a generation request.

    "Image-to-Video" and "Interpolate" always get 60. Other modes get a
    base value chosen by ``clip_duration`` (60 / 80 / 120 / 180 for clips
    up to 3 / 5 / 10 / longer), plus 10 when audio is supplied and 60 when
    a video is supplied. The remaining parameters are accepted but unused.
    """
    if ui_mode == "Image-to-Video" or ui_mode == "Interpolate":
        return 60

    overhead = 0
    if input_audio is not None:
        overhead += 10
    if input_video is not None:
        overhead += 60

    # (upper bound on clip_duration, base budget), checked in ascending order.
    for limit, base in ((3, 60), (5, 80), (10, 120)):
        if clip_duration <= limit:
            return base + overhead
    return 180 + overhead
def _conditioning_image_path(image) -> str:
    """Return a filesystem path for an image conditioning input.

    Objects exposing ``save`` (presumably PIL images) are written to a new
    temporary PNG; anything else is treated as a path and stringified.
    """
    if hasattr(image, "save"):
        # Close the temp-file handle before saving so no descriptor leaks
        # and the path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            png_path = tmp.name
        image.save(png_path)
        return png_path
    return str(image)


def generate_video(
    prompt: str,
    first_image=None,
    last_image=None,
    input_video=None,
    input_audio=None,
    ui_mode="Image-to-Video",
    clip_duration=3,
    video_preprocess="Raw (no preprocessing)",
    conditioning_strength=0.85,
    enhance_prompt=False,
    use_video_audio=True,
    seed=42,
    randomize_seed=True,
    height=512,
    width=768,
    motion_preset="None",
    custom_motion="",
    progress=gr.Progress(track_tqdm=True),
):
    """Run the pipeline and encode the result to an MP4 file.

    Args:
        prompt: Base text prompt; the motion preset text is appended to it
            and ``DEFAULT_PROMPT`` is used when the result is empty.
        first_image: Optional image (object with ``save`` or a path) used
            as conditioning for frame 0.
        last_image: Optional image used as conditioning for the last frame;
            ignored unless ``ui_mode`` is "Interpolate" (or an unrecognized
            mode, which filters nothing).
        input_video: Optional conditioning video path; ignored in
            "Image-to-Video" and "Interpolate" modes.
        input_audio: Optional audio path forwarded to the pipeline.
        ui_mode: "Image-to-Video", "Interpolate" or "Motion Control";
            decides which of the optional inputs are discarded.
        clip_duration: Requested clip length in seconds.
        video_preprocess: Preprocessing mode for ``input_video``; the value
            "Raw (no preprocessing)" passes the video through unchanged.
        conditioning_strength: Forwarded to the pipeline unchanged.
        enhance_prompt: Forwarded to the pipeline unchanged.
        use_video_audio: When true and no ``input_audio`` is given, the
            audio track of ``input_video`` is extracted and used.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: Draw a random seed in ``[0, MAX_SEED]`` instead.
        height: Output height, cast to ``int``.
        width: Output width, cast to ``int``.
        motion_preset: Motion preset name passed to ``apply_motion_preset``.
        custom_motion: Custom motion text for the "Custom" preset.
        progress: Gradio progress tracker (not referenced in the body).

    Returns:
        ``(output_path, seed)`` on success, or ``(None, seed)`` when any
        step raises; the error and traceback are printed in that case.
    """
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    # Apply motion preset
    final_prompt = apply_motion_preset(motion_preset, prompt, custom_motion)

    # Drop the inputs that the selected mode does not use.
    if ui_mode == "Image-to-Video":
        last_image = None
        input_video = None
    elif ui_mode == "Interpolate":
        input_video = None
    elif ui_mode == "Motion Control":
        last_image = None

    try:
        torch.cuda.reset_peak_memory_stats()
        log_memory("start")

        frame_rate = DEFAULT_FRAME_RATE
        num_frames = int(clip_duration * frame_rate) + 1
        # Round the frame count up to the next value of the form 8k + 1.
        num_frames = ((num_frames - 1 + 7) // 8) * 8 + 1

        images: list[ImageConditioningInput] = []
        if first_image is not None:
            first_path = _conditioning_image_path(first_image)
            images.append(ImageConditioningInput(path=first_path, frame_idx=0, strength=1.0))
        if last_image is not None:
            last_path = _conditioning_image_path(last_image)
            images.append(ImageConditioningInput(path=last_path, frame_idx=num_frames - 1, strength=1.0))

        video_conditioning = None
        if input_video is not None:
            video_path = str(input_video)
            if video_preprocess != "Raw (no preprocessing)":
                # The conditioning video is prepared at half the output size.
                cond_mp4, first_frame_png = preprocess_conditioning_video(
                    video_path=video_path,
                    mode=video_preprocess,
                    width=int(width) // 2,
                    height=int(height) // 2,
                    num_frames=num_frames,
                    fps=frame_rate,
                )
                video_conditioning = [(cond_mp4, 1.0)]
                # Without any user image, condition frame 0 on the first
                # frame returned by the preprocessing step.
                if not images:
                    images.append(ImageConditioningInput(path=first_frame_png, frame_idx=0, strength=1.0))
            else:
                video_conditioning = [(video_path, 1.0)]

            if input_audio is None and use_video_audio:
                extracted_audio = _extract_audio_from_video(video_path)
                if extracted_audio is not None:
                    input_audio = extracted_audio

        if not final_prompt or not final_prompt.strip():
            final_prompt = DEFAULT_PROMPT

        tiling_config = TilingConfig.default()
        video_chunks_number = get_video_chunks_number(num_frames, tiling_config)

        video, audio = pipeline(
            prompt=final_prompt,
            seed=current_seed,
            height=int(height),
            width=int(width),
            num_frames=num_frames,
            frame_rate=frame_rate,
            images=images,
            audio_path=input_audio,
            video_conditioning=video_conditioning,
            tiling_config=tiling_config,
            enhance_prompt=enhance_prompt,
            conditioning_strength=conditioning_strength,
        )

        # mkdtemp atomically creates a private directory, avoiding the race
        # inherent in the deprecated tempfile.mktemp while still handing the
        # encoder a path that does not exist yet.
        output_path = os.path.join(tempfile.mkdtemp(prefix="ltx_out_"), "output.mp4")
        encode_video(
            video=video,
            fps=frame_rate,
            audio=audio,
            output_path=output_path,
            video_chunks_number=video_chunks_number,
        )
        log_memory("done")
        return str(output_path), current_seed
    except Exception as e:
        # Top-level UI boundary: report the failure and return no video
        # instead of propagating the exception.
        import traceback
        log_memory("error")
        print(f"Error: {str(e)}\n{traceback.format_exc()}")
        return None, current_seed
| # ───────────────────────────────────────────────────────────── | |
| # CSS — Purple Space Star Theme | |
| # ───────────────────────────────────────────────────────────── | |
| css = """ | |
| @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Rajdhani:wght@300;400;600;700&display=swap'); | |
| /* βββ Reset / Base βββ */ | |
| *, *::before, *::after { box-sizing: border-box; } | |
| :root { | |
| --bg-deep: #04020f; | |
| --bg-mid: #08041a; | |
| --bg-card: #0d0820; | |
| --bg-card2: #120d28; | |
| --border: rgba(138, 43, 226, 0.25); | |
| --border-bright: rgba(180, 100, 255, 0.5); | |
| --purple-1: #8a2be2; | |
| --purple-2: #b45fff; | |
| --purple-3: #d4aaff; | |
| --purple-glow: rgba(138, 43, 226, 0.4); | |
| --cyan-acc: #00e5ff; | |
| --pink-acc: #ff6ec7; | |
| --text-primary: #e8d8ff; | |
| --text-muted: rgba(200, 170, 255, 0.55); | |
| --radius-lg: 16px; | |
| --radius-pill: 9999px; | |
| --font-display: 'Orbitron', sans-serif; | |
| --font-body: 'Rajdhani', sans-serif; | |
| } | |
| /* βββ Starfield Background βββ */ | |
| body, .gradio-container { | |
| background: var(--bg-deep) !important; | |
| font-family: var(--font-body) !important; | |
| color: var(--text-primary) !important; | |
| min-height: 100vh; | |
| position: relative; | |
| overflow-x: hidden; | |
| } | |
| .gradio-container::before { | |
| content: ''; | |
| position: fixed; | |
| inset: 0; | |
| background: | |
| radial-gradient(ellipse 80% 50% at 20% 10%, rgba(90,20,180,0.18) 0%, transparent 60%), | |
| radial-gradient(ellipse 60% 40% at 80% 80%, rgba(0,229,255,0.07) 0%, transparent 55%), | |
| radial-gradient(ellipse 50% 60% at 50% 50%, rgba(138,43,226,0.06) 0%, transparent 70%); | |
| pointer-events: none; | |
| z-index: 0; | |
| } | |
| /* Animated stars */ | |
| .gradio-container::after { | |
| content: ''; | |
| position: fixed; | |
| inset: 0; | |
| background-image: | |
| radial-gradient(1px 1px at 10% 15%, rgba(255,255,255,0.9) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 25% 35%, rgba(255,255,255,0.6) 0%, transparent 100%), | |
| radial-gradient(1.5px 1.5px at 40% 8%, rgba(200,150,255,0.8) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 55% 60%, rgba(255,255,255,0.7) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 70% 25%, rgba(180,100,255,0.9) 0%, transparent 100%), | |
| radial-gradient(1.5px 1.5px at 85% 45%, rgba(255,255,255,0.5) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 15% 75%, rgba(0,229,255,0.8) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 90% 10%, rgba(255,255,255,0.6) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 35% 88%, rgba(255,255,255,0.7) 0%, transparent 100%), | |
| radial-gradient(1.5px 1.5px at 60% 92%, rgba(180,100,255,0.6) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 78% 68%, rgba(255,255,255,0.8) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 5% 50%, rgba(0,229,255,0.5) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 48% 42%, rgba(255,255,255,0.4) 0%, transparent 100%), | |
| radial-gradient(1px 1px at 92% 82%, rgba(200,150,255,0.7) 0%, transparent 100%), | |
| radial-gradient(1.5px 1.5px at 22% 55%, rgba(255,255,255,0.5) 0%, transparent 100%); | |
| pointer-events: none; | |
| z-index: 0; | |
| animation: starTwinkle 6s ease-in-out infinite alternate; | |
| } | |
| @keyframes starTwinkle { | |
| 0% { opacity: 0.6; } | |
| 50% { opacity: 1; } | |
| 100% { opacity: 0.7; } | |
| } | |
| /* βββ Gradio overrides βββ */ | |
| .gradio-container > * { position: relative; z-index: 1; } | |
| footer { display: none !important; } | |
| .tabs > .tab-nav { display: none !important; } | |
| /* Hide default Gradio chrome */ | |
| .app.svelte-182fdeq.svelte-182fdeq { padding: 0 !important; } | |
| /* Blocks */ | |
| .block, .form, .gap, .contain { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .gr-padded { padding: 0 !important; } | |
| /* βββ Page wrapper βββ */ | |
| #ltx-root { | |
| max-width: 1300px; | |
| margin: 0 auto; | |
| padding: 24px 20px 60px; | |
| } | |
| /* βββ Header βββ */ | |
| #ltx-header { | |
| text-align: center; | |
| padding: 40px 0 32px; | |
| position: relative; | |
| } | |
| .ltx-logo-text { | |
| font-family: var(--font-display); | |
| font-size: clamp(32px, 5vw, 56px); | |
| font-weight: 900; | |
| letter-spacing: 0.08em; | |
| background: linear-gradient(135deg, #b45fff 0%, #e8d8ff 40%, #00e5ff 80%, #ff6ec7 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| text-shadow: none; | |
| filter: drop-shadow(0 0 30px rgba(180,95,255,0.5)); | |
| animation: logoGlow 3s ease-in-out infinite alternate; | |
| } | |
| @keyframes logoGlow { | |
| from { filter: drop-shadow(0 0 20px rgba(180,95,255,0.4)); } | |
| to { filter: drop-shadow(0 0 45px rgba(0,229,255,0.5)); } | |
| } | |
| .ltx-tagline { | |
| font-family: var(--font-body); | |
| font-size: 15px; | |
| font-weight: 400; | |
| color: var(--text-muted); | |
| letter-spacing: 0.12em; | |
| text-transform: uppercase; | |
| margin-top: 8px; | |
| } | |
| .ltx-site-link { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| margin-top: 14px; | |
| padding: 7px 18px; | |
| border-radius: var(--radius-pill); | |
| border: 1px solid var(--border-bright); | |
| background: rgba(138,43,226,0.12); | |
| color: var(--purple-3); | |
| font-family: var(--font-body); | |
| font-size: 13px; | |
| font-weight: 600; | |
| letter-spacing: 0.06em; | |
| text-decoration: none; | |
| transition: all 0.25s ease; | |
| } | |
| .ltx-site-link:hover { | |
| background: rgba(138,43,226,0.28); | |
| border-color: var(--purple-2); | |
| color: #fff; | |
| box-shadow: 0 0 18px var(--purple-glow); | |
| transform: translateY(-1px); | |
| } | |
| /* Nebula decoration */ | |
| .ltx-nebula { | |
| position: absolute; | |
| top: -30px; left: 50%; | |
| transform: translateX(-50%); | |
| width: 500px; height: 180px; | |
| background: radial-gradient(ellipse at center, rgba(138,43,226,0.15) 0%, transparent 70%); | |
| pointer-events: none; | |
| } | |
| /* βββ Mode Selector βββ */ | |
| #mode-selector-wrap { | |
| display: flex; | |
| justify-content: center; | |
| margin-bottom: 28px; | |
| } | |
| .mode-pill-group { | |
| display: inline-flex; | |
| background: rgba(10,5,25,0.8); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius-pill); | |
| padding: 5px; | |
| gap: 2px; | |
| backdrop-filter: blur(12px); | |
| } | |
| .mode-pill { | |
| position: relative; | |
| padding: 10px 24px; | |
| border-radius: var(--radius-pill); | |
| font-family: var(--font-body); | |
| font-size: 14px; | |
| font-weight: 700; | |
| letter-spacing: 0.05em; | |
| color: var(--text-muted); | |
| cursor: pointer; | |
| border: none; | |
| background: transparent; | |
| transition: color 0.2s ease; | |
| user-select: none; | |
| white-space: nowrap; | |
| } | |
| .mode-pill.active { | |
| color: #04020f; | |
| background: linear-gradient(135deg, var(--purple-2), var(--cyan-acc)); | |
| box-shadow: 0 0 20px rgba(180,95,255,0.4); | |
| } | |
| .mode-pill:not(.active):hover { color: var(--purple-3); } | |
| /* βββ Luminous Radio Pills (all groups) βββ */ | |
| /* Kill the default Gradio radio layout */ | |
| #mode-radio .wrap, | |
| #preset-radio .wrap, | |
| #duration-radio .wrap, | |
| #preprocess-radio .wrap, | |
| #style-radio .wrap, | |
| #prompt-preset-radio .wrap { | |
| display: flex !important; | |
| flex-wrap: wrap !important; | |
| gap: 8px !important; | |
| background: transparent !important; | |
| border: none !important; | |
| padding: 4px 0 !important; | |
| } | |
| /* Hide the actual radio circle */ | |
| #mode-radio input[type="radio"], | |
| #preset-radio input[type="radio"], | |
| #duration-radio input[type="radio"], | |
| #preprocess-radio input[type="radio"], | |
| #style-radio input[type="radio"], | |
| #prompt-preset-radio input[type="radio"] { | |
| display: none !important; | |
| } | |
| /* ββ Base pill style ββ */ | |
| #mode-radio label, | |
| #preset-radio label, | |
| #duration-radio label, | |
| #preprocess-radio label, | |
| #style-radio label, | |
| #prompt-preset-radio label { | |
| display: inline-flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| gap: 5px !important; | |
| padding: 8px 16px !important; | |
| border-radius: 9999px !important; | |
| border: 1.5px solid rgba(138,43,226,0.28) !important; | |
| background: rgba(8,4,20,0.8) !important; | |
| color: rgba(190,160,255,0.6) !important; | |
| font-family: 'Rajdhani', sans-serif !important; | |
| font-size: 13px !important; | |
| font-weight: 700 !important; | |
| letter-spacing: 0.03em !important; | |
| cursor: pointer !important; | |
| transition: border-color 0.18s ease, color 0.18s ease, | |
| box-shadow 0.18s ease, transform 0.15s ease, | |
| background 0.18s ease !important; | |
| text-transform: none !important; | |
| white-space: nowrap !important; | |
| position: relative !important; | |
| user-select: none !important; | |
| -webkit-user-select: none !important; | |
| } | |
| /* ββ Hover β subtle lift ββ */ | |
| #mode-radio label:hover, | |
| #preset-radio label:hover, | |
| #duration-radio label:hover, | |
| #preprocess-radio label:hover, | |
| #style-radio label:hover, | |
| #prompt-preset-radio label:hover { | |
| border-color: rgba(180,100,255,0.55) !important; | |
| color: rgba(220,190,255,0.9) !important; | |
| box-shadow: 0 0 12px rgba(138,43,226,0.18) !important; | |
| transform: translateY(-1px) !important; | |
| } | |
| /* ββ SELECTED STATE β unmistakably obvious ββ */ | |
| /* checkmark prepended via ::before on the label itself */ | |
| #mode-radio input:checked + label::before, | |
| #preset-radio input:checked + label::before, | |
| #duration-radio input:checked + label::before, | |
| #preprocess-radio input:checked + label::before, | |
| #style-radio input:checked + label::before, | |
| #prompt-preset-radio input:checked + label::before, | |
| #mode-radio label:has(input:checked)::before, | |
| #preset-radio label:has(input:checked)::before, | |
| #duration-radio label:has(input:checked)::before, | |
| #preprocess-radio label:has(input:checked)::before, | |
| #style-radio label:has(input:checked)::before, | |
| #prompt-preset-radio label:has(input:checked)::before, | |
| #mode-radio label.selected::before, | |
| #preset-radio label.selected::before, | |
| #duration-radio label.selected::before, | |
| #preprocess-radio label.selected::before, | |
| #style-radio label.selected::before, | |
| #prompt-preset-radio label.selected::before { | |
| content: 'β' !important; | |
| font-size: 12px !important; | |
| font-weight: 900 !important; | |
| line-height: 1 !important; | |
| flex-shrink: 0 !important; | |
| } | |
| /* Pulse ring animation */ | |
| @keyframes pillPulse { | |
| 0% { box-shadow: 0 0 0 0 rgba(180,100,255,0.6), 0 0 20px rgba(138,43,226,0.4); } | |
| 50% { box-shadow: 0 0 0 5px rgba(180,100,255,0), 0 0 30px rgba(138,43,226,0.25); } | |
| 100% { box-shadow: 0 0 0 0 rgba(180,100,255,0), 0 0 20px rgba(138,43,226,0.4); } | |
| } | |
| /* Bounce in on select */ | |
| @keyframes pillBounce { | |
| 0% { transform: scale(1); } | |
| 40% { transform: scale(1.10) translateY(-2px); } | |
| 70% { transform: scale(0.97); } | |
| 100% { transform: scale(1) translateY(-1px); } | |
| } | |
| /* Base selected */ | |
| #mode-radio input:checked + label, | |
| #preset-radio input:checked + label, | |
| #duration-radio input:checked + label, | |
| #preprocess-radio input:checked + label, | |
| #style-radio input:checked + label, | |
| #prompt-preset-radio input:checked + label, | |
| #mode-radio label:has(input:checked), | |
| #preset-radio label:has(input:checked), | |
| #duration-radio label:has(input:checked), | |
| #preprocess-radio label:has(input:checked), | |
| #style-radio label:has(input:checked), | |
| #prompt-preset-radio label:has(input:checked), | |
| #mode-radio label.selected, | |
| #preset-radio label.selected, | |
| #duration-radio label.selected, | |
| #preprocess-radio label.selected, | |
| #style-radio label.selected, | |
| #prompt-preset-radio label.selected { | |
| background: linear-gradient(135deg, | |
| rgba(107,32,200,0.65) 0%, | |
| rgba(154,64,224,0.55) 50%, | |
| rgba(0,188,212,0.3) 100%) !important; | |
| border-color: var(--purple-2) !important; | |
| border-width: 2px !important; | |
| color: #fff !important; | |
| font-weight: 900 !important; | |
| text-shadow: 0 0 10px rgba(220,180,255,0.8) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulse 2s ease 0.32s infinite !important; | |
| transform: translateY(-1px) !important; | |
| } | |
| /* ββ Per-group selected color themes ββ */ | |
| /* Duration β cyan */ | |
| #duration-radio label { padding: 9px 22px !important; font-size: 14px !important; } | |
| #duration-radio input:checked + label, | |
| #duration-radio label:has(input:checked), | |
| #duration-radio label.selected { | |
| background: linear-gradient(135deg, rgba(0,180,200,0.55), rgba(0,100,180,0.45)) !important; | |
| border-color: #00e5ff !important; | |
| box-shadow: 0 0 0 2px rgba(0,229,255,0.25), 0 0 22px rgba(0,229,255,0.45) !important; | |
| text-shadow: 0 0 12px rgba(0,255,255,0.8) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulseCyan 2s ease 0.32s infinite !important; | |
| } | |
| @keyframes pillPulseCyan { | |
| 0% { box-shadow: 0 0 0 0 rgba(0,229,255,0.6), 0 0 22px rgba(0,229,255,0.4); } | |
| 50% { box-shadow: 0 0 0 6px rgba(0,229,255,0), 0 0 30px rgba(0,229,255,0.2); } | |
| 100% { box-shadow: 0 0 0 0 rgba(0,229,255,0), 0 0 22px rgba(0,229,255,0.4); } | |
| } | |
| /* Mode β bright gradient, bigger pills */ | |
| #mode-radio .wrap { | |
| background: rgba(10,5,25,0.75) !important; | |
| border: 1px solid var(--border) !important; | |
| border-radius: 9999px !important; | |
| padding: 5px !important; | |
| backdrop-filter: blur(14px) !important; | |
| display: inline-flex !important; | |
| width: fit-content !important; | |
| margin: 0 auto !important; | |
| } | |
| #mode-radio label { | |
| padding: 11px 28px !important; | |
| font-size: 14px !important; | |
| font-family: 'Orbitron', sans-serif !important; | |
| letter-spacing: 0.07em !important; | |
| } | |
| #mode-radio input:checked + label, | |
| #mode-radio label:has(input:checked), | |
| #mode-radio label.selected { | |
| background: linear-gradient(135deg, #5c18b8, #9a40e0 45%, #00bcd4) !important; | |
| border-color: rgba(255,255,255,0.3) !important; | |
| border-width: 1.5px !important; | |
| text-shadow: 0 0 18px rgba(255,255,255,0.7) !important; | |
| box-shadow: 0 0 0 3px rgba(138,43,226,0.25), 0 0 28px rgba(138,43,226,0.5) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulseMode 2.5s ease 0.32s infinite !important; | |
| } | |
| @keyframes pillPulseMode { | |
| 0% { box-shadow: 0 0 0 0 rgba(138,43,226,0.55), 0 0 28px rgba(138,43,226,0.5); } | |
| 50% { box-shadow: 0 0 0 7px rgba(138,43,226,0), 0 0 40px rgba(138,43,226,0.25); } | |
| 100% { box-shadow: 0 0 0 0 rgba(138,43,226,0), 0 0 28px rgba(138,43,226,0.5); } | |
| } | |
| /* Prompt preset β teal */ | |
| #prompt-preset-radio input:checked + label, | |
| #prompt-preset-radio label:has(input:checked), | |
| #prompt-preset-radio label.selected { | |
| background: linear-gradient(135deg, rgba(0,140,130,0.6), rgba(0,188,212,0.4)) !important; | |
| border-color: #00e5c8 !important; | |
| text-shadow: 0 0 10px rgba(0,255,210,0.8) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulseTeal 2s ease 0.32s infinite !important; | |
| } | |
| @keyframes pillPulseTeal { | |
| 0% { box-shadow: 0 0 0 0 rgba(0,229,200,0.55), 0 0 20px rgba(0,200,180,0.4); } | |
| 50% { box-shadow: 0 0 0 5px rgba(0,229,200,0), 0 0 28px rgba(0,200,180,0.2); } | |
| 100% { box-shadow: 0 0 0 0 rgba(0,229,200,0), 0 0 20px rgba(0,200,180,0.4); } | |
| } | |
| /* Style modifier β pink */ | |
| #style-radio input:checked + label, | |
| #style-radio label:has(input:checked), | |
| #style-radio label.selected { | |
| background: linear-gradient(135deg, rgba(180,30,140,0.55), rgba(138,43,226,0.45)) !important; | |
| border-color: #ff6ec7 !important; | |
| text-shadow: 0 0 10px rgba(255,150,220,0.8) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulsePink 2s ease 0.32s infinite !important; | |
| } | |
| @keyframes pillPulsePink { | |
| 0% { box-shadow: 0 0 0 0 rgba(255,110,199,0.55), 0 0 20px rgba(255,110,199,0.35); } | |
| 50% { box-shadow: 0 0 0 5px rgba(255,110,199,0), 0 0 28px rgba(255,110,199,0.15); } | |
| 100% { box-shadow: 0 0 0 0 rgba(255,110,199,0), 0 0 20px rgba(255,110,199,0.35); } | |
| } | |
| /* Motion preset β amber/gold */ | |
| #preset-radio input:checked + label, | |
| #preset-radio label:has(input:checked), | |
| #preset-radio label.selected { | |
| background: linear-gradient(135deg, rgba(180,90,0,0.55), rgba(220,160,0,0.4)) !important; | |
| border-color: #ffab40 !important; | |
| text-shadow: 0 0 10px rgba(255,200,100,0.8) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulseAmber 2s ease 0.32s infinite !important; | |
| } | |
| @keyframes pillPulseAmber { | |
| 0% { box-shadow: 0 0 0 0 rgba(255,171,64,0.55), 0 0 20px rgba(255,171,64,0.35); } | |
| 50% { box-shadow: 0 0 0 5px rgba(255,171,64,0), 0 0 28px rgba(255,171,64,0.15); } | |
| 100% { box-shadow: 0 0 0 0 rgba(255,171,64,0), 0 0 20px rgba(255,171,64,0.35); } | |
| } | |
| /* Preprocess β green */ | |
| #preprocess-radio input:checked + label, | |
| #preprocess-radio label:has(input:checked), | |
| #preprocess-radio label.selected { | |
| background: linear-gradient(135deg, rgba(20,140,70,0.55), rgba(0,200,120,0.35)) !important; | |
| border-color: #00e676 !important; | |
| text-shadow: 0 0 10px rgba(100,255,180,0.8) !important; | |
| animation: pillBounce 0.32s ease forwards, pillPulseGreen 2s ease 0.32s infinite !important; | |
| } | |
| @keyframes pillPulseGreen { | |
| 0% { box-shadow: 0 0 0 0 rgba(0,230,118,0.55), 0 0 20px rgba(0,230,118,0.35); } | |
| 50% { box-shadow: 0 0 0 5px rgba(0,230,118,0), 0 0 28px rgba(0,230,118,0.15); } | |
| 100% { box-shadow: 0 0 0 0 rgba(0,230,118,0), 0 0 20px rgba(0,230,118,0.35); } | |
| } | |
| /* Center mode selector */ | |
| #mode-selector-wrap, | |
| #mode-selector-wrap > div { | |
| display: flex !important; | |
| justify-content: center !important; | |
| } | |
| #mode-selector-wrap .wrap { justify-content: center !important; } | |
| /* Hide default radio headings for these selector groups */ | |
| #mode-radio > .label-wrap, | |
| #duration-radio > .label-wrap, | |
| #preprocess-radio > .label-wrap { | |
| display: none !important; | |
| } | |
| /* βββ Duration Pills βββ */ | |
| #main-layout { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 20px; | |
| align-items: start; | |
| } | |
| @media (max-width: 900px) { | |
| #main-layout { grid-template-columns: 1fr; } | |
| } | |
| /* βββ Cards βββ */ | |
| .ltx-card { | |
| background: linear-gradient(135deg, rgba(13,8,32,0.95) 0%, rgba(18,13,40,0.9) 100%); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius-lg); | |
| padding: 20px; | |
| backdrop-filter: blur(20px); | |
| box-shadow: 0 8px 32px rgba(0,0,0,0.4), inset 0 1px 0 rgba(180,100,255,0.1); | |
| position: relative; | |
| overflow: hidden; | |
| transition: border-color 0.3s ease, box-shadow 0.3s ease; | |
| } | |
| .ltx-card:hover { | |
| border-color: rgba(180,100,255,0.4); | |
| box-shadow: 0 8px 40px rgba(0,0,0,0.5), 0 0 20px rgba(138,43,226,0.1); | |
| } | |
| .ltx-card::before { | |
| content: ''; | |
| position: absolute; | |
| top: 0; left: 0; right: 0; | |
| height: 1px; | |
| background: linear-gradient(90deg, transparent, rgba(180,100,255,0.5), transparent); | |
| } | |
| .ltx-card-label { | |
| font-family: var(--font-display); | |
| font-size: 11px; | |
| font-weight: 700; | |
| letter-spacing: 0.15em; | |
| text-transform: uppercase; | |
| color: var(--purple-2); | |
| margin-bottom: 14px; | |
| display: flex; | |
| align-items: center; | |
| gap: 8px; | |
| } | |
| .ltx-card-label::after { | |
| content: ''; | |
| flex: 1; | |
| height: 1px; | |
| background: linear-gradient(90deg, var(--border), transparent); | |
| } | |
| /* βββ Image Upload βββ */ | |
| .media-upload-row { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 12px; | |
| margin-bottom: 16px; | |
| } | |
| .media-upload-row.single { grid-template-columns: 1fr; } | |
| .media-upload-row.triple { grid-template-columns: 1fr 1fr 1fr; } | |
| /* Override Gradio image/video upload */ | |
| .ltx-card .image-container, | |
| .ltx-card .video-container, | |
| .ltx-card .wrap { | |
| border-radius: 12px !important; | |
| border: 1px solid var(--border) !important; | |
| background: rgba(8,4,20,0.6) !important; | |
| overflow: hidden !important; | |
| } | |
| .ltx-card .image-container:hover, | |
| .ltx-card .wrap:hover { | |
| border-color: var(--border-bright) !important; | |
| } | |
| /* βββ Prompt Box βββ */ | |
| .ltx-prompt-wrap { position: relative; } | |
| .ltx-prompt-area { | |
| width: 100%; | |
| min-height: 120px; | |
| max-height: 220px; | |
| background: rgba(8,4,20,0.8) !important; | |
| border: 1px solid var(--border) !important; | |
| border-radius: 12px !important; | |
| color: var(--text-primary) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 15px !important; | |
| font-weight: 400 !important; | |
| padding: 14px 16px !important; | |
| resize: none !important; | |
| outline: none !important; | |
| transition: border-color 0.25s ease, box-shadow 0.25s ease !important; | |
| line-height: 1.5 !important; | |
| } | |
| .ltx-prompt-area:focus { | |
| border-color: var(--purple-2) !important; | |
| box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important; | |
| } | |
| /* Override Gradio textbox */ | |
| .ltx-card textarea { | |
| background: rgba(8,4,20,0.8) !important; | |
| border: 1px solid var(--border) !important; | |
| border-radius: 12px !important; | |
| color: var(--text-primary) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 15px !important; | |
| transition: border-color 0.25s ease, box-shadow 0.25s ease !important; | |
| } | |
| .ltx-card textarea:focus { | |
| border-color: var(--purple-2) !important; | |
| box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important; | |
| } | |
| /* βββ Output Video βββ */ | |
| .ltx-btn { | |
| display: inline-flex; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 8px; | |
| padding: 12px 24px; | |
| border-radius: var(--radius-pill); | |
| font-family: var(--font-body); | |
| font-size: 15px; | |
| font-weight: 700; | |
| letter-spacing: 0.04em; | |
| cursor: pointer; | |
| border: none; | |
| outline: none; | |
| transition: all 0.22s ease; | |
| position: relative; | |
| overflow: hidden; | |
| user-select: none; | |
| } | |
| .ltx-btn::after { | |
| content: ''; | |
| position: absolute; | |
| inset: 0; | |
| background: rgba(255,255,255,0); | |
| transition: background 0.18s ease; | |
| border-radius: inherit; | |
| } | |
| .ltx-btn:hover::after { background: rgba(255,255,255,0.07); } | |
| .ltx-btn:active { transform: scale(0.97); } | |
| .ltx-btn:active::after { background: rgba(255,255,255,0.12); } | |
| /* Primary Generate */ | |
| .ltx-btn-generate { | |
| width: 100%; | |
| padding: 16px 32px; | |
| font-size: 17px; | |
| font-family: var(--font-display) !important; | |
| font-weight: 700 !important; | |
| letter-spacing: 0.1em; | |
| background: linear-gradient(135deg, #6b20c8 0%, #9a40e0 40%, #00bcd4 100%); | |
| color: #fff !important; | |
| box-shadow: 0 4px 24px rgba(138,43,226,0.45), 0 0 0 1px rgba(180,100,255,0.3); | |
| } | |
| .ltx-btn-generate:hover { | |
| box-shadow: 0 6px 32px rgba(138,43,226,0.6), 0 0 40px rgba(0,229,255,0.2), 0 0 0 1px rgba(180,100,255,0.5) !important; | |
| transform: translateY(-2px); | |
| } | |
| .ltx-btn-generate:active { transform: scale(0.98) translateY(0); } | |
| /* Cancel */ | |
| .ltx-btn-cancel { | |
| width: 100%; | |
| padding: 12px 24px; | |
| background: rgba(255,60,100,0.12); | |
| border: 1px solid rgba(255,60,100,0.35) !important; | |
| color: rgba(255,130,150,0.9) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 14px !important; | |
| font-weight: 700 !important; | |
| letter-spacing: 0.06em; | |
| } | |
| .ltx-btn-cancel:hover { | |
| background: rgba(255,60,100,0.22) !important; | |
| border-color: rgba(255,60,100,0.6) !important; | |
| color: #ffb0bc !important; | |
| box-shadow: 0 0 20px rgba(255,60,100,0.2) !important; | |
| transform: translateY(-1px); | |
| } | |
| .ltx-btn-cancel:active { transform: scale(0.98); } | |
| /* Override Gradio button defaults */ | |
| button.primary { | |
| background: linear-gradient(135deg, #6b20c8 0%, #9a40e0 40%, #00bcd4 100%) !important; | |
| border: none !important; | |
| font-family: var(--font-display) !important; | |
| letter-spacing: 0.08em !important; | |
| box-shadow: 0 4px 24px rgba(138,43,226,0.45) !important; | |
| transition: all 0.22s ease !important; | |
| } | |
| button.primary:hover { | |
| box-shadow: 0 6px 32px rgba(138,43,226,0.65), 0 0 30px rgba(0,229,255,0.15) !important; | |
| transform: translateY(-2px) !important; | |
| } | |
| button.primary:active { transform: scale(0.98) !important; } | |
| button.secondary { | |
| background: rgba(138,43,226,0.1) !important; | |
| border: 1px solid var(--border) !important; | |
| color: var(--text-primary) !important; | |
| transition: all 0.22s ease !important; | |
| } | |
| button.secondary:hover { | |
| background: rgba(138,43,226,0.22) !important; | |
| border-color: var(--border-bright) !important; | |
| box-shadow: 0 0 16px rgba(138,43,226,0.2) !important; | |
| } | |
| /* βββ Sliders, Dropdowns, Checkboxes βββ */ | |
| input[type="range"] { | |
| accent-color: var(--purple-2) !important; | |
| } | |
| .gradio-slider input[type="range"]::-webkit-slider-thumb { | |
| background: var(--purple-2) !important; | |
| box-shadow: 0 0 10px var(--purple-glow) !important; | |
| } | |
| select, .gr-dropdown select { | |
| background: rgba(8,4,20,0.85) !important; | |
| border: 1px solid var(--border) !important; | |
| color: var(--text-primary) !important; | |
| border-radius: 8px !important; | |
| font-family: var(--font-body) !important; | |
| } | |
| select:focus { | |
| border-color: var(--purple-2) !important; | |
| box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important; | |
| outline: none !important; | |
| } | |
| input[type="checkbox"] { | |
| accent-color: var(--purple-2) !important; | |
| } | |
| /* βββ Labels βββ */ | |
| label, .label-wrap span, .gr-form label { | |
| color: var(--text-muted) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 13px !important; | |
| font-weight: 600 !important; | |
| letter-spacing: 0.04em !important; | |
| text-transform: uppercase !important; | |
| } | |
| /* βββ Main Layout βββ */ | |
| .output-wrap { | |
| background: rgba(8,4,20,0.9); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius-lg); | |
| overflow: hidden; | |
| position: relative; | |
| min-height: 320px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| } | |
| .output-wrap video { | |
| width: 100%; | |
| border-radius: var(--radius-lg); | |
| } | |
| .output-placeholder { | |
| display: flex; | |
| flex-direction: column; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 12px; | |
| color: var(--text-muted); | |
| font-family: var(--font-body); | |
| font-size: 14px; | |
| padding: 40px; | |
| text-align: center; | |
| } | |
| .output-placeholder .icon { | |
| font-size: 48px; | |
| opacity: 0.5; | |
| animation: pulse 2.5s ease-in-out infinite; | |
| } | |
| @keyframes pulse { | |
| 0%, 100% { opacity: 0.4; transform: scale(1); } | |
| 50% { opacity: 0.7; transform: scale(1.05); } | |
| } | |
| /* βββ Accordion / Advanced βββ */ | |
| .gr-accordion { | |
| background: rgba(8,4,20,0.5) !important; | |
| border: 1px solid var(--border) !important; | |
| border-radius: 12px !important; | |
| overflow: hidden !important; | |
| } | |
| .gr-accordion > .label-wrap { | |
| background: rgba(138,43,226,0.08) !important; | |
| padding: 12px 16px !important; | |
| cursor: pointer !important; | |
| transition: background 0.2s ease !important; | |
| } | |
| .gr-accordion > .label-wrap:hover { | |
| background: rgba(138,43,226,0.15) !important; | |
| } | |
| /* βββ Number inputs βββ */ | |
| input[type="number"] { | |
| background: rgba(8,4,20,0.8) !important; | |
| border: 1px solid var(--border) !important; | |
| color: var(--text-primary) !important; | |
| border-radius: 8px !important; | |
| font-family: var(--font-body) !important; | |
| } | |
| input[type="number"]:focus { | |
| border-color: var(--purple-2) !important; | |
| box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important; | |
| outline: none !important; | |
| } | |
| /* βββ Status / Seed display βββ */ | |
| .seed-display { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 8px; | |
| padding: 6px 14px; | |
| border-radius: var(--radius-pill); | |
| border: 1px solid var(--border); | |
| background: rgba(8,4,20,0.6); | |
| color: var(--text-muted); | |
| font-family: var(--font-body); | |
| font-size: 12px; | |
| font-weight: 600; | |
| letter-spacing: 0.06em; | |
| } | |
| /* βββ Custom textarea override βββ */ | |
| .gr-textbox textarea { | |
| background: rgba(8,4,20,0.8) !important; | |
| border: 1px solid var(--border) !important; | |
| border-radius: 12px !important; | |
| color: var(--text-primary) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 15px !important; | |
| } | |
| /* βββ Glow divider βββ */ | |
| .glow-divider { | |
| height: 1px; | |
| background: linear-gradient(90deg, transparent, var(--purple-1), var(--cyan-acc), var(--purple-1), transparent); | |
| margin: 20px 0; | |
| opacity: 0.5; | |
| } | |
| /* βββ Badges βββ */ | |
| .badge { | |
| display: inline-flex; | |
| align-items: center; | |
| padding: 3px 10px; | |
| border-radius: var(--radius-pill); | |
| font-family: var(--font-body); | |
| font-size: 11px; | |
| font-weight: 700; | |
| letter-spacing: 0.08em; | |
| text-transform: uppercase; | |
| } | |
| .badge-purple { | |
| background: rgba(138,43,226,0.2); | |
| border: 1px solid rgba(138,43,226,0.4); | |
| color: var(--purple-3); | |
| } | |
| .badge-cyan { | |
| background: rgba(0,229,255,0.1); | |
| border: 1px solid rgba(0,229,255,0.3); | |
| color: var(--cyan-acc); | |
| } | |
| /* βββ Scrollbar βββ */ | |
| ::-webkit-scrollbar { width: 6px; height: 6px; } | |
| ::-webkit-scrollbar-track { background: var(--bg-deep); } | |
| ::-webkit-scrollbar-thumb { background: rgba(138,43,226,0.4); border-radius: 3px; } | |
| ::-webkit-scrollbar-thumb:hover { background: var(--purple-1); } | |
| /* βββ Gradio structural overrides βββ */ | |
| #col-container { max-width: 1300px; margin: 0 auto; } | |
| .gap { gap: 12px !important; } | |
| .contain { background: transparent !important; } | |
| .padded { padding: 0 !important; } | |
| /* Image upload styling */ | |
| .svelte-116rqfv { | |
| background: rgba(8,4,20,0.6) !important; | |
| border: 1px dashed var(--border) !important; | |
| border-radius: 12px !important; | |
| } | |
| /* Progress bar */ | |
| .progress-bar { background: linear-gradient(90deg, var(--purple-1), var(--cyan-acc)) !important; } | |
| /* Generating animation on button */ | |
| @keyframes generating { | |
| 0%, 100% { box-shadow: 0 4px 24px rgba(138,43,226,0.45), 0 0 0 1px rgba(180,100,255,0.3); } | |
| 50% { box-shadow: 0 4px 36px rgba(138,43,226,0.7), 0 0 50px rgba(0,229,255,0.3), 0 0 0 2px rgba(180,100,255,0.5); } | |
| } | |
| /* Custom motion input */ | |
| .custom-motion-wrap { margin-top: 10px; } | |
| .custom-motion-wrap textarea { | |
| background: rgba(8,4,20,0.8) !important; | |
| border: 1px solid rgba(138,43,226,0.3) !important; | |
| border-radius: 10px !important; | |
| color: var(--text-primary) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 14px !important; | |
| min-height: 60px !important; | |
| } | |
| /* Tooltips / info text */ | |
| .info-text { | |
| color: var(--text-muted); | |
| font-family: var(--font-body); | |
| font-size: 12px; | |
| margin-top: 4px; | |
| } | |
| /* βββ High-res toggle chip βββ */ | |
| #high-res-toggle label { | |
| display: inline-flex !important; | |
| align-items: center !important; | |
| gap: 10px !important; | |
| padding: 10px 18px !important; | |
| border-radius: var(--radius-pill) !important; | |
| border: 1px solid var(--border) !important; | |
| background: rgba(0,229,255,0.05) !important; | |
| color: var(--text-muted) !important; | |
| font-size: 14px !important; | |
| font-weight: 700 !important; | |
| cursor: pointer !important; | |
| transition: all 0.22s ease !important; | |
| text-transform: none !important; | |
| letter-spacing: 0.03em !important; | |
| } | |
| #high-res-toggle input:checked + label, | |
| #high-res-toggle label:has(input:checked) { | |
| border-color: var(--cyan-acc) !important; | |
| background: rgba(0,229,255,0.12) !important; | |
| color: var(--cyan-acc) !important; | |
| box-shadow: 0 0 16px rgba(0,229,255,0.2) !important; | |
| } | |
| #high-res-toggle label:hover { | |
| border-color: var(--border-bright) !important; | |
| color: var(--purple-3) !important; | |
| } | |
| """ | |
| # ------------------------------------------------------------- | |
| # UI helper functions | |
| # ------------------------------------------------------------- | |
def update_mode_ui(mode):
    """Return the four visibility updates that follow a mode selection.

    The result is a 4-tuple of ``gr.update(visible=...)`` objects, in this
    fixed order (labels taken from the slots' original inline notes):

    1. last_image           - visible only when ``mode == "Interpolate"``
    2. input_video          - visible only when ``mode == "Motion Control"``
    3. preprocess section   - visible only when ``mode == "Motion Control"``
    4. video_preprocess hidden - visible whenever the mode is NOT
       ``"Motion Control"``

    Any other mode string (e.g. "Image-to-Video") hides the first three
    slots and shows the fourth.
    """
    motion_selected = mode == "Motion Control"
    # One flag per output slot; the order must line up with the outputs list
    # of whichever event handler uses this function.
    visibility_flags = (
        mode == "Interpolate",   # last_image
        motion_selected,         # input_video
        motion_selected,         # preprocess section
        not motion_selected,     # video_preprocess hidden
    )
    return tuple(gr.update(visible=flag) for flag in visibility_flags)
| # ------------------------------------------------------------- | |
| # Gradio App | |
| # ------------------------------------------------------------- | |
| # Builds the whole UI inside a single Blocks context bound to `demo`: | |
| # header, mode selector, a left column of inputs, a right column with the | |
| # generated video and action buttons, and finally the event wiring. | |
| # Component creation order is the on-screen order, so statements here | |
| # should not be reordered casually. | |
| with gr.Blocks( | |
| title="LTX Studio β Anime Video Generator", | |
| ) as demo: | |
| # -- State -- | |
| # ui_mode mirrors mode_radio: it is written by the .then() step of | |
| # mode_radio.change below and is what generate_video receives. | |
| ui_mode = gr.State("Image-to-Video") | |
| # duration is written by duration_radio.change and input_video.change, | |
| # and read by generate_video. | |
| duration = gr.State(3.0) | |
| # NOTE(review): sel_preset is not referenced by any handler in this block. | |
| sel_preset = gr.State("None") | |
| with gr.Column(elem_id="col-container"): | |
| # -- Header -- | |
| gr.HTML(""" | |
| <div id="ltx-header"> | |
| <div class="ltx-nebula"></div> | |
| <div class="ltx-logo-text">β‘ LTX STUDIO</div> | |
| <div class="ltx-tagline">Anime Image Studio Β· Qwen Β· Upscale Gen Β· AI Video Synthesis</div> | |
| <a class="ltx-site-link" href="https://animara.space" target="_blank" rel="noopener noreferrer"> | |
| β¦ Visit Anime Studio Site | |
| </a> | |
| </div> | |
| """) | |
| # -- Mode Selector -- | |
| with gr.Row(elem_id="mode-selector-wrap"): | |
| mode_radio = gr.Radio( | |
| choices=["Image-to-Video", "Interpolate", "Motion Control"], | |
| value="Image-to-Video", | |
| label="", | |
| show_label=False, | |
| interactive=True, | |
| elem_id="mode-radio", | |
| ) | |
| # -- Main Two-Column Layout -- | |
| with gr.Row(equal_height=False): | |
| # ------------------------------------ | |
| # LEFT PANEL - Inputs | |
| # ------------------------------------ | |
| with gr.Column(scale=1, elem_classes=["ltx-card"]): | |
| gr.HTML('<div class="ltx-card-label">β¦ Media Input</div>') | |
| with gr.Row(): | |
| first_image = gr.Image( | |
| label="First Frame", | |
| type="filepath", | |
| height=220, | |
| elem_id="first_image", | |
| ) | |
| # Starts hidden; its visibility is an output of mode_radio.change. | |
| last_image = gr.Image( | |
| label="Last Frame", | |
| type="filepath", | |
| visible=False, | |
| height=220, | |
| elem_id="last_image", | |
| ) | |
| # Starts hidden; its visibility is an output of mode_radio.change. | |
| input_video = gr.Video( | |
| label="Reference Video", | |
| visible=False, | |
| height=220, | |
| elem_id="input_video", | |
| ) | |
| # -- Video Preprocess (Motion Control only - shown at TOP) -- | |
| with gr.Column(visible=False, elem_id="preprocess-section") as preprocess_section: | |
| gr.HTML('<div class="ltx-card-label">β¦ Reference Video Processing</div>') | |
| # This radio (not the hidden textbox below) is what generate_video receives. | |
| video_preprocess = gr.Radio( | |
| choices=["Raw (no preprocessing)", "Pose (DWPose)", "Canny Edge", "Depth (Laplacian)"], | |
| value="Raw (no preprocessing)", | |
| label="", | |
| show_label=False, | |
| interactive=True, | |
| elem_id="preprocess-radio", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # Hidden preprocess for non-motion modes | |
| # NOTE(review): this textbox is only used as the fourth output of | |
| # mode_radio.change; nothing in this block reads its value. | |
| video_preprocess_hidden = gr.Textbox( | |
| value="Raw (no preprocessing)", | |
| visible=False, | |
| elem_id="preprocess-hidden", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # -- Prompt Presets -- | |
| gr.HTML('<div class="ltx-card-label">β¦ Prompt Presets</div>') | |
| # No preset is selected initially (value=None). | |
| prompt_preset_radio = gr.Radio( | |
| choices=list(PROMPT_PRESETS.keys()), | |
| value=None, | |
| label="", | |
| interactive=True, | |
| elem_id="prompt-preset-radio", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| gr.HTML('<div class="ltx-card-label">β¦ Style Modifier</div>') | |
| style_radio = gr.Radio( | |
| choices=list(STYLE_MODIFIERS.keys()), | |
| value="None", | |
| label="", | |
| interactive=True, | |
| elem_id="style-radio", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| gr.HTML('<div class="ltx-card-label">β¦ Prompt</div>') | |
| # The prompt box is both user-editable and an output of the preset, | |
| # style and motion handlers wired further down. | |
| prompt = gr.Textbox( | |
| label="", | |
| value=DEFAULT_PROMPT, | |
| placeholder="Describe motion, scene dynamics, facial expression, camera movement, or style...", | |
| lines=4, | |
| max_lines=8, | |
| elem_id="prompt-box", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # -- Motion Presets -- | |
| gr.HTML('<div class="ltx-card-label">β¦ Motion Presets</div>') | |
| preset_radio = gr.Radio( | |
| choices=list(MOTION_PRESETS.keys()), | |
| value="None", | |
| label="", | |
| interactive=True, | |
| elem_id="preset-radio", | |
| ) | |
| # Starts hidden; the wrapper is an output of preset_radio.change. | |
| with gr.Column(visible=False, elem_id="custom-motion-wrap") as custom_motion_wrap: | |
| custom_motion = gr.Textbox( | |
| label="Custom Motion Description", | |
| placeholder="e.g. slow spiral camera orbit with lens flare bloom...", | |
| lines=2, | |
| elem_id="custom-motion-input", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # -- Duration -- | |
| gr.HTML('<div class="ltx-card-label">β¦ Clip Duration</div>') | |
| # The selected label is converted into the `duration` state by | |
| # apply_duration_choice (see duration_radio.change below). | |
| duration_radio = gr.Radio( | |
| choices=["3s", "5s", "10s", "15s"], | |
| value="3s", | |
| label="", | |
| show_label=False, | |
| interactive=True, | |
| elem_id="duration-radio", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # -- High Res Toggle -- | |
| gr.HTML('<div class="ltx-card-label">β¦ Output Quality</div>') | |
| # Only feeds update_resolution / on_video_upload; it is not passed | |
| # to generate_video directly. | |
| high_res = gr.Checkbox( | |
| label="β¬ High Resolution (2Γ) β slower, uses more VRAM", | |
| value=False, | |
| elem_id="high-res-toggle", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # -- Advanced Settings -- | |
| with gr.Accordion("β Advanced Settings", open=False): | |
| conditioning_strength = gr.Slider( | |
| label="Video Conditioning Strength", | |
| minimum=0.0, maximum=1.0, value=0.85, step=0.05, | |
| ) | |
| enhance_prompt = gr.Checkbox(label="Enhance Prompt (auto-rewrite)", value=False) | |
| use_video_audio = gr.Checkbox( | |
| label="Extract Audio from Reference Video", | |
| value=True, | |
| ) | |
| with gr.Row(): | |
| seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=42, step=1) | |
| randomize_seed = gr.Checkbox(label="Randomize Seed", value=True) | |
| # width/height are overwritten by the resolution handlers whenever | |
| # the media inputs or the high-res checkbox change. | |
| with gr.Row(): | |
| width = gr.Number(label="Width", value=768, precision=0) | |
| height = gr.Number(label="Height", value=512, precision=0) | |
| # ------------------------------------ | |
| # RIGHT PANEL - Output | |
| # ------------------------------------ | |
| with gr.Column(scale=1, elem_classes=["ltx-card"]): | |
| gr.HTML('<div class="ltx-card-label">β¦ Generated Video</div>') | |
| output_video = gr.Video( | |
| label="", | |
| autoplay=True, | |
| height=400, | |
| elem_id="output-video", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| generate_btn = gr.Button( | |
| "β‘ Generate Video", | |
| variant="primary", | |
| size="lg", | |
| elem_id="generate-btn", | |
| ) | |
| cancel_btn = gr.Button( | |
| "β Cancel Generation", | |
| variant="secondary", | |
| size="sm", | |
| elem_id="cancel-btn", | |
| elem_classes=["ltx-btn-cancel"], | |
| ) | |
| # Read-only field; it is the second output of generate_video. | |
| seed_display = gr.Number( | |
| label="Used Seed", | |
| value=42, | |
| interactive=False, | |
| elem_id="seed-display", | |
| ) | |
| gr.HTML('<div class="glow-divider"></div>') | |
| # Info badges | |
| gr.HTML(""" | |
| <div style="display:flex; gap:8px; flex-wrap:wrap; margin-top:4px;"> | |
| <span class="badge badge-purple">LTX 2.3 22B</span> | |
| <span class="badge badge-cyan">Distilled</span> | |
| <span class="badge badge-purple">Audio-Video</span> | |
| <span class="badge badge-cyan">IC-LoRA</span> | |
| </div> | |
| """) | |
| # --------------------------------------------------------- | |
| # Event Wiring | |
| # --------------------------------------------------------- | |
| # Mode changes: first toggle the mode-dependent widgets, then copy the | |
| # selected mode into the ui_mode state. | |
| # NOTE(review): update_mode_visibility is not defined in the part of the | |
| # file visible here; update_mode_ui (defined just above the app) returns | |
| # four updates in this same order - confirm which one is intended. | |
| mode_radio.change( | |
| fn=update_mode_visibility, | |
| inputs=[mode_radio], | |
| outputs=[last_image, input_video, preprocess_section, video_preprocess_hidden], | |
| api_name=False, | |
| ).then( | |
| fn=lambda x: x, | |
| inputs=[mode_radio], | |
| outputs=[ui_mode], | |
| api_name=False, | |
| ) | |
| # Motion preset -> writes directly to prompt + shows/hides custom input | |
| preset_radio.change( | |
| fn=on_motion_preset_to_prompt, | |
| inputs=[preset_radio, custom_motion], | |
| outputs=[custom_motion_wrap, prompt], | |
| api_name=False, | |
| ) | |
| # Custom motion text -> live update prompt when typing | |
| custom_motion.change( | |
| fn=on_custom_motion_change, | |
| inputs=[custom_motion, preset_radio], | |
| outputs=[prompt], | |
| api_name=False, | |
| ) | |
| # Prompt preset -> fills prompt box | |
| # A preset name that is missing from PROMPT_PRESETS, or that maps to an | |
| # empty string, yields a bare gr.update() and leaves the prompt untouched. | |
| def on_prompt_preset_select(preset_name): | |
| text = PROMPT_PRESETS.get(preset_name, "") | |
| return gr.update(value=text) if text else gr.update() | |
| prompt_preset_radio.change( | |
| fn=on_prompt_preset_select, | |
| inputs=[prompt_preset_radio], | |
| outputs=[prompt], | |
| api_name=False, | |
| ) | |
| # Style modifier -> appends to current prompt | |
| # (prompt is both an input and the output of this handler) | |
| style_radio.change( | |
| fn=apply_style_modifier, | |
| inputs=[style_radio, prompt], | |
| outputs=[prompt], | |
| api_name=False, | |
| ) | |
| # Duration: radio label -> duration state | |
| duration_radio.change( | |
| fn=apply_duration_choice, | |
| inputs=[duration_radio], | |
| outputs=[duration], | |
| api_name=False, | |
| ) | |
| # Image / video change -> auto resolution | |
| # first_image, last_image and high_res all call update_resolution with the | |
| # same four inputs; input_video uses on_video_upload, which takes its | |
| # inputs in a different order and also sets the duration radio and state. | |
| first_image.change( | |
| fn=update_resolution, | |
| inputs=[first_image, last_image, input_video, high_res], | |
| outputs=[width, height], | |
| api_name=False, | |
| ) | |
| last_image.change( | |
| fn=update_resolution, | |
| inputs=[first_image, last_image, input_video, high_res], | |
| outputs=[width, height], | |
| api_name=False, | |
| ) | |
| input_video.change( | |
| fn=on_video_upload, | |
| inputs=[input_video, first_image, last_image, high_res], | |
| outputs=[width, height, duration_radio, duration], | |
| api_name=False, | |
| ) | |
| high_res.change( | |
| fn=update_resolution, | |
| inputs=[first_image, last_image, input_video, high_res], | |
| outputs=[width, height], | |
| api_name=False, | |
| ) | |
| # Generate | |
| # The event handle is kept so the cancel button can reference it. | |
| # NOTE(review): the inputs are presumably passed to generate_video | |
| # positionally in this order (note height comes before width) - verify | |
| # against its signature. | |
| gen_event = generate_btn.click( | |
| fn=generate_video, | |
| inputs=[ | |
| prompt, first_image, last_image, input_video, | |
| gr.State(None), # input_audio = None (removed from UI) | |
| ui_mode, duration, video_preprocess, | |
| conditioning_strength, enhance_prompt, use_video_audio, | |
| seed, randomize_seed, height, width, | |
| preset_radio, custom_motion, | |
| ], | |
| outputs=[output_video, seed_display], | |
| ) | |
| # Cancel: no Python callback runs (fn=None); the click only cancels gen_event. | |
| cancel_btn.click( | |
| fn=None, | |
| inputs=[], | |
| outputs=[], | |
| cancels=[gen_event], | |
| api_name=False, | |
| ) | |
if __name__ == "__main__":
    # Assemble the theme on its own so the launch call stays short.
    # Hues and font are the same values the app has always used.
    studio_theme = gr.themes.Base(
        primary_hue=gr.themes.colors.purple,
        secondary_hue=gr.themes.colors.blue,
        neutral_hue=gr.themes.colors.gray,
        font=gr.themes.GoogleFont("Rajdhani"),
    )

    # The stylesheet and theme are supplied at launch time rather than to
    # gr.Blocks(); server-side rendering is off and the MCP server is on.
    demo.launch(
        ssr_mode=False,
        mcp_server=True,
        css=css,
        theme=studio_theme,
    )