import os
import subprocess
import sys
import logging
import random
import tempfile
import uuid
import shutil
from pathlib import Path

# Disable torch.compile / dynamo before any torch import.
# Both variables are set here, ahead of the `import torch` further down the file.
os.environ["TORCH_COMPILE_DISABLE"] = "1"
os.environ["TORCHDYNAMO_DISABLE"] = "1"

# Runtime dependencies are installed with pip when this module is loaded.
# Every call uses check=False, so a failed install does not raise here.
subprocess.run([sys.executable, "-m", "pip", "install", "xformers==0.0.32.post2", "--no-build-isolation"], check=False)
subprocess.run([
    sys.executable, "-m", "pip", "install",
    "dwpose", "onnxruntime-gpu", "imageio[ffmpeg]", "scikit-image",
    "opencv-python-headless", "decord", "num2words"
], check=False)

# NOTE(review): num2words is already part of the batch install above; this
# separate call looks like a fallback for the case where the batch install
# fails as a whole -- confirm before removing it.
subprocess.run([sys.executable, "-m", "pip", "install", "num2words"], check=False)

# Query the installed torch version in a child interpreter (torch is not
# imported in this process yet) and reinstall a torchaudio with the same
# base version from the matching PyTorch wheel index.
_tv = subprocess.run([sys.executable, "-c", "import torch; print(torch.__version__)"], capture_output=True, text=True)
if _tv.returncode == 0:
    _full_ver = _tv.stdout.strip()
    # The part after "+" (when present) names the wheel index directory;
    # "cu124" is the fallback when the version string has no "+" suffix.
    _cuda_suffix = _full_ver.split("+")[-1] if "+" in _full_ver else "cu124"
    _base_ver = _full_ver.split("+")[0]
    print(f"Detected torch {_full_ver}, reinstalling matching torchaudio...")
    # --no-deps keeps pip from touching the installed torch itself.
    subprocess.run([
        sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps",
        f"torchaudio=={_base_ver}",
        "--index-url", f"https://download.pytorch.org/whl/{_cuda_suffix}",
    ], check=False)

# Source repository for the ltx-core / ltx-pipelines packages, cloned next to
# this file and pinned to a fixed commit.
LTX_REPO_URL = "https://github.com/Lightricks/LTX-2.git"
LTX_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LTX-2")
LTX_COMMIT = "ae855f8538843825f9015a419cf4ba5edaf5eec2"

# Always start from a fresh clone: any existing checkout is deleted first.
# Removal errors are ignored; the clone below uses check=True and will raise
# if it cannot complete.
if os.path.exists(LTX_REPO_DIR):
    print(f"Removing existing repo at {LTX_REPO_DIR}...")
    shutil.rmtree(LTX_REPO_DIR, ignore_errors=True)

print(f"Cloning {LTX_REPO_URL}...")
subprocess.run(["git", "clone", LTX_REPO_URL, LTX_REPO_DIR], check=True)
print(f"Checking out commit {LTX_COMMIT}...")
subprocess.run(["git", "-C", LTX_REPO_DIR, "checkout", LTX_COMMIT], check=True)
print("Installing ltx-core and ltx-pipelines from pinned repo commit...")
# Editable installs of both packages, without their dependencies.
# Unlike the pip calls earlier in the file, these use check=True and abort on failure.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "--force-reinstall", "--no-deps",
        "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-core"),
        "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines"),
    ],
    check=True,
)

# Put both package source trees at the front of sys.path so the `ltx_core`
# and `ltx_pipelines` imports below resolve to the pinned checkout.
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))

import cv2
import imageio
import numpy as np
import spaces
import gradio as gr
import torch
from PIL import Image
from huggingface_hub import hf_hub_download, snapshot_download
from safetensors import safe_open

# Set the dynamo config flags directly as well, in addition to the
# TORCH_COMPILE_DISABLE / TORCHDYNAMO_DISABLE environment variables set
# before torch was imported.
torch._dynamo.config.suppress_errors = True
torch._dynamo.config.disable = True

from ltx_core.components.diffusion_steps import EulerDiffusionStep
from ltx_core.components.noisers import GaussianNoiser
from ltx_core.conditioning import (
    ConditioningItem,
    ConditioningItemAttentionStrengthWrapper,
    VideoConditionByReferenceLatent,
)
from ltx_core.loader import LTXV_LORA_COMFY_RENAMING_MAP, LoraPathStrengthAndSDOps
from ltx_core.model.audio_vae import decode_audio as vae_decode_audio
from ltx_core.model.audio_vae import encode_audio as vae_encode_audio
from ltx_core.model.upsampler import upsample_video
from ltx_core.model.video_vae import TilingConfig, VideoEncoder, get_video_chunks_number
from ltx_core.model.video_vae import decode_video as vae_decode_video
from ltx_core.quantization import QuantizationPolicy
from ltx_core.types import Audio, AudioLatentShape, VideoPixelShape
from ltx_pipelines.utils import ModelLedger, euler_denoising_loop
from ltx_pipelines.utils.args import ImageConditioningInput
from ltx_pipelines.utils.constants import DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES
from ltx_pipelines.utils.helpers import (
    assert_resolution,
    cleanup_memory,
    combined_image_conditionings,
    denoise_audio_video,
    denoise_video_only,
    encode_prompts,
    generate_enhanced_prompt,
    get_device,
    simple_denoising_func,
)
from ltx_pipelines.utils.media_io import decode_audio_from_file, encode_video, load_video_conditioning
from ltx_pipelines.utils.types import PipelineComponents

# Replace the `memory_efficient_attention` attribute of ltx_core's attention
# module with the xformers implementation. The value is printed before and
# after so the startup log shows whether the patch took effect. If xformers
# cannot be imported (or the assignment fails) the module is left as it was
# and the failure is only printed.
from ltx_core.model.transformer import attention as _attn_mod
print(f"[ATTN] Before patch: memory_efficient_attention={_attn_mod.memory_efficient_attention}")
try:
    from xformers.ops import memory_efficient_attention as _mea
    _attn_mod.memory_efficient_attention = _mea
    print(f"[ATTN] After patch: memory_efficient_attention={_attn_mod.memory_efficient_attention}")
except Exception as e:
    print(f"[ATTN] xformers patch FAILED: {type(e).__name__}: {e}")

# Root logger at INFO so the pipeline's logging.info messages are emitted.
logging.getLogger().setLevel(logging.INFO)

# Largest value of a signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
DEFAULT_FRAME_RATE = 24.0
DEFAULT_PROMPT = "Make this image come alive with cinematic motion, smooth animation"

# Resolution table: tier ("high" / "low") -> aspect-ratio label -> (width, height).
# update_resolution() indexes it as RESOLUTIONS[tier][aspect] and unpacks (w, h).
RESOLUTIONS = {
    "high": {"16:9": (1536, 1024), "9:16": (1024, 1536), "1:1": (1024, 1024)},
    "low": {"16:9": (768, 512), "9:16": (512, 768), "1:1": (768, 768)},
}

# Available IC-LoRA weights: display label -> Hugging Face repo id and file name.
IC_LORA_OPTIONS = {
    "Union Control (Depth + Edge)": {
        "repo": "Lightricks/LTX-2.3-22b-IC-LoRA-Union-Control",
        "filename": "ltx-2.3-22b-ic-lora-union-control-ref0.5.safetensors",
    },
    "Motion Track Control": {
        "repo": "Lightricks/LTX-2.3-22b-IC-LoRA-Motion-Track-Control",
        "filename": "ltx-2.3-22b-ic-lora-motion-track-control-ref0.5.safetensors",
    },
}
# Key into IC_LORA_OPTIONS for the LoRA file downloaded at startup.
DEFAULT_IC_LORA = "Union Control (Depth + Edge)"

# Motion presets: preset label -> motion description text.
# apply_motion_preset() appends the text to the user prompt, while
# on_motion_preset_to_prompt() writes it into the prompt box as the new value.
# "None" maps to the empty string. "Custom" holds the placeholder "__custom__";
# both functions handle the "Custom" label before looking anything up here and
# use the user-typed custom text instead.
MOTION_PRESETS = {
    "None": "",
    "🌸 Anime Float": "anime style, character floating gently in air with soft flowing hair and glowing particles, dreamy atmospheric motion",
    "⚡ Dynamic Action": "high energy action sequence, fast dynamic movement, speed lines, dramatic anime-style motion blur",
    "🌊 Fluid Wave": "smooth fluid motion like waves, gentle swaying movement, flowing fabric and hair, tranquil atmospheric",
    "🔥 Dramatic Zoom": "slow dramatic zoom in, cinematic depth of field, film grain, epic atmospheric lighting reveal",
    "✨ Magic Sparkle": "magical sparkle effects, glowing particles swirling, enchanted fantasy atmosphere, soft luminous motion",
    "🎭 Emotional Pan": "slow cinematic pan across scene, emotional depth, soft bokeh background, contemplative mood",
    "💫 Orbit Rotate": "smooth orbital camera movement around subject, 360-degree reveal, depth parallax effect",
    "🌙 Moonlit Drift": "peaceful moonlit scene with gentle drifting motion, soft silver light, calm ethereal atmosphere",
    "⚔️ Battle Scene": "intense battle sequence with rapid movement, dramatic poses, energy blasts and impact effects",
    "🌺 Nature Bloom": "flowers blooming in fast motion, nature coming alive, organic growth animation, vibrant colors",
    "🎆 Explosion Burst": "dramatic explosion with shockwave, debris flying, cinematic slow-motion impact, epic scale",
    "Custom": "__custom__",
}

# Ready-made prompts: preset label -> full prompt text.
# apply_prompt_preset() returns the text for a label, or "" for an unknown label.
PROMPT_PRESETS = {
    "🚀 Sci-Fi":    "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement.",
    "🌊 Ocean":     "Crystal clear ocean waves crash onto a pristine white sand beach at golden hour. Foam swirls in slow motion around smooth pebbles as the tide retreats.",
    "🌆 Urban":     "A rain-soaked neon-lit street in a futuristic city at night. Reflections of glowing signs shimmer on the wet pavement as a lone figure walks through pools of violet and cyan light.",
    "🌀 Abstract":  "Swirling ink clouds dissolve in slow motion through crystal clear water, forming intricate fractal patterns of deep indigo, violet, and gold.",
    "🌿 Fantasy":   "A glowing forest at twilight where bioluminescent plants pulse softly with ethereal blue and green light. Tiny glowing particles drift through misty air like fireflies.",
    "🎬 Animate":   "Make this image come alive with smooth, natural cinematic motion. Animate with subtle camera movement, realistic physics, and atmospheric depth.",
}

# Style modifiers: style label -> suffix text.
# Every non-empty suffix starts with ", " because apply_style_modifier()
# concatenates it directly onto the end of the prompt. "None" maps to "".
STYLE_MODIFIERS = {
    "None":               "",
    "🎞 Cinematic 35mm":   ", shot on 35mm film, cinematic bokeh, anamorphic lens flare, filmic grain",
    "📸 8K Ultra Real":    ", photorealistic, 8K resolution, ultra-detailed, sharp focus",
    "🌸 Anime / Ghibli":   ", anime style, Studio Ghibli aesthetic, hand-drawn, warm soft colors",
    "🌑 Dark & Moody":     ", dark atmospheric lighting, high contrast shadows, noir mood",
    "💭 Dreamy / Surreal": ", dreamy surreal atmosphere, soft glow, painterly, ethereal light",
    "📼 Vintage VHS":      ", VHS aesthetic, scanlines, retro 80s color grading, film grain, lo-fi",
}

# Lazily created preprocessing singletons. _get_pose_processor() fills
# _pose_processor on first use.
# NOTE(review): _depth_processor is not referenced in the visible part of this
# file -- confirm whether it is still needed.
_pose_processor = None
_depth_processor = None


def log_memory(tag: str):
    """Print current CUDA memory figures, labelled with *tag*.

    Reports allocated, peak-allocated, free and total memory in GB
    (bytes divided by 1024**3). Does nothing when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return

    gib = 1024**3
    allocated_gb = torch.cuda.memory_allocated() / gib
    peak_gb = torch.cuda.max_memory_allocated() / gib
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    print(
        f"[VRAM {tag}] allocated={allocated_gb:.2f}GB peak={peak_gb:.2f}GB "
        f"free={free_bytes / gib:.2f}GB total={total_bytes / gib:.2f}GB"
    )


def _get_pose_processor():
    """Return the shared DWPose detector, creating it on the first call.

    The detector is stored in the module-level ``_pose_processor`` so later
    calls reuse the same instance. ``dwpose`` is imported only when the
    detector actually has to be built.
    """
    global _pose_processor
    if _pose_processor is not None:
        return _pose_processor

    from dwpose import DwposeDetector
    _pose_processor = DwposeDetector.from_pretrained_default()
    print("[Preprocess] DWPose processor loaded")
    return _pose_processor


def load_video_frames(video_path: str) -> list[np.ndarray]:
    """Read every frame of *video_path* with imageio and return them as a list.

    The reader is closed when the function returns.
    """
    with imageio.get_reader(video_path) as video_reader:
        return [decoded for decoded in video_reader]


def write_video_mp4(frames_float_01: list[np.ndarray], fps: float, out_path: str) -> str:
    """Write float frames to *out_path* with imageio and return *out_path*.

    Each frame is clipped to [0, 1], scaled by 255 and cast to uint8. All
    frames are converted before the writer is opened.
    """

    def quantize(frame):
        return (np.clip(frame, 0.0, 1.0) * 255).astype(np.uint8)

    quantized_frames = list(map(quantize, frames_float_01))
    with imageio.get_writer(out_path, fps=fps, macro_block_size=1) as sink:
        for image in quantized_frames:
            sink.append_data(image)
    return out_path


def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
    """Render a DWPose image for every frame.

    Each frame is passed to the shared pose detector with body, hand and face
    enabled. The result is converted to RGB, resized to (width, height) with
    bilinear filtering, and returned as a float32 array divided by 255.
    """
    detector = _get_pose_processor()

    def render_pose(frame):
        source = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
        skeleton = detector(source, include_body=True, include_hand=True, include_face=True)
        # The detector may hand back something other than a PIL image;
        # coerce it through a uint8 array in that case.
        if not isinstance(skeleton, Image.Image):
            skeleton = Image.fromarray(np.array(skeleton).astype(np.uint8))
        skeleton = skeleton.convert("RGB").resize((width, height), Image.BILINEAR)
        return np.array(skeleton).astype(np.float32) / 255.0

    return [render_pose(frame) for frame in frames]


def preprocess_video_canny(frames: list[np.ndarray], width: int, height: int, low_threshold: int = 50, high_threshold: int = 100) -> list[np.ndarray]:
    """Compute a three-channel Canny edge map for every frame.

    Each frame is resized to (width, height), converted from RGB to
    grayscale and run through ``cv2.Canny`` with the given thresholds. The
    single-channel result is repeated on the last axis and returned as
    float32 divided by 255.
    """

    def edge_map(frame):
        scaled = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
        luma = cv2.cvtColor(scaled, cv2.COLOR_RGB2GRAY)
        edges = cv2.Canny(luma, low_threshold, high_threshold)
        stacked = np.stack((edges,) * 3, axis=-1)
        return stacked.astype(np.float32) / 255.0

    return [edge_map(frame) for frame in frames]


def preprocess_video_depth(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
    """Compute a three-channel Laplacian-magnitude map for every frame.

    Each frame is resized to (width, height) and converted to float32
    grayscale. The absolute Laplacian (kernel size 5) is divided by its own
    per-frame maximum (plus 1e-8 to avoid division by zero) and repeated on
    the last axis.
    """

    def laplacian_map(frame):
        scaled = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
        luma = cv2.cvtColor(scaled, cv2.COLOR_RGB2GRAY).astype(np.float32)
        magnitude = np.abs(cv2.Laplacian(luma, cv2.CV_32F, ksize=5))
        magnitude = magnitude / (magnitude.max() + 1e-8)
        return np.stack((magnitude,) * 3, axis=-1)

    return [laplacian_map(frame) for frame in frames]


def preprocess_conditioning_video(video_path: str, mode: str, width: int, height: int, num_frames: int, fps: float) -> tuple[str, str]:
    """Turn an input video into a conditioning video plus a first-frame image.

    The video is decoded, truncated to the first ``num_frames`` frames, and
    processed according to ``mode``:

    * ``"Pose (DWPose)"``     -> :func:`preprocess_video_pose`
    * ``"Canny Edge"``        -> :func:`preprocess_video_canny`
    * ``"Depth (Laplacian)"`` -> :func:`preprocess_video_depth`
    * anything else           -> the raw frames scaled to float [0, 1]
      (no resizing is applied in this case)

    Args:
        video_path: Path of the source video.
        mode: Preprocessing mode label (see above).
        width: Target width passed to the preprocessors.
        height: Target height passed to the preprocessors.
        num_frames: Maximum number of leading frames to keep.
        fps: Frame rate of the written conditioning video.

    Returns:
        ``(cond_mp4, first_png)``: path of the written conditioning MP4 and
        path of a PNG holding the first (unprocessed) frame. Both are
        temporary files that the caller is responsible for deleting.

    Raises:
        ValueError: If no frames could be decoded from ``video_path``.
    """
    frames = load_video_frames(video_path)
    if not frames:
        raise ValueError("No frames decoded from video")
    frames = frames[:num_frames]

    # mkstemp creates the file atomically; the descriptor is closed right away
    # so no handle is leaked and the path can be reopened by the image writer.
    png_fd, first_png = tempfile.mkstemp(suffix=".png")
    os.close(png_fd)
    Image.fromarray(frames[0]).save(first_png)

    if mode == "Pose (DWPose)":
        processed = preprocess_video_pose(frames, width, height)
    elif mode == "Canny Edge":
        processed = preprocess_video_canny(frames, width, height)
    elif mode == "Depth (Laplacian)":
        processed = preprocess_video_depth(frames, width, height)
    else:
        processed = [f.astype(np.float32) / 255.0 for f in frames]

    # Same approach for the video: reserve the name safely instead of using
    # the deprecated, race-prone tempfile.mktemp().
    mp4_fd, cond_mp4 = tempfile.mkstemp(suffix=".mp4")
    os.close(mp4_fd)
    write_video_mp4(processed, fps=fps, out_path=cond_mp4)
    return cond_mp4, first_png


def _read_lora_reference_downscale_factor(lora_path: str) -> int:
    try:
        with safe_open(lora_path, framework="pt") as f:
            metadata = f.metadata() or {}
            return int(metadata.get("reference_downscale_factor", 1))
    except Exception as e:
        logging.warning(f"Failed to read metadata from LoRA file '{lora_path}': {e}")
        return 1


class LTX23UnifiedPipeline:
    """Two-stage LTX-2.3 distilled video (and audio) generation pipeline.

    Stage 1 denoises at half the requested width/height, the resulting video
    latent is spatially upsampled, and stage 2 denoises again at the full
    requested resolution. Image conditionings are applied in both stages;
    reference-video (IC-LoRA) conditionings are added in stage 1 only. When
    an input audio file is supplied, only the video is denoised and the
    decoded input audio is returned unchanged as the output audio.
    """

    def __init__(
        self,
        distilled_checkpoint_path: str,
        spatial_upsampler_path: str,
        gemma_root: str,
        ic_loras: list[LoraPathStrengthAndSDOps] | None = None,
        device: torch.device | None = None,
        quantization: QuantizationPolicy | None = None,
        reference_downscale_factor: int | None = None,
    ):
        """Create the model ledgers and resolve the reference downscale factor.

        Args:
            distilled_checkpoint_path: Checkpoint path given to every ModelLedger.
            spatial_upsampler_path: Spatial upsampler weights path.
            gemma_root: Root directory of the Gemma text encoder.
            ic_loras: IC-LoRAs applied to the stage-1 ledger only. ``None`` is
                treated as an empty list.
            device: Target device; defaults to ``get_device()``.
            quantization: Quantization policy forwarded to the ledgers.
            reference_downscale_factor: Explicit downscale factor for reference
                videos. When ``None`` it is read from the metadata of each
                LoRA in ``ic_loras`` (1 if none of them specifies another value).

        Raises:
            ValueError: If two LoRAs specify different non-1 downscale factors.
        """
        self.device = device or get_device()
        self.dtype = torch.bfloat16
        ic_loras = ic_loras or []
        self.has_ic_lora = len(ic_loras) > 0

        self.stage_1_model_ledger = ModelLedger(
            dtype=self.dtype,
            device=self.device,
            checkpoint_path=distilled_checkpoint_path,
            spatial_upsampler_path=spatial_upsampler_path,
            gemma_root_path=gemma_root,
            loras=ic_loras,
            quantization=quantization,
        )

        # Stage 2 runs without LoRAs. With IC-LoRAs present it therefore gets
        # its own LoRA-free ledger; otherwise both stages share one ledger.
        if self.has_ic_lora:
            self.stage_2_model_ledger = ModelLedger(
                dtype=self.dtype,
                device=self.device,
                checkpoint_path=distilled_checkpoint_path,
                spatial_upsampler_path=spatial_upsampler_path,
                gemma_root_path=gemma_root,
                loras=[],
                quantization=quantization,
            )
        else:
            self.stage_2_model_ledger = self.stage_1_model_ledger

        self.pipeline_components = PipelineComponents(dtype=self.dtype, device=self.device)

        if reference_downscale_factor is not None:
            # An explicit value wins; LoRA metadata is not consulted.
            self.reference_downscale_factor = reference_downscale_factor
        else:
            self.reference_downscale_factor = 1
            for lora in ic_loras:
                scale = _read_lora_reference_downscale_factor(lora.path)
                if scale != 1:
                    # Accept the first non-1 factor; any later LoRA must agree with it.
                    if self.reference_downscale_factor not in (1, scale):
                        raise ValueError(
                            f"Conflicting reference_downscale_factor: already {self.reference_downscale_factor}, got {scale}"
                        )
                    self.reference_downscale_factor = scale

        logging.info(f"[Pipeline] reference_downscale_factor={self.reference_downscale_factor}")

    def _create_ic_conditionings(
        self,
        video_conditioning: list[tuple[str, float]],
        height: int,
        width: int,
        num_frames: int,
        video_encoder: VideoEncoder,
        conditioning_strength: float = 1.0,
    ) -> list[ConditioningItem]:
        """Encode reference videos into latent conditioning items.

        Args:
            video_conditioning: ``(video_path, strength)`` pairs.
            height: Height of the stage the conditionings are built for.
            width: Width of the stage the conditionings are built for.
            num_frames: Frame cap used when loading each reference video.
            video_encoder: Encoder used to produce the reference latents.
            conditioning_strength: When below 1.0, each item is wrapped in
                ``ConditioningItemAttentionStrengthWrapper`` with this value
                passed as ``attention_mask``.

        Returns:
            One conditioning item per entry of ``video_conditioning``.
        """
        conditionings: list[ConditioningItem] = []
        scale = self.reference_downscale_factor
        # Reference videos are loaded at the stage resolution integer-divided
        # by the downscale factor.
        ref_height = height // scale
        ref_width = width // scale

        for video_path, strength in video_conditioning:
            video = load_video_conditioning(
                video_path=video_path,
                height=ref_height,
                width=ref_width,
                frame_cap=num_frames,
                dtype=self.dtype,
                device=self.device,
            )
            encoded_video = video_encoder(video)
            cond = VideoConditionByReferenceLatent(latent=encoded_video, downscale_factor=scale, strength=strength)
            if conditioning_strength < 1.0:
                cond = ConditioningItemAttentionStrengthWrapper(cond, attention_mask=conditioning_strength)
            conditionings.append(cond)

        if conditionings:
            logging.info(f"[IC-LoRA] Added {len(conditionings)} video conditioning(s)")
        return conditionings

    def __call__(
        self,
        prompt: str,
        seed: int,
        height: int,
        width: int,
        num_frames: int,
        frame_rate: float,
        images: list[ImageConditioningInput],
        audio_path: str | None = None,
        video_conditioning: list[tuple[str, float]] | None = None,
        tiling_config: TilingConfig | None = None,
        enhance_prompt: bool = False,
        conditioning_strength: float = 1.0,
    ):
        """Run both stages and decode the result.

        Args:
            prompt: Text prompt. When ``audio_path`` is given,
                " synchronized lipsync" is appended before encoding.
            seed: Seed for the torch generator that drives the noiser.
            height: Final output height (stage 1 uses ``height // 2``).
            width: Final output width (stage 1 uses ``width // 2``).
            num_frames: Number of video frames.
            frame_rate: Frames per second; also used to derive the audio
                duration as ``num_frames / frame_rate``.
            images: Image conditioning inputs. The first one's ``path`` is
                also passed to prompt enhancement.
            audio_path: Optional audio file to condition on and return.
            video_conditioning: Optional ``(path, strength)`` reference videos,
                applied in stage 1 only.
            tiling_config: Passed to the video decoder.
            enhance_prompt: Passed to ``encode_prompts`` as ``enhance_first_prompt``.
            conditioning_strength: Forwarded to ``_create_ic_conditionings``.

        Returns:
            ``(decoded_video, output_audio)``. With ``audio_path`` the audio is
            the decoded input audio; otherwise it is decoded from the
            generated audio latent.

        Raises:
            ValueError: If no audio could be decoded from ``audio_path``.
        """
        assert_resolution(height=height, width=width, is_two_stage=True)
        has_audio = audio_path is not None
        has_video_cond = bool(video_conditioning)

        generator = torch.Generator(device=self.device).manual_seed(seed)
        noiser = GaussianNoiser(generator=generator)
        stepper = EulerDiffusionStep()
        # NOTE(review): same value as self.dtype set in __init__; kept as a separate local here.
        dtype = torch.bfloat16

        prompt_for_model = prompt
        if has_audio:
            prompt_for_model = (prompt_for_model + " synchronized lipsync").strip()

        (ctx_p,) = encode_prompts(
            [prompt_for_model],
            self.stage_1_model_ledger,
            enhance_first_prompt=enhance_prompt,
            enhance_prompt_image=images[0].path if len(images) > 0 else None,
        )
        video_context, audio_context = ctx_p.video_encoding, ctx_p.audio_encoding

        encoded_audio_latent = None
        decoded_audio_for_output = None
        if has_audio:
            video_duration = num_frames / frame_rate
            decoded_audio = decode_audio_from_file(audio_path, self.device, 0.0, video_duration)
            if decoded_audio is None:
                raise ValueError(f"Could not extract audio stream from {audio_path}")

            encoded_audio_latent = vae_encode_audio(decoded_audio, self.stage_1_model_ledger.audio_encoder())
            audio_shape = AudioLatentShape.from_duration(batch=1, duration=video_duration, channels=8, mel_bins=16)
            expected_frames = audio_shape.frames
            actual_frames = encoded_audio_latent.shape[2]

            # Make dim 2 of the encoded audio latent match the expected frame
            # count for the video duration: truncate when too long, zero-pad
            # at the end when too short.
            if actual_frames > expected_frames:
                encoded_audio_latent = encoded_audio_latent[:, :, :expected_frames, :]
            elif actual_frames < expected_frames:
                pad = torch.zeros(
                    encoded_audio_latent.shape[0],
                    encoded_audio_latent.shape[1],
                    expected_frames - actual_frames,
                    encoded_audio_latent.shape[3],
                    device=encoded_audio_latent.device,
                    dtype=encoded_audio_latent.dtype,
                )
                encoded_audio_latent = torch.cat([encoded_audio_latent, pad], dim=2)

            # The input audio itself (not a re-decoded latent) is what gets returned.
            decoded_audio_for_output = Audio(
                waveform=decoded_audio.waveform.squeeze(0),
                sampling_rate=decoded_audio.sampling_rate,
            )

        # ---- Stage 1: half resolution -------------------------------------
        video_encoder = self.stage_1_model_ledger.video_encoder()
        stage_1_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width // 2, height=height // 2, fps=frame_rate)

        stage_1_conditionings = combined_image_conditionings(
            images=images,
            height=stage_1_output_shape.height,
            width=stage_1_output_shape.width,
            video_encoder=video_encoder,
            dtype=dtype,
            device=self.device,
        )

        if has_video_cond:
            stage_1_conditionings.extend(
                self._create_ic_conditionings(
                    video_conditioning=video_conditioning,
                    height=stage_1_output_shape.height,
                    width=stage_1_output_shape.width,
                    num_frames=num_frames,
                    video_encoder=video_encoder,
                    conditioning_strength=conditioning_strength,
                )
            )

        transformer = self.stage_1_model_ledger.transformer()
        stage_1_sigmas = torch.tensor(DISTILLED_SIGMA_VALUES, device=self.device)

        def denoising_loop(sigmas, video_state, audio_state, stepper):
            # Euler loop bound to the stage-1 transformer and the prompt contexts.
            return euler_denoising_loop(
                sigmas=sigmas,
                video_state=video_state,
                audio_state=audio_state,
                stepper=stepper,
                denoise_fn=simple_denoising_func(
                    video_context=video_context,
                    audio_context=audio_context,
                    transformer=transformer,
                ),
            )

        if has_audio:
            # Input audio supplied: denoise the video only, with the encoded
            # audio passed as the initial audio latent.
            video_state = denoise_video_only(
                output_shape=stage_1_output_shape,
                conditionings=stage_1_conditionings,
                noiser=noiser,
                sigmas=stage_1_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
                initial_audio_latent=encoded_audio_latent,
            )
            audio_state = None
        else:
            video_state, audio_state = denoise_audio_video(
                output_shape=stage_1_output_shape,
                conditionings=stage_1_conditionings,
                noiser=noiser,
                sigmas=stage_1_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
            )

        # NOTE(review): torch.cuda.synchronize() is called unconditionally in
        # this method; this assumes self.device is a CUDA device -- confirm.
        torch.cuda.synchronize()
        cleanup_memory()

        # ---- Upsample the stage-1 latent ----------------------------------
        # Only the first batch entry of the stage-1 latent is upsampled.
        upscaled_video_latent = upsample_video(
            latent=video_state.latent[:1],
            video_encoder=video_encoder,
            upsampler=self.stage_2_model_ledger.spatial_upsampler(),
        )

        torch.cuda.synchronize()
        cleanup_memory()

        # ---- Stage 2: full resolution -------------------------------------
        transformer_s2 = self.stage_2_model_ledger.transformer()
        stage_2_sigmas = torch.tensor(STAGE_2_DISTILLED_SIGMA_VALUES, device=self.device)

        def denoising_loop_s2(sigmas, video_state, audio_state, stepper):
            # Same loop as stage 1, but bound to the stage-2 transformer.
            return euler_denoising_loop(
                sigmas=sigmas,
                video_state=video_state,
                audio_state=audio_state,
                stepper=stepper,
                denoise_fn=simple_denoising_func(
                    video_context=video_context,
                    audio_context=audio_context,
                    transformer=transformer_s2,
                ),
            )

        stage_2_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
        # Stage-2 conditionings are rebuilt from the images at full resolution;
        # the reference-video conditionings are not added here.
        stage_2_conditionings = combined_image_conditionings(
            images=images,
            height=stage_2_output_shape.height,
            width=stage_2_output_shape.width,
            video_encoder=video_encoder,
            dtype=dtype,
            device=self.device,
        )

        # Both branches start from the upsampled latent and pass the first
        # stage-2 sigma as the noise scale.
        if has_audio:
            video_state = denoise_video_only(
                output_shape=stage_2_output_shape,
                conditionings=stage_2_conditionings,
                noiser=noiser,
                sigmas=stage_2_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop_s2,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
                noise_scale=stage_2_sigmas[0],
                initial_video_latent=upscaled_video_latent,
                initial_audio_latent=encoded_audio_latent,
            )
            audio_state = None
        else:
            # The stage-1 audio latent seeds the stage-2 audio state.
            video_state, audio_state = denoise_audio_video(
                output_shape=stage_2_output_shape,
                conditionings=stage_2_conditionings,
                noiser=noiser,
                sigmas=stage_2_sigmas,
                stepper=stepper,
                denoising_loop_fn=denoising_loop_s2,
                components=self.pipeline_components,
                dtype=dtype,
                device=self.device,
                noise_scale=stage_2_sigmas[0],
                initial_video_latent=upscaled_video_latent,
                initial_audio_latent=audio_state.latent,
            )

        torch.cuda.synchronize()
        # Drop the local references before decoding.
        del transformer, transformer_s2, video_encoder
        cleanup_memory()

        # ---- Decode --------------------------------------------------------
        decoded_video = vae_decode_video(
            video_state.latent,
            self.stage_2_model_ledger.video_decoder(),
            tiling_config,
            generator,
        )

        if has_audio:
            output_audio = decoded_audio_for_output
        else:
            output_audio = vae_decode_audio(
                audio_state.latent,
                self.stage_2_model_ledger.audio_decoder(),
                self.stage_2_model_ledger.vocoder(),
            )

        return decoded_video, output_audio


# Hugging Face repositories the weights are fetched from.
LTX_MODEL_REPO = "Lightricks/LTX-2.3"
CHECKPOINT_REPO = "linoyts/ltx-2.3-22b-distilled-1.1-fused-union-control"
GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized"

print("=" * 80)
print("Downloading LTX-2.3 distilled model + Gemma + IC-LoRA...")
print("=" * 80)

checkpoint_path = hf_hub_download(
    repo_id=CHECKPOINT_REPO,
    filename="ltx-2.3-22b-distilled-1.1-fused-union-control.safetensors",
)
spatial_upsampler_path = hf_hub_download(
    repo_id=LTX_MODEL_REPO,
    filename="ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
)
gemma_root = snapshot_download(repo_id=GEMMA_REPO)
# NOTE(review): the default IC-LoRA file is downloaded and its path printed,
# but it is not passed to the pipeline constructed below. The checkpoint file
# name suggests the union-control LoRA is already fused in -- confirm.
default_lora_info = IC_LORA_OPTIONS[DEFAULT_IC_LORA]
default_ic_lora_path = hf_hub_download(
    repo_id=default_lora_info["repo"],
    filename=default_lora_info["filename"],
)

print(f"Checkpoint: {checkpoint_path}")
print(f"Spatial upsampler: {spatial_upsampler_path}")
print(f"Gemma root: {gemma_root}")
print(f"IC-LoRA: {default_ic_lora_path}")

# No ic_loras are given, so the pipeline's two stages share a single ledger.
# The reference downscale factor is set explicitly to 2 instead of being read
# from LoRA metadata.
pipeline = LTX23UnifiedPipeline(
    distilled_checkpoint_path=checkpoint_path,
    spatial_upsampler_path=spatial_upsampler_path,
    gemma_root=gemma_root,
    quantization=QuantizationPolicy.fp8_cast(),
    reference_downscale_factor=2,
)

# Preload: call each ledger accessor once, keep the returned object in a
# module-level variable, and replace the accessor on the ledger instance with
# a lambda that returns that stored object. Later accessor calls made by the
# pipeline then return the same instance (presumably to avoid rebuilding
# models on every request -- the ledger's own behaviour is not visible here).
print("Preloading all models...")
_ledger_1 = pipeline.stage_1_model_ledger
_ledger_2 = pipeline.stage_2_model_ledger
_shared = _ledger_1 is _ledger_2

_s1_transformer = _ledger_1.transformer()
_s1_video_encoder = _ledger_1.video_encoder()
_s1_text_encoder = _ledger_1.text_encoder()
_s1_embeddings = _ledger_1.gemma_embeddings_processor()
_s1_audio_encoder = _ledger_1.audio_encoder()

_ledger_1.transformer = lambda: _s1_transformer
_ledger_1.video_encoder = lambda: _s1_video_encoder
_ledger_1.text_encoder = lambda: _s1_text_encoder
_ledger_1.gemma_embeddings_processor = lambda: _s1_embeddings
_ledger_1.audio_encoder = lambda: _s1_audio_encoder

if _shared:
    # Single ledger: the decoders, vocoder and upsampler are cached on it too.
    _video_decoder = _ledger_1.video_decoder()
    _audio_decoder = _ledger_1.audio_decoder()
    _vocoder = _ledger_1.vocoder()
    _spatial_upsampler = _ledger_1.spatial_upsampler()
    _ledger_1.video_decoder = lambda: _video_decoder
    _ledger_1.audio_decoder = lambda: _audio_decoder
    _ledger_1.vocoder = lambda: _vocoder
    _ledger_1.spatial_upsampler = lambda: _spatial_upsampler
else:
    # Separate stage-2 ledger: every component is loaded and cached on it.
    _s2_transformer = _ledger_2.transformer()
    _s2_video_encoder = _ledger_2.video_encoder()
    _s2_video_decoder = _ledger_2.video_decoder()
    _s2_audio_decoder = _ledger_2.audio_decoder()
    _s2_vocoder = _ledger_2.vocoder()
    _s2_spatial_upsampler = _ledger_2.spatial_upsampler()
    _s2_text_encoder = _ledger_2.text_encoder()
    _s2_embeddings = _ledger_2.gemma_embeddings_processor()
    _s2_audio_encoder = _ledger_2.audio_encoder()

    _ledger_2.transformer = lambda: _s2_transformer
    _ledger_2.video_encoder = lambda: _s2_video_encoder
    _ledger_2.video_decoder = lambda: _s2_video_decoder
    _ledger_2.audio_decoder = lambda: _s2_audio_decoder
    _ledger_2.vocoder = lambda: _s2_vocoder
    _ledger_2.spatial_upsampler = lambda: _s2_spatial_upsampler
    _ledger_2.text_encoder = lambda: _s2_text_encoder
    _ledger_2.gemma_embeddings_processor = lambda: _s2_embeddings
    _ledger_2.audio_encoder = lambda: _s2_audio_encoder

print("All models preloaded!")
print("=" * 80)


def detect_aspect_ratio_from_media(*media_candidates) -> str:
    """Return the supported aspect-ratio label closest to the first usable media.

    Candidates are tried in order. ``None`` entries are skipped. An object
    with a ``size`` attribute (other than ``str`` / ``Path``) is treated as
    an in-memory image with ``size == (width, height)``. Anything else is
    converted to a string path and probed: known image extensions are opened
    with PIL, everything else with PyAV. A candidate that cannot be read, or
    that reports a zero width or height, is skipped.

    Returns:
        One of ``"16:9"``, ``"9:16"`` or ``"1:1"``; ``"16:9"`` when no
        candidate yields usable dimensions.
    """
    aspect_targets = {"16:9": 16 / 9, "9:16": 9 / 16, "1:1": 1.0}
    image_extensions = ("jpg", "jpeg", "png", "bmp", "webp", "gif", "tiff")

    def nearest_aspect(w, h) -> str:
        # Pick the label whose numeric ratio is closest to w / h.
        ratio = w / h
        return min(aspect_targets, key=lambda label: abs(ratio - aspect_targets[label]))

    for media in media_candidates:
        if media is None:
            continue

        if hasattr(media, "size") and not isinstance(media, (str, Path)):
            try:
                w, h = media.size
                return nearest_aspect(w, h)
            except Exception:
                # Best effort: `size` was not a usable (w, h) pair (or h was
                # zero). Fall through and try the object as a file path.
                pass

        media_str = str(media)
        ext = media_str.lower().rsplit(".", 1)[-1] if "." in media_str else ""
        if ext in image_extensions:
            try:
                with Image.open(media_str) as img:
                    w, h = img.size
            except Exception:
                continue
        else:
            try:
                import av
                with av.open(media_str) as container:
                    stream = container.streams.video[0]
                    w, h = stream.codec_context.width, stream.codec_context.height
            except Exception:
                continue

        # Missing dimensions would divide by zero or give a meaningless
        # ratio; move on to the next candidate instead.
        if not w or not h:
            continue
        return nearest_aspect(w, h)

    return "16:9"


def update_resolution(first_image, last_image, input_video, high_res):
    """Return Gradio updates for width and height matching the input media.

    The aspect ratio is detected from the first usable item among
    ``first_image``, ``last_image`` and ``input_video``. ``high_res``
    selects the "high" or "low" tier of ``RESOLUTIONS``.
    """
    tier = "low"
    if high_res:
        tier = "high"
    aspect_label = detect_aspect_ratio_from_media(first_image, last_image, input_video)
    width, height = RESOLUTIONS[tier][aspect_label]
    return gr.update(value=width), gr.update(value=height)


def update_mode_visibility(ui_mode: str):
    """Return four Gradio visibility updates for the selected UI mode.

    In order: visible only for "Interpolate"; visible only for
    "Motion Control" (twice); visible for every mode except "Motion Control".
    """
    motion_selected = ui_mode == "Motion Control"
    visibility_flags = (
        ui_mode == "Interpolate",
        motion_selected,
        motion_selected,
        not motion_selected,
    )
    return tuple(gr.update(visible=flag) for flag in visibility_flags)


def apply_duration_choice(duration_value: str):
    """Translate a duration label ("3s", "5s", "10s", "15s") into seconds.

    Unknown labels fall back to 3.0.
    """
    seconds_by_label = {"3s": 3.0, "5s": 5.0, "10s": 10.0, "15s": 15.0}
    try:
        return seconds_by_label[duration_value]
    except KeyError:
        return 3.0


def choose_duration_option(seconds: float | None):
    """Return a Gradio update selecting the duration label that covers *seconds*.

    ``None`` selects "3s". Otherwise the smallest of "3s", "5s", "10s" whose
    limit is not exceeded is chosen, and "15s" for anything above 10.
    """
    if seconds is None:
        label = "3s"
    else:
        label = "15s"
        for limit, candidate in ((3, "3s"), (5, "5s"), (10, "10s")):
            if seconds <= limit:
                label = candidate
                break
    return gr.update(value=label)


def _get_video_duration(video_path) -> float | None:
    if video_path is None:
        return None
    try:
        result = subprocess.run(
            [
                "ffprobe", "-v", "error", "-select_streams", "v:0",
                "-show_entries", "format=duration", "-of", "default=nw=1:nk=1", str(video_path),
            ],
            capture_output=True,
            text=True,
        )
        return float(result.stdout.strip())
    except Exception:
        return None


def on_video_upload(input_video, first_image, last_image, high_res):
    """Handle a video upload: refresh resolution and duration controls.

    Returns the width update, the height update, the duration-option update
    and the duration in seconds. The duration is the probed video length
    capped at 15.0 and rounded to one decimal, or 3.0 when it cannot be
    determined.
    """
    width_update, height_update = update_resolution(first_image, last_image, input_video, high_res)
    probed_seconds = _get_video_duration(input_video)
    if probed_seconds is None:
        seconds = 3.0
    else:
        seconds = round(min(probed_seconds, 15.0), 1)
    return width_update, height_update, choose_duration_option(seconds), seconds


def apply_prompt_preset(preset_name: str) -> str:
    """Look up the prompt text for *preset_name*; unknown names give ""."""
    if preset_name in PROMPT_PRESETS:
        return PROMPT_PRESETS[preset_name]
    return ""

def apply_style_modifier(style_name: str, current_prompt: str) -> str:
    """Append the style modifier for ``style_name`` to the current prompt.

    Any style suffix from ``STYLE_MODIFIERS`` already sitting at the end of
    the prompt is removed first, so switching styles replaces the suffix
    instead of stacking them.

    Args:
        style_name: Key into ``STYLE_MODIFIERS``; unknown keys or keys mapped
            to an empty suffix leave the (stripped) prompt untouched.
        current_prompt: Prompt text currently in the box; ``None`` is treated
            as an empty prompt.

    Returns:
        The stripped prompt followed by the selected suffix.
    """
    suffix = STYLE_MODIFIERS.get(style_name, "")
    # Tolerate None the same way apply_motion_preset does.
    base = (current_prompt or "").strip()
    if not suffix:
        return base
    # Remove any existing style suffix before appending new one
    for known_suffix in STYLE_MODIFIERS.values():
        trimmed = known_suffix.strip(",") if known_suffix else ""
        # An empty trimmed suffix would match every prompt via endswith("")
        # and base[:-0] would then erase the whole prompt, so skip it.
        if trimmed and base.endswith(trimmed):
            base = base[: -len(trimmed)].rstrip(", ")
    return base + suffix

def on_motion_preset_to_prompt(preset_name: str, custom_motion: str) -> tuple:
    """Push the selected motion preset into the prompt box.

    Returns:
        A pair of ``gr.update`` objects: the first toggles visibility of the
        custom-motion input (visible only for the "Custom" preset), the
        second sets the prompt value when there is motion text and is a
        no-op update otherwise.
    """
    if preset_name == "None":
        return gr.update(visible=False), gr.update()

    is_custom = preset_name == "Custom"
    if is_custom:
        motion_text = custom_motion.strip() if custom_motion else ""
    else:
        motion_text = MOTION_PRESETS.get(preset_name, "")

    # Only overwrite the prompt when there is something to write.
    if motion_text:
        prompt_update = gr.update(value=motion_text)
    else:
        prompt_update = gr.update()
    return gr.update(visible=is_custom), prompt_update

def on_custom_motion_change(custom_text: str, preset_name: str) -> object:
    """Live-update the prompt while the user types in the custom motion box.

    Args:
        custom_text: Current content of the custom motion input; ``None`` is
            treated as empty text, matching on_motion_preset_to_prompt.
        preset_name: Currently selected motion preset.

    Returns:
        A ``gr.update`` setting the prompt to the stripped custom text when
        the "Custom" preset is active, otherwise a no-op ``gr.update``.
    """
    if preset_name == "Custom":
        return gr.update(value=(custom_text or "").strip())
    return gr.update()


def apply_motion_preset(preset_name: str, current_prompt: str, custom_motion: str = "") -> str:
    """Combine the prompt with the text of the chosen motion preset.

    "None" returns the stripped prompt unchanged, "Custom" uses
    ``custom_motion``, and any other name is looked up in ``MOTION_PRESETS``.
    Non-empty parts are joined with ", ".
    """
    prompt_text = (current_prompt or "").strip()
    if preset_name == "None":
        return prompt_text

    if preset_name == "Custom":
        motion_source = custom_motion
    else:
        motion_source = MOTION_PRESETS.get(preset_name, "")
    motion_text = (motion_source or "").strip()

    # Joining only the non-empty parts avoids a dangling separator.
    return ", ".join(part for part in (prompt_text, motion_text) if part)


def _extract_audio_from_video(video_path: str) -> str | None:
    out_path = tempfile.mktemp(suffix=".wav")
    try:
        probe = subprocess.run(
            [
                "ffprobe", "-v", "error", "-select_streams", "a:0",
                "-show_entries", "stream=codec_type", "-of", "default=nw=1:nk=1", video_path,
            ],
            capture_output=True,
            text=True,
        )
        if not probe.stdout.strip():
            return None
        subprocess.run(
            [
                "ffmpeg", "-y", "-v", "error", "-i", video_path,
                "-vn", "-ac", "2", "-ar", "48000", "-c:a", "pcm_s16le", out_path,
            ],
            check=True,
        )
        return out_path
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None


def get_duration(
    prompt, first_image, last_image, input_video, input_audio,
    ui_mode, clip_duration, video_preprocess, conditioning_strength,
    enhance_prompt, use_video_audio, seed, randomize_seed, height, width,
    motion_preset, custom_motion, progress,
):
    """Compute the duration budget handed to ``spaces.GPU`` for a request.

    The parameter list mirrors ``generate_video``; only ``ui_mode``,
    ``input_audio``, ``input_video`` and ``clip_duration`` affect the result.

    Returns:
        60 for "Image-to-Video" and "Interpolate". Otherwise a base of
        60/80/120/180 chosen by clip length (<=3, <=5, <=10, longer), plus
        10 when audio is supplied and 60 when a video is supplied.
    """
    if ui_mode == "Image-to-Video" or ui_mode == "Interpolate":
        return 60

    overhead = 0
    if input_audio is not None:
        overhead += 10
    if input_video is not None:
        overhead += 60

    # Ascending clip-length thresholds paired with their base budget.
    for max_seconds, base_budget in ((3, 60), (5, 80), (10, 120)):
        if clip_duration <= max_seconds:
            return base_budget + overhead
    return 180 + overhead


def _conditioning_image_path(image) -> str:
    """Return a filesystem path for a conditioning image.

    Objects exposing ``save`` are written to a fresh temporary PNG; anything
    else is treated as a path and converted with ``str``.
    """
    if hasattr(image, "save"):
        # Close the handle before saving so the path can be reopened by the
        # image library; delete=False keeps the file for the pipeline.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            png_path = tmp.name
        image.save(png_path)
        return str(png_path)
    return str(image)


@spaces.GPU(duration=get_duration)
@torch.inference_mode()
def generate_video(
    prompt: str,
    first_image=None,
    last_image=None,
    input_video=None,
    input_audio=None,
    ui_mode="Image-to-Video",
    clip_duration=3,
    video_preprocess="Raw (no preprocessing)",
    conditioning_strength=0.85,
    enhance_prompt=False,
    use_video_audio=True,
    seed=42,
    randomize_seed=True,
    height=512,
    width=768,
    motion_preset="None",
    custom_motion="",
    progress=gr.Progress(track_tqdm=True),
):
    """Run the pipeline for one request and encode the result to an MP4.

    Inputs that do not belong to the selected ``ui_mode`` are ignored:
    "Image-to-Video" drops the last image and the video, "Interpolate" drops
    the video, and "Motion Control" drops the last image.

    Returns:
        ``(output_path, seed_used)``. ``output_path`` is ``None`` when any
        step raises; the error and traceback are printed instead of being
        propagated.
    """
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    # Apply motion preset
    final_prompt = apply_motion_preset(motion_preset, prompt, custom_motion)

    if ui_mode == "Image-to-Video":
        last_image = None
        input_video = None
    elif ui_mode == "Interpolate":
        input_video = None
    elif ui_mode == "Motion Control":
        last_image = None

    try:
        torch.cuda.reset_peak_memory_stats()
        log_memory("start")

        frame_rate = DEFAULT_FRAME_RATE
        num_frames = int(clip_duration * frame_rate) + 1
        # Round (num_frames - 1) up to a multiple of 8, giving 8k + 1 frames.
        num_frames = ((num_frames - 1 + 7) // 8) * 8 + 1

        images: list[ImageConditioningInput] = []
        if first_image is not None:
            first_path = _conditioning_image_path(first_image)
            images.append(ImageConditioningInput(path=first_path, frame_idx=0, strength=1.0))

        if last_image is not None:
            last_path = _conditioning_image_path(last_image)
            images.append(ImageConditioningInput(path=last_path, frame_idx=num_frames - 1, strength=1.0))

        video_conditioning = None
        if input_video is not None:
            video_path = str(input_video)
            if video_preprocess != "Raw (no preprocessing)":
                # The conditioning video is prepared at half the output size.
                cond_mp4, first_frame_png = preprocess_conditioning_video(
                    video_path=video_path,
                    mode=video_preprocess,
                    width=int(width) // 2,
                    height=int(height) // 2,
                    num_frames=num_frames,
                    fps=frame_rate,
                )
                video_conditioning = [(cond_mp4, 1.0)]
                # Without any user image, anchor frame 0 on the first frame
                # returned by the preprocessing step.
                if not images:
                    images.append(ImageConditioningInput(path=first_frame_png, frame_idx=0, strength=1.0))
            else:
                video_conditioning = [(video_path, 1.0)]

            # An explicitly supplied audio input takes precedence over the
            # video's own audio track.
            if input_audio is None and use_video_audio:
                extracted_audio = _extract_audio_from_video(video_path)
                if extracted_audio is not None:
                    input_audio = extracted_audio

        if not final_prompt or not final_prompt.strip():
            final_prompt = DEFAULT_PROMPT

        tiling_config = TilingConfig.default()
        video_chunks_number = get_video_chunks_number(num_frames, tiling_config)

        video, audio = pipeline(
            prompt=final_prompt,
            seed=current_seed,
            height=int(height),
            width=int(width),
            num_frames=num_frames,
            frame_rate=frame_rate,
            images=images,
            audio_path=input_audio,
            video_conditioning=video_conditioning,
            tiling_config=tiling_config,
            enhance_prompt=enhance_prompt,
            conditioning_strength=conditioning_strength,
        )

        # Reserve the output file atomically instead of using the deprecated,
        # race-prone tempfile.mktemp; the handle is closed before encoding.
        # NOTE(review): assumes encode_video overwrites an existing file at
        # output_path — confirm.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
            output_path = out_file.name
        encode_video(
            video=video,
            fps=frame_rate,
            audio=audio,
            output_path=output_path,
            video_chunks_number=video_chunks_number,
        )
        log_memory("done")
        return str(output_path), current_seed
    except Exception as e:
        import traceback
        log_memory("error")
        print(f"Error: {str(e)}\n{traceback.format_exc()}")
        return None, current_seed


# ─────────────────────────────────────────────────────────────
# CSS — Purple Space Star Theme
# ─────────────────────────────────────────────────────────────
css = """
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Rajdhani:wght@300;400;600;700&display=swap');

/* ─── Reset / Base ─── */
*, *::before, *::after { box-sizing: border-box; }

:root {
  --bg-deep:       #04020f;
  --bg-mid:        #08041a;
  --bg-card:       #0d0820;
  --bg-card2:      #120d28;
  --border:        rgba(138, 43, 226, 0.25);
  --border-bright: rgba(180, 100, 255, 0.5);
  --purple-1:      #8a2be2;
  --purple-2:      #b45fff;
  --purple-3:      #d4aaff;
  --purple-glow:   rgba(138, 43, 226, 0.4);
  --cyan-acc:      #00e5ff;
  --pink-acc:      #ff6ec7;
  --text-primary:  #e8d8ff;
  --text-muted:    rgba(200, 170, 255, 0.55);
  --radius-lg:     16px;
  --radius-pill:   9999px;
  --font-display:  'Orbitron', sans-serif;
  --font-body:     'Rajdhani', sans-serif;
}

/* ─── Starfield Background ─── */
body, .gradio-container {
  background: var(--bg-deep) !important;
  font-family: var(--font-body) !important;
  color: var(--text-primary) !important;
  min-height: 100vh;
  position: relative;
  overflow-x: hidden;
}

.gradio-container::before {
  content: '';
  position: fixed;
  inset: 0;
  background:
    radial-gradient(ellipse 80% 50% at 20% 10%, rgba(90,20,180,0.18) 0%, transparent 60%),
    radial-gradient(ellipse 60% 40% at 80% 80%, rgba(0,229,255,0.07) 0%, transparent 55%),
    radial-gradient(ellipse 50% 60% at 50% 50%, rgba(138,43,226,0.06) 0%, transparent 70%);
  pointer-events: none;
  z-index: 0;
}

/* Animated stars */
.gradio-container::after {
  content: '';
  position: fixed;
  inset: 0;
  background-image:
    radial-gradient(1px 1px at 10% 15%, rgba(255,255,255,0.9) 0%, transparent 100%),
    radial-gradient(1px 1px at 25% 35%, rgba(255,255,255,0.6) 0%, transparent 100%),
    radial-gradient(1.5px 1.5px at 40% 8%, rgba(200,150,255,0.8) 0%, transparent 100%),
    radial-gradient(1px 1px at 55% 60%, rgba(255,255,255,0.7) 0%, transparent 100%),
    radial-gradient(1px 1px at 70% 25%, rgba(180,100,255,0.9) 0%, transparent 100%),
    radial-gradient(1.5px 1.5px at 85% 45%, rgba(255,255,255,0.5) 0%, transparent 100%),
    radial-gradient(1px 1px at 15% 75%, rgba(0,229,255,0.8) 0%, transparent 100%),
    radial-gradient(1px 1px at 90% 10%, rgba(255,255,255,0.6) 0%, transparent 100%),
    radial-gradient(1px 1px at 35% 88%, rgba(255,255,255,0.7) 0%, transparent 100%),
    radial-gradient(1.5px 1.5px at 60% 92%, rgba(180,100,255,0.6) 0%, transparent 100%),
    radial-gradient(1px 1px at 78% 68%, rgba(255,255,255,0.8) 0%, transparent 100%),
    radial-gradient(1px 1px at 5% 50%, rgba(0,229,255,0.5) 0%, transparent 100%),
    radial-gradient(1px 1px at 48% 42%, rgba(255,255,255,0.4) 0%, transparent 100%),
    radial-gradient(1px 1px at 92% 82%, rgba(200,150,255,0.7) 0%, transparent 100%),
    radial-gradient(1.5px 1.5px at 22% 55%, rgba(255,255,255,0.5) 0%, transparent 100%);
  pointer-events: none;
  z-index: 0;
  animation: starTwinkle 6s ease-in-out infinite alternate;
}

@keyframes starTwinkle {
  0%   { opacity: 0.6; }
  50%  { opacity: 1; }
  100% { opacity: 0.7; }
}

/* ─── Gradio overrides ─── */
.gradio-container > * { position: relative; z-index: 1; }

footer { display: none !important; }

.tabs > .tab-nav { display: none !important; }

/* Hide default Gradio chrome */
.app.svelte-182fdeq.svelte-182fdeq { padding: 0 !important; }

/* Blocks */
.block, .form, .gap, .contain { 
  background: transparent !important; 
  border: none !important; 
  box-shadow: none !important;
}

.gr-padded { padding: 0 !important; }

/* ─── Page wrapper ─── */
#ltx-root {
  max-width: 1300px;
  margin: 0 auto;
  padding: 24px 20px 60px;
}

/* ─── Header ─── */
#ltx-header {
  text-align: center;
  padding: 40px 0 32px;
  position: relative;
}

.ltx-logo-text {
  font-family: var(--font-display);
  font-size: clamp(32px, 5vw, 56px);
  font-weight: 900;
  letter-spacing: 0.08em;
  background: linear-gradient(135deg, #b45fff 0%, #e8d8ff 40%, #00e5ff 80%, #ff6ec7 100%);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
  text-shadow: none;
  filter: drop-shadow(0 0 30px rgba(180,95,255,0.5));
  animation: logoGlow 3s ease-in-out infinite alternate;
}

@keyframes logoGlow {
  from { filter: drop-shadow(0 0 20px rgba(180,95,255,0.4)); }
  to   { filter: drop-shadow(0 0 45px rgba(0,229,255,0.5)); }
}

.ltx-tagline {
  font-family: var(--font-body);
  font-size: 15px;
  font-weight: 400;
  color: var(--text-muted);
  letter-spacing: 0.12em;
  text-transform: uppercase;
  margin-top: 8px;
}

.ltx-site-link {
  display: inline-flex;
  align-items: center;
  gap: 6px;
  margin-top: 14px;
  padding: 7px 18px;
  border-radius: var(--radius-pill);
  border: 1px solid var(--border-bright);
  background: rgba(138,43,226,0.12);
  color: var(--purple-3);
  font-family: var(--font-body);
  font-size: 13px;
  font-weight: 600;
  letter-spacing: 0.06em;
  text-decoration: none;
  transition: all 0.25s ease;
}
.ltx-site-link:hover {
  background: rgba(138,43,226,0.28);
  border-color: var(--purple-2);
  color: #fff;
  box-shadow: 0 0 18px var(--purple-glow);
  transform: translateY(-1px);
}

/* Nebula decoration */
.ltx-nebula {
  position: absolute;
  top: -30px; left: 50%;
  transform: translateX(-50%);
  width: 500px; height: 180px;
  background: radial-gradient(ellipse at center, rgba(138,43,226,0.15) 0%, transparent 70%);
  pointer-events: none;
}

/* ─── Mode Selector ─── */
#mode-selector-wrap {
  display: flex;
  justify-content: center;
  margin-bottom: 28px;
}

.mode-pill-group {
  display: inline-flex;
  background: rgba(10,5,25,0.8);
  border: 1px solid var(--border);
  border-radius: var(--radius-pill);
  padding: 5px;
  gap: 2px;
  backdrop-filter: blur(12px);
}

.mode-pill {
  position: relative;
  padding: 10px 24px;
  border-radius: var(--radius-pill);
  font-family: var(--font-body);
  font-size: 14px;
  font-weight: 700;
  letter-spacing: 0.05em;
  color: var(--text-muted);
  cursor: pointer;
  border: none;
  background: transparent;
  transition: color 0.2s ease;
  user-select: none;
  white-space: nowrap;
}
.mode-pill.active {
  color: #04020f;
  background: linear-gradient(135deg, var(--purple-2), var(--cyan-acc));
  box-shadow: 0 0 20px rgba(180,95,255,0.4);
}
.mode-pill:not(.active):hover { color: var(--purple-3); }

/* ─── Luminous Radio Pills (all groups) ─── */

/* Kill the default Gradio radio layout */
#mode-radio .wrap,
#preset-radio .wrap,
#duration-radio .wrap,
#preprocess-radio .wrap,
#style-radio .wrap,
#prompt-preset-radio .wrap {
  display: flex !important;
  flex-wrap: wrap !important;
  gap: 8px !important;
  background: transparent !important;
  border: none !important;
  padding: 4px 0 !important;
}

/* Hide the actual radio circle */
#mode-radio input[type="radio"],
#preset-radio input[type="radio"],
#duration-radio input[type="radio"],
#preprocess-radio input[type="radio"],
#style-radio input[type="radio"],
#prompt-preset-radio input[type="radio"] {
  display: none !important;
}

/* ── Base pill style ── */
#mode-radio label,
#preset-radio label,
#duration-radio label,
#preprocess-radio label,
#style-radio label,
#prompt-preset-radio label {
  display: inline-flex !important;
  align-items: center !important;
  justify-content: center !important;
  gap: 5px !important;
  padding: 8px 16px !important;
  border-radius: 9999px !important;
  border: 1.5px solid rgba(138,43,226,0.28) !important;
  background: rgba(8,4,20,0.8) !important;
  color: rgba(190,160,255,0.6) !important;
  font-family: 'Rajdhani', sans-serif !important;
  font-size: 13px !important;
  font-weight: 700 !important;
  letter-spacing: 0.03em !important;
  cursor: pointer !important;
  transition: border-color 0.18s ease, color 0.18s ease,
              box-shadow 0.18s ease, transform 0.15s ease,
              background 0.18s ease !important;
  text-transform: none !important;
  white-space: nowrap !important;
  position: relative !important;
  user-select: none !important;
  -webkit-user-select: none !important;
}

/* ── Hover — subtle lift ── */
#mode-radio label:hover,
#preset-radio label:hover,
#duration-radio label:hover,
#preprocess-radio label:hover,
#style-radio label:hover,
#prompt-preset-radio label:hover {
  border-color: rgba(180,100,255,0.55) !important;
  color: rgba(220,190,255,0.9) !important;
  box-shadow: 0 0 12px rgba(138,43,226,0.18) !important;
  transform: translateY(-1px) !important;
}

/* ── SELECTED STATE — unmistakably obvious ── */

/* checkmark prepended via ::before on the label itself */
#mode-radio input:checked + label::before,
#preset-radio input:checked + label::before,
#duration-radio input:checked + label::before,
#preprocess-radio input:checked + label::before,
#style-radio input:checked + label::before,
#prompt-preset-radio input:checked + label::before,
#mode-radio label:has(input:checked)::before,
#preset-radio label:has(input:checked)::before,
#duration-radio label:has(input:checked)::before,
#preprocess-radio label:has(input:checked)::before,
#style-radio label:has(input:checked)::before,
#prompt-preset-radio label:has(input:checked)::before,
#mode-radio label.selected::before,
#preset-radio label.selected::before,
#duration-radio label.selected::before,
#preprocess-radio label.selected::before,
#style-radio label.selected::before,
#prompt-preset-radio label.selected::before {
  content: '✓' !important;
  font-size: 12px !important;
  font-weight: 900 !important;
  line-height: 1 !important;
  flex-shrink: 0 !important;
}

/* Pulse ring animation */
@keyframes pillPulse {
  0%   { box-shadow: 0 0 0 0 rgba(180,100,255,0.6),  0 0 20px rgba(138,43,226,0.4); }
  50%  { box-shadow: 0 0 0 5px rgba(180,100,255,0),   0 0 30px rgba(138,43,226,0.25); }
  100% { box-shadow: 0 0 0 0 rgba(180,100,255,0),     0 0 20px rgba(138,43,226,0.4); }
}

/* Bounce in on select */
@keyframes pillBounce {
  0%   { transform: scale(1); }
  40%  { transform: scale(1.10) translateY(-2px); }
  70%  { transform: scale(0.97); }
  100% { transform: scale(1) translateY(-1px); }
}

/* Base selected */
#mode-radio input:checked + label,
#preset-radio input:checked + label,
#duration-radio input:checked + label,
#preprocess-radio input:checked + label,
#style-radio input:checked + label,
#prompt-preset-radio input:checked + label,
#mode-radio label:has(input:checked),
#preset-radio label:has(input:checked),
#duration-radio label:has(input:checked),
#preprocess-radio label:has(input:checked),
#style-radio label:has(input:checked),
#prompt-preset-radio label:has(input:checked),
#mode-radio label.selected,
#preset-radio label.selected,
#duration-radio label.selected,
#preprocess-radio label.selected,
#style-radio label.selected,
#prompt-preset-radio label.selected {
  background: linear-gradient(135deg,
    rgba(107,32,200,0.65) 0%,
    rgba(154,64,224,0.55) 50%,
    rgba(0,188,212,0.3) 100%) !important;
  border-color: var(--purple-2) !important;
  border-width: 2px !important;
  color: #fff !important;
  font-weight: 900 !important;
  text-shadow: 0 0 10px rgba(220,180,255,0.8) !important;
  animation: pillBounce 0.32s ease forwards, pillPulse 2s ease 0.32s infinite !important;
  transform: translateY(-1px) !important;
}

/* ── Per-group selected color themes ── */

/* Duration — cyan */
#duration-radio label { padding: 9px 22px !important; font-size: 14px !important; }
#duration-radio input:checked + label,
#duration-radio label:has(input:checked),
#duration-radio label.selected {
  background: linear-gradient(135deg, rgba(0,180,200,0.55), rgba(0,100,180,0.45)) !important;
  border-color: #00e5ff !important;
  box-shadow: 0 0 0 2px rgba(0,229,255,0.25), 0 0 22px rgba(0,229,255,0.45) !important;
  text-shadow: 0 0 12px rgba(0,255,255,0.8) !important;
  animation: pillBounce 0.32s ease forwards, pillPulseCyan 2s ease 0.32s infinite !important;
}
@keyframes pillPulseCyan {
  0%   { box-shadow: 0 0 0 0 rgba(0,229,255,0.6),  0 0 22px rgba(0,229,255,0.4); }
  50%  { box-shadow: 0 0 0 6px rgba(0,229,255,0),   0 0 30px rgba(0,229,255,0.2); }
  100% { box-shadow: 0 0 0 0 rgba(0,229,255,0),     0 0 22px rgba(0,229,255,0.4); }
}

/* Mode — bright gradient, bigger pills */
#mode-radio .wrap {
  background: rgba(10,5,25,0.75) !important;
  border: 1px solid var(--border) !important;
  border-radius: 9999px !important;
  padding: 5px !important;
  backdrop-filter: blur(14px) !important;
  display: inline-flex !important;
  width: fit-content !important;
  margin: 0 auto !important;
}
#mode-radio label {
  padding: 11px 28px !important;
  font-size: 14px !important;
  font-family: 'Orbitron', sans-serif !important;
  letter-spacing: 0.07em !important;
}
#mode-radio input:checked + label,
#mode-radio label:has(input:checked),
#mode-radio label.selected {
  background: linear-gradient(135deg, #5c18b8, #9a40e0 45%, #00bcd4) !important;
  border-color: rgba(255,255,255,0.3) !important;
  border-width: 1.5px !important;
  text-shadow: 0 0 18px rgba(255,255,255,0.7) !important;
  box-shadow: 0 0 0 3px rgba(138,43,226,0.25), 0 0 28px rgba(138,43,226,0.5) !important;
  animation: pillBounce 0.32s ease forwards, pillPulseMode 2.5s ease 0.32s infinite !important;
}
@keyframes pillPulseMode {
  0%   { box-shadow: 0 0 0 0 rgba(138,43,226,0.55), 0 0 28px rgba(138,43,226,0.5); }
  50%  { box-shadow: 0 0 0 7px rgba(138,43,226,0),  0 0 40px rgba(138,43,226,0.25); }
  100% { box-shadow: 0 0 0 0 rgba(138,43,226,0),    0 0 28px rgba(138,43,226,0.5); }
}

/* Prompt preset — teal */
#prompt-preset-radio input:checked + label,
#prompt-preset-radio label:has(input:checked),
#prompt-preset-radio label.selected {
  background: linear-gradient(135deg, rgba(0,140,130,0.6), rgba(0,188,212,0.4)) !important;
  border-color: #00e5c8 !important;
  text-shadow: 0 0 10px rgba(0,255,210,0.8) !important;
  animation: pillBounce 0.32s ease forwards, pillPulseTeal 2s ease 0.32s infinite !important;
}
@keyframes pillPulseTeal {
  0%   { box-shadow: 0 0 0 0 rgba(0,229,200,0.55), 0 0 20px rgba(0,200,180,0.4); }
  50%  { box-shadow: 0 0 0 5px rgba(0,229,200,0),  0 0 28px rgba(0,200,180,0.2); }
  100% { box-shadow: 0 0 0 0 rgba(0,229,200,0),    0 0 20px rgba(0,200,180,0.4); }
}

/* Style modifier — pink */
#style-radio input:checked + label,
#style-radio label:has(input:checked),
#style-radio label.selected {
  background: linear-gradient(135deg, rgba(180,30,140,0.55), rgba(138,43,226,0.45)) !important;
  border-color: #ff6ec7 !important;
  text-shadow: 0 0 10px rgba(255,150,220,0.8) !important;
  animation: pillBounce 0.32s ease forwards, pillPulsePink 2s ease 0.32s infinite !important;
}
@keyframes pillPulsePink {
  0%   { box-shadow: 0 0 0 0 rgba(255,110,199,0.55), 0 0 20px rgba(255,110,199,0.35); }
  50%  { box-shadow: 0 0 0 5px rgba(255,110,199,0),  0 0 28px rgba(255,110,199,0.15); }
  100% { box-shadow: 0 0 0 0 rgba(255,110,199,0),    0 0 20px rgba(255,110,199,0.35); }
}

/* Motion preset — amber/gold */
#preset-radio input:checked + label,
#preset-radio label:has(input:checked),
#preset-radio label.selected {
  background: linear-gradient(135deg, rgba(180,90,0,0.55), rgba(220,160,0,0.4)) !important;
  border-color: #ffab40 !important;
  text-shadow: 0 0 10px rgba(255,200,100,0.8) !important;
  animation: pillBounce 0.32s ease forwards, pillPulseAmber 2s ease 0.32s infinite !important;
}
@keyframes pillPulseAmber {
  0%   { box-shadow: 0 0 0 0 rgba(255,171,64,0.55), 0 0 20px rgba(255,171,64,0.35); }
  50%  { box-shadow: 0 0 0 5px rgba(255,171,64,0),  0 0 28px rgba(255,171,64,0.15); }
  100% { box-shadow: 0 0 0 0 rgba(255,171,64,0),    0 0 20px rgba(255,171,64,0.35); }
}

/* Preprocess — green */
#preprocess-radio input:checked + label,
#preprocess-radio label:has(input:checked),
#preprocess-radio label.selected {
  background: linear-gradient(135deg, rgba(20,140,70,0.55), rgba(0,200,120,0.35)) !important;
  border-color: #00e676 !important;
  text-shadow: 0 0 10px rgba(100,255,180,0.8) !important;
  animation: pillBounce 0.32s ease forwards, pillPulseGreen 2s ease 0.32s infinite !important;
}
@keyframes pillPulseGreen {
  0%   { box-shadow: 0 0 0 0 rgba(0,230,118,0.55), 0 0 20px rgba(0,230,118,0.35); }
  50%  { box-shadow: 0 0 0 5px rgba(0,230,118,0),  0 0 28px rgba(0,230,118,0.15); }
  100% { box-shadow: 0 0 0 0 rgba(0,230,118,0),    0 0 20px rgba(0,230,118,0.35); }
}

/* Center mode selector */
#mode-selector-wrap,
#mode-selector-wrap > div {
  display: flex !important;
  justify-content: center !important;
}
#mode-selector-wrap .wrap { justify-content: center !important; }

/* Hide default radio headings for these selector groups */
#mode-radio > .label-wrap,
#duration-radio > .label-wrap,
#preprocess-radio > .label-wrap {
    display: none !important;
}

/* ─── Duration Pills ─── */
#main-layout {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 20px;
  align-items: start;
}
@media (max-width: 900px) {
  #main-layout { grid-template-columns: 1fr; }
}

/* ─── Cards ─── */
.ltx-card {
  background: linear-gradient(135deg, rgba(13,8,32,0.95) 0%, rgba(18,13,40,0.9) 100%);
  border: 1px solid var(--border);
  border-radius: var(--radius-lg);
  padding: 20px;
  backdrop-filter: blur(20px);
  box-shadow: 0 8px 32px rgba(0,0,0,0.4), inset 0 1px 0 rgba(180,100,255,0.1);
  position: relative;
  overflow: hidden;
  transition: border-color 0.3s ease, box-shadow 0.3s ease;
}
.ltx-card:hover {
  border-color: rgba(180,100,255,0.4);
  box-shadow: 0 8px 40px rgba(0,0,0,0.5), 0 0 20px rgba(138,43,226,0.1);
}
.ltx-card::before {
  content: '';
  position: absolute;
  top: 0; left: 0; right: 0;
  height: 1px;
  background: linear-gradient(90deg, transparent, rgba(180,100,255,0.5), transparent);
}

.ltx-card-label {
  font-family: var(--font-display);
  font-size: 11px;
  font-weight: 700;
  letter-spacing: 0.15em;
  text-transform: uppercase;
  color: var(--purple-2);
  margin-bottom: 14px;
  display: flex;
  align-items: center;
  gap: 8px;
}
.ltx-card-label::after {
  content: '';
  flex: 1;
  height: 1px;
  background: linear-gradient(90deg, var(--border), transparent);
}

/* ─── Image Upload ─── */
.media-upload-row {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 12px;
  margin-bottom: 16px;
}
.media-upload-row.single { grid-template-columns: 1fr; }
.media-upload-row.triple { grid-template-columns: 1fr 1fr 1fr; }

/* Override Gradio image/video upload */
.ltx-card .image-container,
.ltx-card .video-container,
.ltx-card .wrap { 
  border-radius: 12px !important;
  border: 1px solid var(--border) !important;
  background: rgba(8,4,20,0.6) !important;
  overflow: hidden !important;
}
.ltx-card .image-container:hover,
.ltx-card .wrap:hover {
  border-color: var(--border-bright) !important;
}

/* ─── Prompt Box ─── */
.ltx-prompt-wrap { position: relative; }
.ltx-prompt-area {
  width: 100%;
  min-height: 120px;
  max-height: 220px;
  background: rgba(8,4,20,0.8) !important;
  border: 1px solid var(--border) !important;
  border-radius: 12px !important;
  color: var(--text-primary) !important;
  font-family: var(--font-body) !important;
  font-size: 15px !important;
  font-weight: 400 !important;
  padding: 14px 16px !important;
  resize: none !important;
  outline: none !important;
  transition: border-color 0.25s ease, box-shadow 0.25s ease !important;
  line-height: 1.5 !important;
}
.ltx-prompt-area:focus {
  border-color: var(--purple-2) !important;
  box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important;
}

/* Override Gradio textbox */
.ltx-card textarea {
  background: rgba(8,4,20,0.8) !important;
  border: 1px solid var(--border) !important;
  border-radius: 12px !important;
  color: var(--text-primary) !important;
  font-family: var(--font-body) !important;
  font-size: 15px !important;
  transition: border-color 0.25s ease, box-shadow 0.25s ease !important;
}
.ltx-card textarea:focus {
  border-color: var(--purple-2) !important;
  box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important;
}

/* ─── Output Video ─── */
.ltx-btn {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  gap: 8px;
  padding: 12px 24px;
  border-radius: var(--radius-pill);
  font-family: var(--font-body);
  font-size: 15px;
  font-weight: 700;
  letter-spacing: 0.04em;
  cursor: pointer;
  border: none;
  outline: none;
  transition: all 0.22s ease;
  position: relative;
  overflow: hidden;
  user-select: none;
}
.ltx-btn::after {
  content: '';
  position: absolute;
  inset: 0;
  background: rgba(255,255,255,0);
  transition: background 0.18s ease;
  border-radius: inherit;
}
.ltx-btn:hover::after { background: rgba(255,255,255,0.07); }
.ltx-btn:active { transform: scale(0.97); }
.ltx-btn:active::after { background: rgba(255,255,255,0.12); }

/* Primary Generate */
.ltx-btn-generate {
  width: 100%;
  padding: 16px 32px;
  font-size: 17px;
  font-family: var(--font-display) !important;
  font-weight: 700 !important;
  letter-spacing: 0.1em;
  background: linear-gradient(135deg, #6b20c8 0%, #9a40e0 40%, #00bcd4 100%);
  color: #fff !important;
  box-shadow: 0 4px 24px rgba(138,43,226,0.45), 0 0 0 1px rgba(180,100,255,0.3);
}
.ltx-btn-generate:hover {
  box-shadow: 0 6px 32px rgba(138,43,226,0.6), 0 0 40px rgba(0,229,255,0.2), 0 0 0 1px rgba(180,100,255,0.5) !important;
  transform: translateY(-2px);
}
.ltx-btn-generate:active { transform: scale(0.98) translateY(0); }

/* Cancel */
.ltx-btn-cancel {
  width: 100%;
  padding: 12px 24px;
  background: rgba(255,60,100,0.12);
  border: 1px solid rgba(255,60,100,0.35) !important;
  color: rgba(255,130,150,0.9) !important;
  font-family: var(--font-body) !important;
  font-size: 14px !important;
  font-weight: 700 !important;
  letter-spacing: 0.06em;
}
.ltx-btn-cancel:hover {
  background: rgba(255,60,100,0.22) !important;
  border-color: rgba(255,60,100,0.6) !important;
  color: #ffb0bc !important;
  box-shadow: 0 0 20px rgba(255,60,100,0.2) !important;
  transform: translateY(-1px);
}
.ltx-btn-cancel:active { transform: scale(0.98); }

/* Override Gradio button defaults */
button.primary {
  background: linear-gradient(135deg, #6b20c8 0%, #9a40e0 40%, #00bcd4 100%) !important;
  border: none !important;
  font-family: var(--font-display) !important;
  letter-spacing: 0.08em !important;
  box-shadow: 0 4px 24px rgba(138,43,226,0.45) !important;
  transition: all 0.22s ease !important;
}
button.primary:hover {
  box-shadow: 0 6px 32px rgba(138,43,226,0.65), 0 0 30px rgba(0,229,255,0.15) !important;
  transform: translateY(-2px) !important;
}
button.primary:active { transform: scale(0.98) !important; }

button.secondary {
  background: rgba(138,43,226,0.1) !important;
  border: 1px solid var(--border) !important;
  color: var(--text-primary) !important;
  transition: all 0.22s ease !important;
}
button.secondary:hover {
  background: rgba(138,43,226,0.22) !important;
  border-color: var(--border-bright) !important;
  box-shadow: 0 0 16px rgba(138,43,226,0.2) !important;
}

/* ─── Sliders, Dropdowns, Checkboxes ─── */
input[type="range"] {
  accent-color: var(--purple-2) !important;
}

.gradio-slider input[type="range"]::-webkit-slider-thumb {
  background: var(--purple-2) !important;
  box-shadow: 0 0 10px var(--purple-glow) !important;
}

select, .gr-dropdown select {
  background: rgba(8,4,20,0.85) !important;
  border: 1px solid var(--border) !important;
  color: var(--text-primary) !important;
  border-radius: 8px !important;
  font-family: var(--font-body) !important;
}
select:focus {
  border-color: var(--purple-2) !important;
  box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important;
  outline: none !important;
}

input[type="checkbox"] {
  accent-color: var(--purple-2) !important;
}

/* ─── Labels ─── */
label, .label-wrap span, .gr-form label {
  color: var(--text-muted) !important;
  font-family: var(--font-body) !important;
  font-size: 13px !important;
  font-weight: 600 !important;
  letter-spacing: 0.04em !important;
  text-transform: uppercase !important;
}

/* ─── Main Layout ─── */
.output-wrap {
  background: rgba(8,4,20,0.9);
  border: 1px solid var(--border);
  border-radius: var(--radius-lg);
  overflow: hidden;
  position: relative;
  min-height: 320px;
  display: flex;
  align-items: center;
  justify-content: center;
}
.output-wrap video {
  width: 100%;
  border-radius: var(--radius-lg);
}

.output-placeholder {
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  gap: 12px;
  color: var(--text-muted);
  font-family: var(--font-body);
  font-size: 14px;
  padding: 40px;
  text-align: center;
}
.output-placeholder .icon {
  font-size: 48px;
  opacity: 0.5;
  animation: pulse 2.5s ease-in-out infinite;
}
@keyframes pulse {
  0%, 100% { opacity: 0.4; transform: scale(1); }
  50% { opacity: 0.7; transform: scale(1.05); }
}

/* ─── Accordion / Advanced ─── */
.gr-accordion {
  background: rgba(8,4,20,0.5) !important;
  border: 1px solid var(--border) !important;
  border-radius: 12px !important;
  overflow: hidden !important;
}
.gr-accordion > .label-wrap {
  background: rgba(138,43,226,0.08) !important;
  padding: 12px 16px !important;
  cursor: pointer !important;
  transition: background 0.2s ease !important;
}
.gr-accordion > .label-wrap:hover {
  background: rgba(138,43,226,0.15) !important;
}

/* ─── Number inputs ─── */
input[type="number"] {
  background: rgba(8,4,20,0.8) !important;
  border: 1px solid var(--border) !important;
  color: var(--text-primary) !important;
  border-radius: 8px !important;
  font-family: var(--font-body) !important;
}
input[type="number"]:focus {
  border-color: var(--purple-2) !important;
  box-shadow: 0 0 0 3px rgba(138,43,226,0.18) !important;
  outline: none !important;
}

/* ─── Status / Seed display ─── */
.seed-display {
  display: inline-flex;
  align-items: center;
  gap: 8px;
  padding: 6px 14px;
  border-radius: var(--radius-pill);
  border: 1px solid var(--border);
  background: rgba(8,4,20,0.6);
  color: var(--text-muted);
  font-family: var(--font-body);
  font-size: 12px;
  font-weight: 600;
  letter-spacing: 0.06em;
}

/* ─── Custom textarea override ─── */
.gr-textbox textarea {
  background: rgba(8,4,20,0.8) !important;
  border: 1px solid var(--border) !important;
  border-radius: 12px !important;
  color: var(--text-primary) !important;
  font-family: var(--font-body) !important;
  font-size: 15px !important;
}

/* ─── Glow divider ─── */
.glow-divider {
  height: 1px;
  background: linear-gradient(90deg, transparent, var(--purple-1), var(--cyan-acc), var(--purple-1), transparent);
  margin: 20px 0;
  opacity: 0.5;
}

/* ─── Badges ─── */
.badge {
  display: inline-flex;
  align-items: center;
  padding: 3px 10px;
  border-radius: var(--radius-pill);
  font-family: var(--font-body);
  font-size: 11px;
  font-weight: 700;
  letter-spacing: 0.08em;
  text-transform: uppercase;
}
.badge-purple {
  background: rgba(138,43,226,0.2);
  border: 1px solid rgba(138,43,226,0.4);
  color: var(--purple-3);
}
.badge-cyan {
  background: rgba(0,229,255,0.1);
  border: 1px solid rgba(0,229,255,0.3);
  color: var(--cyan-acc);
}

/* ─── Scrollbar ─── */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg-deep); }
::-webkit-scrollbar-thumb { background: rgba(138,43,226,0.4); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: var(--purple-1); }

/* ─── Gradio structural overrides ─── */
#col-container { max-width: 1300px; margin: 0 auto; }
.gap { gap: 12px !important; }
.contain { background: transparent !important; }
.padded { padding: 0 !important; }

/* Image upload styling */
.svelte-116rqfv { 
  background: rgba(8,4,20,0.6) !important;
  border: 1px dashed var(--border) !important;
  border-radius: 12px !important;
}

/* Progress bar */
.progress-bar { background: linear-gradient(90deg, var(--purple-1), var(--cyan-acc)) !important; }

/* Generating animation on button */
@keyframes generating {
  0%, 100% { box-shadow: 0 4px 24px rgba(138,43,226,0.45), 0 0 0 1px rgba(180,100,255,0.3); }
  50% { box-shadow: 0 4px 36px rgba(138,43,226,0.7), 0 0 50px rgba(0,229,255,0.3), 0 0 0 2px rgba(180,100,255,0.5); }
}

/* Custom motion input */
.custom-motion-wrap { margin-top: 10px; }
.custom-motion-wrap textarea {
  background: rgba(8,4,20,0.8) !important;
  border: 1px solid rgba(138,43,226,0.3) !important;
  border-radius: 10px !important;
  color: var(--text-primary) !important;
  font-family: var(--font-body) !important;
  font-size: 14px !important;
  min-height: 60px !important;
}

/* Tooltips / info text */
.info-text {
  color: var(--text-muted);
  font-family: var(--font-body);
  font-size: 12px;
  margin-top: 4px;
}

/* ─── High-res toggle chip ─── */
#high-res-toggle label {
  display: inline-flex !important;
  align-items: center !important;
  gap: 10px !important;
  padding: 10px 18px !important;
  border-radius: var(--radius-pill) !important;
  border: 1px solid var(--border) !important;
  background: rgba(0,229,255,0.05) !important;
  color: var(--text-muted) !important;
  font-size: 14px !important;
  font-weight: 700 !important;
  cursor: pointer !important;
  transition: all 0.22s ease !important;
  text-transform: none !important;
  letter-spacing: 0.03em !important;
}
#high-res-toggle input:checked + label,
#high-res-toggle label:has(input:checked) {
  border-color: var(--cyan-acc) !important;
  background: rgba(0,229,255,0.12) !important;
  color: var(--cyan-acc) !important;
  box-shadow: 0 0 16px rgba(0,229,255,0.2) !important;
}
#high-res-toggle label:hover {
  border-color: var(--border-bright) !important;
  color: var(--purple-3) !important;
}
"""

# ─────────────────────────────────────────────────────────────
# UI helper functions
# ─────────────────────────────────────────────────────────────
def update_mode_ui(mode):
    """Translate the selected generation mode into four visibility updates.

    Args:
        mode: The mode label; compared against "Interpolate" and
            "Motion Control". Any other value counts as neither.

    Returns:
        A 4-tuple of ``gr.update(visible=...)`` objects, in this order:
        last-frame image (shown only for "Interpolate"), reference video and
        preprocess section (both shown only for "Motion Control"), and a
        fourth slot that is visible whenever the mode is NOT "Motion Control".
    """
    interpolating = mode == "Interpolate"
    motion_control = mode == "Motion Control"
    visibility_flags = (
        interpolating,        # last_image
        motion_control,       # input_video
        motion_control,       # preprocess section
        not motion_control,   # video_preprocess hidden
    )
    return tuple(gr.update(visible=flag) for flag in visibility_flags)

# ─────────────────────────────────────────────────────────────
# Gradio App
# ─────────────────────────────────────────────────────────────
# Build the whole UI inside a single Blocks context: session state, header,
# mode selector, a two-column input/output layout, and finally the event wiring.
with gr.Blocks(
    title="LTX Studio — Anime Video Generator",
) as demo:

    # ── State ──
    # ui_mode mirrors mode_radio (copied over in the mode-change chain below);
    # duration is written by the duration_radio and input_video handlers.
    ui_mode     = gr.State("Image-to-Video")
    duration    = gr.State(3.0)
    # NOTE(review): sel_preset is not referenced anywhere else in this block —
    # confirm it is still needed.
    sel_preset  = gr.State("None")

    with gr.Column(elem_id="col-container"):

        # ── Header ──
        gr.HTML("""
        <div id="ltx-header">
          <div class="ltx-nebula"></div>
          <div class="ltx-logo-text">⚡ LTX STUDIO</div>
          <div class="ltx-tagline">Anime Image Studio · Qwen · Upscale Gen · AI Video Synthesis</div>
          <a class="ltx-site-link" href="https://animara.space" target="_blank" rel="noopener noreferrer">
            ✦ Visit Anime Studio Site
          </a>
        </div>
        """)

        # ── Mode Selector ──
        with gr.Row(elem_id="mode-selector-wrap"):
            mode_radio = gr.Radio(
                choices=["Image-to-Video", "Interpolate", "Motion Control"],
                value="Image-to-Video",
                label="",
                show_label=False,
                interactive=True,
                elem_id="mode-radio",
            )

        # ── Main Two-Column Layout ──
        with gr.Row(equal_height=False):

            # ════════════════════════════════════
            # LEFT PANEL — Inputs
            # ════════════════════════════════════
            with gr.Column(scale=1, elem_classes=["ltx-card"]):

                gr.HTML('<div class="ltx-card-label">✦ Media Input</div>')

                # first_image is always shown; last_image and input_video start
                # hidden and are outputs of the mode_radio.change handler below.
                with gr.Row():
                    first_image = gr.Image(
                        label="First Frame",
                        type="filepath",
                        height=220,
                        elem_id="first_image",
                    )
                    last_image = gr.Image(
                        label="Last Frame",
                        type="filepath",
                        visible=False,
                        height=220,
                        elem_id="last_image",
                    )
                    input_video = gr.Video(
                        label="Reference Video",
                        visible=False,
                        height=220,
                        elem_id="input_video",
                    )

                # ── Video Preprocess (Motion Control only — shown at TOP) ──
                # Starts hidden; the column itself is an output of mode_radio.change.
                with gr.Column(visible=False, elem_id="preprocess-section") as preprocess_section:
                    gr.HTML('<div class="ltx-card-label">✦ Reference Video Processing</div>')
                    video_preprocess = gr.Radio(
                        choices=["Raw (no preprocessing)", "Pose (DWPose)", "Canny Edge", "Depth (Laplacian)"],
                        value="Raw (no preprocessing)",
                        label="",
                        show_label=False,
                        interactive=True,
                        elem_id="preprocess-radio",
                    )
                    gr.HTML('<div class="glow-divider"></div>')

                # Hidden preprocess for non-motion modes
                # NOTE(review): within this block the textbox is only used as the
                # fourth output of mode_radio.change; generate_video receives
                # video_preprocess (the radio above), not this component.
                video_preprocess_hidden = gr.Textbox(
                    value="Raw (no preprocessing)",
                    visible=False,
                    elem_id="preprocess-hidden",
                )

                gr.HTML('<div class="glow-divider"></div>')

                # ── Prompt Presets ──
                # value=None: no preset is selected when the page loads.
                gr.HTML('<div class="ltx-card-label">✦ Prompt Presets</div>')
                prompt_preset_radio = gr.Radio(
                    choices=list(PROMPT_PRESETS.keys()),
                    value=None,
                    label="",
                    interactive=True,
                    elem_id="prompt-preset-radio",
                )

                gr.HTML('<div class="glow-divider"></div>')
                gr.HTML('<div class="ltx-card-label">✦ Style Modifier</div>')
                style_radio = gr.Radio(
                    choices=list(STYLE_MODIFIERS.keys()),
                    value="None",
                    label="",
                    interactive=True,
                    elem_id="style-radio",
                )

                gr.HTML('<div class="glow-divider"></div>')
                gr.HTML('<div class="ltx-card-label">✦ Prompt</div>')

                # The prompt box is written to by the prompt-preset, style,
                # motion-preset and custom-motion handlers wired below.
                prompt = gr.Textbox(
                    label="",
                    value=DEFAULT_PROMPT,
                    placeholder="Describe motion, scene dynamics, facial expression, camera movement, or style...",
                    lines=4,
                    max_lines=8,
                    elem_id="prompt-box",
                )

                gr.HTML('<div class="glow-divider"></div>')

                # ── Motion Presets ──
                gr.HTML('<div class="ltx-card-label">✦ Motion Presets</div>')
                preset_radio = gr.Radio(
                    choices=list(MOTION_PRESETS.keys()),
                    value="None",
                    label="",
                    interactive=True,
                    elem_id="preset-radio",
                )

                # Starts hidden; this wrapper is an output of preset_radio.change.
                with gr.Column(visible=False, elem_id="custom-motion-wrap") as custom_motion_wrap:
                    custom_motion = gr.Textbox(
                        label="Custom Motion Description",
                        placeholder="e.g. slow spiral camera orbit with lens flare bloom...",
                        lines=2,
                        elem_id="custom-motion-input",
                    )

                gr.HTML('<div class="glow-divider"></div>')

                # ── Duration ──
                # The radio holds string labels; the duration State is what is
                # passed to generate_video.
                gr.HTML('<div class="ltx-card-label">✦ Clip Duration</div>')
                duration_radio = gr.Radio(
                    choices=["3s", "5s", "10s", "15s"],
                    value="3s",
                    label="",
                    show_label=False,
                    interactive=True,
                    elem_id="duration-radio",
                )

                gr.HTML('<div class="glow-divider"></div>')

                # ── High Res Toggle ──
                # Toggling this re-runs update_resolution (see wiring below).
                gr.HTML('<div class="ltx-card-label">✦ Output Quality</div>')
                high_res = gr.Checkbox(
                    label="⬆ High Resolution (2×) — slower, uses more VRAM",
                    value=False,
                    elem_id="high-res-toggle",
                )

                gr.HTML('<div class="glow-divider"></div>')

                # ── Advanced Settings ──
                with gr.Accordion("⚙ Advanced Settings", open=False):
                    conditioning_strength = gr.Slider(
                        label="Video Conditioning Strength",
                        minimum=0.0, maximum=1.0, value=0.85, step=0.05,
                    )
                    enhance_prompt = gr.Checkbox(label="Enhance Prompt (auto-rewrite)", value=False)
                    use_video_audio = gr.Checkbox(
                        label="Extract Audio from Reference Video",
                        value=True,
                    )
                    with gr.Row():
                        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=42, step=1)
                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    # width/height are also outputs of the resolution handlers,
                    # so media uploads and the high-res toggle overwrite them.
                    with gr.Row():
                        width  = gr.Number(label="Width",  value=768, precision=0)
                        height = gr.Number(label="Height", value=512, precision=0)

            # ════════════════════════════════════
            # RIGHT PANEL — Output
            # ════════════════════════════════════
            with gr.Column(scale=1, elem_classes=["ltx-card"]):

                gr.HTML('<div class="ltx-card-label">✦ Generated Video</div>')

                output_video = gr.Video(
                    label="",
                    autoplay=True,
                    height=400,
                    elem_id="output-video",
                )

                gr.HTML('<div class="glow-divider"></div>')

                generate_btn = gr.Button(
                    "⚡ Generate Video",
                    variant="primary",
                    size="lg",
                    elem_id="generate-btn",
                )

                cancel_btn = gr.Button(
                    "✕ Cancel Generation",
                    variant="secondary",
                    size="sm",
                    elem_id="cancel-btn",
                    elem_classes=["ltx-btn-cancel"],
                )

                # Read-only field; second output of the generate handler.
                seed_display = gr.Number(
                    label="Used Seed",
                    value=42,
                    interactive=False,
                    elem_id="seed-display",
                )

                gr.HTML('<div class="glow-divider"></div>')

                # Info badges
                gr.HTML("""
                <div style="display:flex; gap:8px; flex-wrap:wrap; margin-top:4px;">
                  <span class="badge badge-purple">LTX 2.3 22B</span>
                  <span class="badge badge-cyan">Distilled</span>
                  <span class="badge badge-purple">Audio-Video</span>
                  <span class="badge badge-cyan">IC-LoRA</span>
                </div>
                """)

    # ─────────────────────────────────────────────────────────
    # Event Wiring
    # ─────────────────────────────────────────────────────────

    # Mode changes
    # First step updates four components, then the chained step copies the
    # radio value into the ui_mode state.
    # NOTE(review): update_mode_visibility is not defined in this part of the
    # file; update_mode_ui (defined just above this block) returns four updates
    # in this same output order — confirm which helper is intended.
    mode_radio.change(
        fn=update_mode_visibility,
        inputs=[mode_radio],
        outputs=[last_image, input_video, preprocess_section, video_preprocess_hidden],
        api_name=False,
    ).then(
        fn=lambda x: x,
        inputs=[mode_radio],
        outputs=[ui_mode],
        api_name=False,
    )

    # Motion preset → writes directly to prompt + shows/hides custom input
    preset_radio.change(
        fn=on_motion_preset_to_prompt,
        inputs=[preset_radio, custom_motion],
        outputs=[custom_motion_wrap, prompt],
        api_name=False,
    )

    # Custom motion text → live update prompt when typing
    custom_motion.change(
        fn=on_custom_motion_change,
        inputs=[custom_motion, preset_radio],
        outputs=[prompt],
        api_name=False,
    )

    # Prompt preset → fills prompt box
    def on_prompt_preset_select(preset_name):
        """Return the prompt-box update for the chosen prompt preset.

        Looks up ``preset_name`` in PROMPT_PRESETS. A non-empty preset text is
        returned as the new prompt value; a missing key or empty text yields a
        bare ``gr.update()`` that carries no new value.
        """
        text = PROMPT_PRESETS.get(preset_name, "")
        return gr.update(value=text) if text else gr.update()

    prompt_preset_radio.change(
        fn=on_prompt_preset_select,
        inputs=[prompt_preset_radio],
        outputs=[prompt],
        api_name=False,
    )

    # Style modifier → appends to current prompt
    # The prompt is both an input and the output of this handler.
    style_radio.change(
        fn=apply_style_modifier,
        inputs=[style_radio, prompt],
        outputs=[prompt],
        api_name=False,
    )

    # Duration
    # The handler's result is stored in the duration state.
    duration_radio.change(
        fn=apply_duration_choice,
        inputs=[duration_radio],
        outputs=[duration],
        api_name=False,
    )

    # Image / video change → auto resolution
    # first_image, last_image and high_res share the same handler and the same
    # input list; input_video uses on_video_upload, which takes its inputs in a
    # different order and additionally writes duration_radio and duration.
    first_image.change(
        fn=update_resolution,
        inputs=[first_image, last_image, input_video, high_res],
        outputs=[width, height],
        api_name=False,
    )
    last_image.change(
        fn=update_resolution,
        inputs=[first_image, last_image, input_video, high_res],
        outputs=[width, height],
        api_name=False,
    )
    input_video.change(
        fn=on_video_upload,
        inputs=[input_video, first_image, last_image, high_res],
        outputs=[width, height, duration_radio, duration],
        api_name=False,
    )
    high_res.change(
        fn=update_resolution,
        inputs=[first_image, last_image, input_video, high_res],
        outputs=[width, height],
        api_name=False,
    )

    # Generate
    # The event handle is kept so the cancel button can reference it.
    # NOTE(review): inputs are passed positionally, so their order must match
    # generate_video's parameter order — confirm against its definition.
    # This is the only listener here without api_name=False (presumably so it
    # stays exposed as an API endpoint — confirm).
    gen_event = generate_btn.click(
        fn=generate_video,
        inputs=[
            prompt, first_image, last_image, input_video,
            gr.State(None),           # input_audio = None (removed from UI)
            ui_mode, duration, video_preprocess,
            conditioning_strength, enhance_prompt, use_video_audio,
            seed, randomize_seed, height, width,
            preset_radio, custom_motion,
        ],
        outputs=[output_video, seed_display],
    )

    # Cancel
    # fn=None: the click runs no function of its own; it only cancels gen_event.
    cancel_btn.click(
        fn=None,
        inputs=[],
        outputs=[],
        cancels=[gen_event],
        api_name=False,
    )


if __name__ == "__main__":
    # Script entry point: assemble the theme first, then start the app with
    # the custom stylesheet and theme supplied at launch time.
    hue_palette = gr.themes.colors
    studio_theme = gr.themes.Base(
        primary_hue=hue_palette.purple,
        secondary_hue=hue_palette.blue,
        neutral_hue=hue_palette.gray,
        font=gr.themes.GoogleFont("Rajdhani"),
    )
    launch_options = {
        "ssr_mode": False,
        "mcp_server": True,
        "css": css,
        "theme": studio_theme,
    }
    demo.launch(**launch_options)