#!/usr/bin/env python3
"""gQIR Gradio demo
Single-frame mode follows infer_sd2GAN_stage2.py (color path only).
Burst mode follows infer_burst_realistic.py for 77->11 aggregation and reconstruction.
Local run cmd:
python gradio_app.py \
  --single-config configs/inference/eval_sd2GAN.yaml \
  --burst-config configs/inference/eval_burst_mosaic.yaml \
  --device cuda --local
"""
from __future__ import annotations

import spaces
import argparse
import atexit
import os
import random
import shutil
import subprocess
import tempfile
import threading
import traceback
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional

import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from accelerate.utils import set_seed
from diffusers import DDPMScheduler, UNet2DConditionModel
from omegaconf import OmegaConf
from peft import LoraConfig
from PIL import Image
from transformers import CLIPTextModel, CLIPTokenizer

from gqvr.dataset.utils import emulate_spc, srgb_to_linearrgb
from gqvr.model.core_raft.raft import RAFT
from gqvr.model.fusionViT import LightweightHybrid3DFusion
from gqvr.model.generator import SD2Enhancer
from gqvr.model.vae import AutoencoderKL

try:
    import h5py
except Exception:
    h5py = None

try:
    from huggingface_hub import hf_hub_download
except Exception:
    hf_hub_download = None


# Directory containing this file; relative checkpoint paths are also tried against it.
APP_ROOT = Path(__file__).resolve().parent
# Default YAML config paths used as argparse defaults for each mode / pipeline type.
DEFAULT_SINGLE_CONFIG_COLOR =  "configs/inference/eval_3bit_color.yaml"
DEFAULT_SINGLE_CONFIG_MONO  =  "configs/inference/eval_3bit_mono.yaml"
DEFAULT_BURST_CONFIG_COLOR  =  "configs/inference/eval_burst_mosaic.yaml"
DEFAULT_BURST_CONFIG_MONO   =  "configs/inference/eval_burst.yaml"
# Size limit for the single-frame pipeline: element 0 is used as the target
# longest side and the product of both elements as the pixel budget.
DEFAULT_MAX_SIZE = (512, 512)
# Number of frames the burst pipeline requires in one reconstruction window.
BURST_WINDOW = 77

# Pipeline type identifiers; they key the checkpoint table and the pipeline caches.
PIPELINE_COLOR = "Color"
PIPELINE_MONO = "Monochrome"
PIPELINE_OPTIONS = [PIPELINE_COLOR, PIPELINE_MONO]
# Hugging Face repo used when a configured checkpoint is not a local file.
HF_DEFAULT_REPO_ID = "aRy4n/gQIR"
# Per pipeline type: checkpoint key -> filename inside the Hugging Face repo.
HF_MODEL_FILES = {
    PIPELINE_COLOR: {
        "single_qvae": "0105000.pt",
        "single_lora": "state_dict.pth",
        "burst_qvae": "0105000.pt",
        "burst_lora": "state_dict.pth",
        "burst_fusion": "fusion_vit_0050000.pt",
    },
    PIPELINE_MONO: {
        "single_qvae": "mono/0150000.pt",
        "single_lora": "mono/state_dict.pth",
        "burst_qvae": "mono/0150000.pt",
        "burst_lora": "mono/state_dict.pth",
        "burst_fusion": "mono/fusion_vit_0020000.pt",
    },
}

# Input-mode labels. NOTE(review): presumably shown as UI choices by code
# outside this chunk - confirm before changing the wording.
SINGLE_MODE_GT = "GT image (simulate 3-bit SPAD)"
SINGLE_MODE_REAL = "Real SPAD frame"
BURST_MODE_GT = "GT cube (simulate SPAD from RGB cube)"
BURST_MODE_REAL = "Real photon cube / SPAD cube"

# Shared torchvision transform used to turn HWC arrays into CHW tensors.
TO_TENSOR = transforms.ToTensor()

# Per-pipeline-type caches of constructed pipelines, each guarded by its own lock.
_SINGLE_PIPELINES: dict[str, "SingleColorPipeline"] = {}
_BURST_PIPELINES: dict[str, "BurstColorPipeline"] = {}
_SINGLE_LOCK = threading.Lock()
_BURST_LOCK = threading.Lock()

# Runtime settings read by the helpers below. NOTE(review): presumably filled
# in from the parsed CLI arguments by code outside this chunk - confirm.
RUNTIME_SINGLE_CONFIGS: dict[str, Path] = {}
RUNTIME_BURST_CONFIGS: dict[str, Path] = {}
RUNTIME_DEVICE: str = "cuda"
RUNTIME_BURST_OUT_SIZES: dict[str, int] = {}
RUNTIME_HF_REPO_ID: str = HF_DEFAULT_REPO_ID
RUNTIME_HF_CACHE_DIR: Optional[str] = None
RUNTIME_HF_TOKEN: Optional[str] = None
# Temporary directories removed by _cleanup_temp_video_dirs() at interpreter exit.
_TEMP_VIDEO_DIRS: list[str] = []


def _cleanup_temp_video_dirs() -> None:
    """Best-effort removal of every directory listed in ``_TEMP_VIDEO_DIRS``."""
    for temp_dir in _TEMP_VIDEO_DIRS:
        try:
            shutil.rmtree(temp_dir, ignore_errors=True)
        except Exception:
            # Cleanup must never raise; move on to the next directory.
            continue


# Run the cleanup automatically when the interpreter shuts down.
atexit.register(_cleanup_temp_video_dirs)


@dataclass
class CubeDescriptor:
    """Record describing one burst / photon-cube input source.

    NOTE(review): instances are created and consumed outside this chunk; the
    per-field notes below are inferred from field names and the existing
    inline comments - confirm against the code that builds them.
    """

    # One of the input-mode labels (presumably BURST_MODE_GT / BURST_MODE_REAL).
    source_mode: str
    kind: str  # dir | video | array | h5
    # Location of the source on disk.
    path: str
    # Number of frames available in the source.
    total_frames: int
    # Output size associated with this cube (presumably the burst out_size).
    out_size: int
    # Optional list of file paths (presumably the per-frame images for "dir"/"video").
    files: Optional[list[str]] = None
    array_format: Optional[str] = None  # npy | npz | pt
    # Optional key selecting the array inside a container file.
    array_key: Optional[str] = None
    # Optional dataset keys for the "h5" kind.
    h5_keys: Optional[list[str]] = None
    # Optional temporary directory tied to this descriptor (e.g. extracted frames).
    temp_dir: Optional[str] = None


def parse_args() -> argparse.Namespace:
    """Build the command-line parser for the demo and parse ``sys.argv``.

    Returns:
        The parsed namespace with config paths, device, Hugging Face settings
        and server options.
    """
    parser = argparse.ArgumentParser()

    # Legacy flags kept for backward compatibility; both default to None.
    legacy_flags = (
        ("--single-config", "Deprecated alias for --single-config-color"),
        ("--burst-config", "Deprecated alias for --burst-config-color"),
    )
    for flag, help_text in legacy_flags:
        parser.add_argument(flag, type=str, default=None, help=help_text)

    # One config path per (mode, pipeline type) pair.
    config_flags = (
        ("--single-config-color", DEFAULT_SINGLE_CONFIG_COLOR),
        ("--single-config-mono", DEFAULT_SINGLE_CONFIG_MONO),
        ("--burst-config-color", DEFAULT_BURST_CONFIG_COLOR),
        ("--burst-config-mono", DEFAULT_BURST_CONFIG_MONO),
    )
    for flag, default_path in config_flags:
        parser.add_argument(flag, type=str, default=str(default_path))

    # Prefer CUDA when it is available on this machine.
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    parser.add_argument(
        "--device",
        type=str,
        default=default_device,
        help="Inference device, e.g. cuda, cuda:0, cpu",
    )

    # Hugging Face checkpoint download settings.
    parser.add_argument(
        "--hf-repo-id",
        type=str,
        default=HF_DEFAULT_REPO_ID,
        help="Hugging Face repo containing gQIR checkpoints used when config paths are not local files.",
    )
    parser.add_argument(
        "--hf-cache-dir",
        type=str,
        default=None,
        help="Optional Hugging Face cache directory for checkpoint downloads.",
    )
    parser.add_argument(
        "--hf-token",
        type=str,
        default=None,
        help="Optional HF token. If omitted, app reads HF_TOKEN or HUGGINGFACE_HUB_TOKEN env vars.",
    )

    # Server options.
    parser.add_argument("--port", type=int, default=7860)
    parser.add_argument("--local", action="store_true", help="Bind to 127.0.0.1 instead of 0.0.0.0")
    parser.add_argument("--share", action="store_true")

    return parser.parse_args()


def _resolve_existing_file(path_value: Optional[str]) -> Optional[str]:
    """Return the resolved absolute path of an existing file, or ``None``.

    The path is tried as given (after ``~`` expansion); a relative path is
    additionally tried relative to ``APP_ROOT``. Empty or ``None`` input and
    paths that are not regular files yield ``None``.
    """
    if not path_value:
        return None

    given = Path(str(path_value)).expanduser()
    search_order = [given] if given.is_absolute() else [given, APP_ROOT / given]
    match = next((candidate for candidate in search_order if candidate.is_file()), None)
    return None if match is None else str(match.resolve())


def _download_hf_checkpoint(filename: str) -> str:
    """Download ``filename`` from the runtime Hugging Face repo.

    Returns:
        The resolved local path of the downloaded file.

    Raises:
        RuntimeError: If ``huggingface_hub`` could not be imported, or if the
            download itself fails (the original error is chained as the cause).
    """
    if hf_hub_download is None:
        raise RuntimeError(
            "huggingface_hub is required to download checkpoints. Install it or provide local model paths."
        )

    request: dict[str, Any] = {"repo_id": RUNTIME_HF_REPO_ID, "filename": filename}
    # Only forward the optional settings that are actually configured.
    optional_settings = (("cache_dir", RUNTIME_HF_CACHE_DIR), ("token", RUNTIME_HF_TOKEN))
    for option, value in optional_settings:
        if value:
            request[option] = value

    try:
        local_copy = hf_hub_download(**request)
    except Exception as exc:
        raise RuntimeError(
            f"Failed to download '{filename}' from '{RUNTIME_HF_REPO_ID}'. "
            "Check repo, token permissions, and network availability."
        ) from exc

    return str(Path(local_copy).resolve())


def _resolve_checkpoint_path(config_value: Optional[str], pipeline_type: str, file_key: str) -> str:
    """Resolve a checkpoint to a local path, downloading it when necessary.

    Args:
        config_value: Path taken from the config; may be missing or stale.
        pipeline_type: Key into ``HF_MODEL_FILES``.
        file_key: Checkpoint key within that pipeline type's table.

    Returns:
        The existing local file when ``config_value`` points to one, otherwise
        the path of the file downloaded from the Hugging Face repo.

    Raises:
        ValueError: If ``pipeline_type`` or ``file_key`` is unknown.
    """
    known_files = HF_MODEL_FILES.get(pipeline_type)
    if known_files is None:
        raise ValueError(f"Unknown pipeline type for checkpoint resolution: {pipeline_type}")
    if file_key not in known_files:
        raise ValueError(f"Unknown checkpoint key '{file_key}' for pipeline type '{pipeline_type}'")

    local_match = _resolve_existing_file(config_value)
    if local_match is None:
        remote_name = known_files[file_key]
        print(f"[gQIR] Missing local checkpoint; downloading {RUNTIME_HF_REPO_ID}/{remote_name}")
        return _download_hf_checkpoint(remote_name)
    return local_match


def _prepare_single_cfg_paths(cfg: Any, pipeline_type: str) -> Any:
    """Return a copy of a single-frame config with checkpoint paths resolved.

    ``weight_path`` and the qVAE path are replaced by local files, downloading
    them when the configured values do not exist. The qVAE path is read from
    ``model.vae_cfg.qvae_path`` first and from the top-level ``qvae_path`` as a
    fallback; the resolved value is written back to every place it appeared.

    Raises:
        ValueError: If the config has no ``model.vae_cfg`` section.
    """
    # Work on a detached copy so the caller's config is left untouched.
    cfg = OmegaConf.create(OmegaConf.to_container(cfg, resolve=False))
    has_vae_section = "model" in cfg and "vae_cfg" in cfg.model
    if not has_vae_section:
        raise ValueError("Single-frame config missing model.vae_cfg")

    configured_qvae = cfg.model.vae_cfg.qvae_path if "qvae_path" in cfg.model.vae_cfg else None
    if not configured_qvae and "qvae_path" in cfg:
        configured_qvae = cfg.qvae_path

    # Resolve the LoRA weights first, then the qVAE checkpoint.
    cfg.weight_path = _resolve_checkpoint_path(cfg.get("weight_path"), pipeline_type, "single_lora")
    qvae_local = _resolve_checkpoint_path(configured_qvae, pipeline_type, "single_qvae")

    cfg.model.vae_cfg.qvae_path = qvae_local
    if "qvae_path" in cfg:
        cfg.qvae_path = qvae_local
    return cfg


def _prepare_burst_cfg_paths(cfg: Any, pipeline_type: str) -> Any:
    """Return a copy of a burst config with all checkpoint paths resolved.

    ``qvae_path``, ``unet_weight_path`` and ``fusion_vit_weight_path`` are
    replaced (in that order) by local files, downloading when needed. When a
    ``model.vae_cfg`` section exists its ``qvae_path`` mirrors the top-level one.
    """
    # Work on a detached copy so the caller's config is left untouched.
    cfg = OmegaConf.create(OmegaConf.to_container(cfg, resolve=False))

    checkpoint_fields = (
        ("qvae_path", "burst_qvae"),
        ("unet_weight_path", "burst_lora"),
        ("fusion_vit_weight_path", "burst_fusion"),
    )
    for field_name, checkpoint_key in checkpoint_fields:
        resolved = _resolve_checkpoint_path(cfg.get(field_name), pipeline_type, checkpoint_key)
        setattr(cfg, field_name, resolved)

    if "model" in cfg and "vae_cfg" in cfg.model:
        cfg.model.vae_cfg.qvae_path = cfg.qvae_path
    return cfg


def _ensure_rgb_image(arr: np.ndarray) -> np.ndarray:
    """Coerce an image array to shape HxWx3.

    2-D and single-channel inputs are replicated across three channels, and a
    fourth channel is dropped.

    Raises:
        ValueError: If the result is not HxWx3.
    """
    image = np.asarray(arr)
    if image.ndim == 2:
        image = np.stack((image, image, image), axis=-1)
    elif image.ndim == 3:
        channels = image.shape[-1]
        if channels == 1:
            image = np.repeat(image, 3, axis=-1)
        elif channels == 4:
            # Keep the first three channels only.
            image = image[..., :3]

    is_hw3 = image.ndim == 3 and image.shape[-1] == 3
    if not is_hw3:
        raise ValueError(f"Expected image shape HxWx3, got {image.shape}")
    return image


def _normalize_float01(arr: np.ndarray) -> np.ndarray:
    """Convert an array to float32 scaled into [0, 1].

    The scaling is chosen from the value range:
      * already within [0, 1]: returned unchanged (as float32);
      * non-negative with max <= 255: divided by 255;
      * non-negative with a larger max: divided by the max;
      * otherwise: min-max normalised.
    Empty arrays are returned as empty float32 arrays.
    """
    data = np.asarray(arr).astype(np.float32)
    if data.size == 0:
        return data

    lowest = float(data.min())
    highest = float(data.max())

    if lowest >= 0.0:
        if highest <= 1.0:
            return data
        divisor = 255.0 if highest <= 255.0 else highest
        scaled = data / divisor
    else:
        # Negative (or NaN) range: fall back to min-max normalisation.
        span = max(highest - lowest, 1e-8)
        scaled = (data - lowest) / span
    return np.clip(scaled, 0.0, 1.0)


def _to_uint8(arr_float01: np.ndarray) -> np.ndarray:
    """Scale a [0, 1] float array to uint8, clipping out-of-range values.

    Values are truncated (not rounded) by the integer cast.
    """
    scaled = arr_float01 * 255.0
    bounded = np.clip(scaled, 0.0, 255.0)
    return bounded.astype(np.uint8)


def _resize_dims_keep_aspect(h: int, w: int, max_side: int, multiple_of: int = 1) -> tuple[int, int]:
    """Compute a target size whose longest side is ``max_side``.

    The frame is scaled (up or down) so that its longest side equals
    ``max_side`` while keeping the aspect ratio. When ``multiple_of`` is
    greater than 1, both sides are then rounded to the nearest multiple of it
    (at least one multiple).

    Args:
        h: Source height in pixels; must be positive.
        w: Source width in pixels; must be positive.
        max_side: Upper bound for both output sides; must be positive.
        multiple_of: Required divisor of both output sides (ignored when <= 1).

    Returns:
        ``(new_h, new_w)``.

    Raises:
        ValueError: If ``h``, ``w`` or ``max_side`` is not positive.
    """
    if h <= 0 or w <= 0:
        raise ValueError(f"Invalid frame size: {h}x{w}")
    if max_side <= 0:
        raise ValueError(f"Invalid max_side: {max_side}")

    scale = float(max_side) / float(max(h, w))
    new_h = max(1, int(round(h * scale)))
    new_w = max(1, int(round(w * scale)))

    limit = max_side
    if multiple_of > 1:
        new_h = max(multiple_of, int(round(new_h / multiple_of) * multiple_of))
        new_w = max(multiple_of, int(round(new_w / multiple_of) * multiple_of))
        # Clamp to the largest multiple that still fits in max_side; clamping to
        # max_side itself would break the multiple-of guarantee whenever
        # max_side is not a multiple. If no multiple fits at all, fall back to
        # max_side (the constraint cannot be honoured in that case).
        aligned_limit = (max_side // multiple_of) * multiple_of
        if aligned_limit > 0:
            limit = aligned_limit

    return min(new_h, limit), min(new_w, limit)


def _resize_frame_rgb(frame_float01: np.ndarray, max_side: int, multiple_of: int = 1) -> np.ndarray:
    """Normalise a frame to HxWx3 float [0, 1] and resize it to the target size.

    The target size comes from ``_resize_dims_keep_aspect``. When the frame
    already has that size the normalised frame is returned as is; otherwise it
    is resampled with Lanczos through an 8-bit PIL image, so resized output is
    quantised to 1/255 steps.
    """
    rgb = _normalize_float01(_ensure_rgb_image(frame_float01))
    src_h, src_w = rgb.shape[:2]
    dst_h, dst_w = _resize_dims_keep_aspect(src_h, src_w, max_side=max_side, multiple_of=multiple_of)
    if (src_h, src_w) == (dst_h, dst_w):
        return rgb

    # PIL expects (width, height) ordering.
    resampled = Image.fromarray(_to_uint8(rgb)).resize((dst_w, dst_h), Image.LANCZOS)
    return np.asarray(resampled, dtype=np.float32) / 255.0


def _resize_frames_rgb(frames_thwc: np.ndarray, max_side: int, multiple_of: int = 1) -> np.ndarray:
    """Resize a THWC stack of RGB frames to the common target size.

    When no resize is needed the whole stack is normalised in one call;
    otherwise each frame is normalised and resized individually.

    Raises:
        ValueError: If the input is not 4-D with 3 channels in the last axis.
    """
    stack = np.asarray(frames_thwc)
    is_thw3 = stack.ndim == 4 and stack.shape[-1] == 3
    if not is_thw3:
        raise ValueError(f"Expected THWC with C=3, got {stack.shape}")

    src_h, src_w = stack.shape[1:3]
    dst_h, dst_w = _resize_dims_keep_aspect(src_h, src_w, max_side=max_side, multiple_of=multiple_of)
    if (src_h, src_w) == (dst_h, dst_w):
        return _normalize_float01(stack)

    per_frame = [_resize_frame_rgb(frame, max_side=max_side, multiple_of=multiple_of) for frame in stack]
    return np.stack(per_frame, axis=0).astype(np.float32)


def _to_gray_uint8(img_uint8_rgb: Optional[np.ndarray]) -> Optional[np.ndarray]:
    """Convert an image to a 2-D uint8 grayscale array (``None`` passes through).

    2-D and HxWx1 inputs are only cast / squeezed; anything else is converted
    with PIL's "L" mode.
    """
    if img_uint8_rgb is None:
        return None

    pixels = np.asarray(img_uint8_rgb)
    if pixels.ndim == 3 and pixels.shape[-1] == 1:
        # Drop the singleton channel so it is handled like a 2-D image below.
        pixels = pixels[..., 0]
    if pixels.ndim == 2:
        return pixels.astype(np.uint8)

    gray = Image.fromarray(pixels.astype(np.uint8)).convert("L")
    return np.asarray(gray, dtype=np.uint8)


def _resize_uint8_to_hw(img_uint8: Optional[np.ndarray], target_h: int, target_w: int) -> Optional[np.ndarray]:
    """Resize a uint8 image to ``target_h`` x ``target_w`` with Lanczos resampling.

    2-D and HxWx1 inputs are resized as grayscale ("L") and returned 2-D;
    everything else is treated as "RGB". ``None`` passes through.
    """
    if img_uint8 is None:
        return None

    pixels = np.asarray(img_uint8)
    if pixels.ndim == 3 and pixels.shape[-1] == 1:
        # Drop the singleton channel so it takes the grayscale path.
        pixels = pixels[..., 0]
    pil_mode = "L" if pixels.ndim == 2 else "RGB"

    picture = Image.fromarray(pixels.astype(np.uint8), mode=pil_mode)
    # PIL expects (width, height) ordering.
    resized = picture.resize((target_w, target_h), Image.LANCZOS)
    return np.asarray(resized, dtype=np.uint8)


def _to_thwc(arr: np.ndarray) -> np.ndarray:
    """Rearrange a frame array into THWC layout with 1 or 3 channels.

    The channel axis is guessed by looking for an axis of length 1, 3 or 4,
    checked in this order: last axis, then axis 1 (4-D only), then axis 0.
    A leading singleton batch axis on a 5-D input is removed first. A 3-D
    input without a recognisable channel axis is treated as T x H x W
    single-channel. A fourth channel is dropped.

    Raises:
        ValueError: If the input is not 3-D/4-D (after batch removal) or no
            channel axis can be inferred for a 4-D input.
    """
    data = np.asarray(arr)
    if data.ndim == 5 and data.shape[0] == 1:
        data = data[0]

    channel_counts = (1, 3, 4)
    shape = data.shape
    if data.ndim == 4:
        if shape[-1] in channel_counts:
            frames = data
        elif shape[1] in channel_counts:
            # Channel axis at position 1: move it to the end.
            frames = np.moveaxis(data, 1, -1)
        elif shape[0] in channel_counts:
            # Channel axis first: exchange the first and last axes.
            frames = np.swapaxes(data, 0, 3)
        else:
            raise ValueError(f"Cannot infer channel axis from shape {data.shape}")
    elif data.ndim == 3:
        if shape[-1] in channel_counts:
            frames = data[np.newaxis, ...]
        elif shape[0] in channel_counts:
            frames = np.moveaxis(data, 0, -1)[np.newaxis, ...]
        else:
            # Treat as T x H x W single-channel.
            frames = data[..., np.newaxis]
    else:
        raise ValueError(f"Expected 3D or 4D array, got shape {data.shape}")

    return frames[..., :3] if frames.shape[-1] == 4 else frames


def _single_channel_bayer_to_sparse_rgb(frames_thw1: np.ndarray) -> np.ndarray:
    """Scatter single-channel mosaic frames into a sparse 3-channel stack.

    Within each 2x2 cell the samples are placed as: top-left -> channel 0,
    top-right and bottom-left -> channel 1, bottom-right -> channel 2. All
    other entries are zero.

    Raises:
        ValueError: If the input is not shaped T x H x W x 1.
    """
    mosaic = np.asarray(frames_thw1).astype(np.float32)
    if mosaic.ndim != 4 or mosaic.shape[-1] != 1:
        raise ValueError(f"Expected THW1, got {mosaic.shape}")
    mosaic = mosaic[..., 0]

    sparse = np.zeros(mosaic.shape + (3,), dtype=np.float32)
    # (row offset, column offset, destination channel) for each cell position.
    layout = ((0, 0, 0), (0, 1, 1), (1, 0, 1), (1, 1, 2))
    for row_off, col_off, channel in layout:
        sparse[:, row_off::2, col_off::2, channel] = mosaic[:, row_off::2, col_off::2]
    return sparse


def _mosaic_with_pattern(img_rgb: np.ndarray, pattern: str) -> np.ndarray:
    """Keep one colour sample per pixel according to a 2x2 Bayer pattern.

    Args:
        img_rgb: HxWx3 image.
        pattern: One of "RGGB", "GRBG", "BGGR", "GBRG".

    Returns:
        A float32 array shaped like ``img_rgb`` in which each pixel keeps only
        the channel selected by the pattern; everything else is zero.

    Raises:
        ValueError: If ``pattern`` is not supported.
    """
    planes = [img_rgb[:, :, channel] for channel in range(3)]
    mosaic = np.zeros_like(img_rgb, dtype=np.float32)

    # pattern -> ((row offset, column offset, channel index), ...) per 2x2 cell.
    layouts = {
        "RGGB": ((0, 0, 0), (0, 1, 1), (1, 0, 1), (1, 1, 2)),
        "GRBG": ((0, 1, 0), (0, 0, 1), (1, 1, 1), (1, 0, 2)),
        "BGGR": ((0, 0, 2), (0, 1, 1), (1, 0, 1), (1, 1, 0)),
        "GBRG": ((0, 0, 1), (1, 1, 1), (0, 1, 2), (1, 0, 0)),
    }
    layout = layouts.get(pattern)
    if layout is None:
        raise ValueError(f"Unsupported Bayer pattern: {pattern}")

    for row_off, col_off, channel in layout:
        mosaic[row_off::2, col_off::2, channel] = planes[channel][row_off::2, col_off::2]
    return mosaic


def _simulate_single_3bit_from_gt(gt_rgb_float01: np.ndarray, target_ppp: float) -> np.ndarray:
    """Simulate a 3-bit colour-mosaic SPAD frame from a ground-truth RGB image.

    Seven (2**3 - 1) frames are drawn with ``emulate_spc`` from the linearised
    image, each is mosaicked with a randomly chosen Bayer pattern, and the
    frames are averaged and clipped to [0, 1]. ``target_ppp`` is converted to
    the ``emulate_spc`` factor by dividing by 3.5.
    """
    bit_depth = 3
    frame_count = (2**bit_depth) - 1
    exposure_factor = target_ppp / 3.5
    bayer_patterns = ["RGGB", "GRBG", "BGGR", "GBRG"]

    accumulated = np.zeros_like(gt_rgb_float01, dtype=np.float32)
    for _ in range(frame_count):
        linear = srgb_to_linearrgb(gt_rgb_float01)
        photon_frame = emulate_spc(linear, factor=exposure_factor).astype(np.float32)
        accumulated += _mosaic_with_pattern(photon_frame, random.choice(bayer_patterns))

    averaged = accumulated / float(frame_count)
    return np.clip(averaged, 0.0, 1.0)


def _simulate_single_3bit_from_gt_mono(gt_rgb_float01: np.ndarray, target_ppp: float) -> np.ndarray:
    """Simulate a 3-bit SPAD frame without any colour mosaic.

    Seven (2**3 - 1) frames are drawn with ``emulate_spc`` from the linearised
    image, averaged and clipped to [0, 1]. ``target_ppp`` is converted to the
    ``emulate_spc`` factor by dividing by 3.5.
    """
    bit_depth = 3
    frame_count = (2**bit_depth) - 1
    exposure_factor = target_ppp / 3.5

    accumulated = np.zeros_like(gt_rgb_float01, dtype=np.float32)
    for _ in range(frame_count):
        linear = srgb_to_linearrgb(gt_rgb_float01)
        accumulated += emulate_spc(linear, factor=exposure_factor).astype(np.float32)

    averaged = accumulated / float(frame_count)
    return np.clip(averaged, 0.0, 1.0)


def _simulate_binary_burst_frame_from_gt(gt_rgb_float01: np.ndarray, target_ppp: float = 3.5) -> np.ndarray:
    """Simulate one burst frame with a fixed "BGGR" mosaic from a ground-truth image.

    Uses the same PPP scaling convention as the single-frame simulation
    (``factor = target_ppp / 3.5``).
    """
    exposure_factor = target_ppp / 3.5
    linear = srgb_to_linearrgb(gt_rgb_float01)
    photon_frame = emulate_spc(linear, factor=exposure_factor).astype(np.float32)
    return _mosaic_with_pattern(photon_frame, "BGGR")


def _simulate_binary_burst_frame_from_gt_mono(gt_rgb_float01: np.ndarray, target_ppp: float = 3.5) -> np.ndarray:
    """Simulate one burst frame without a colour mosaic from a ground-truth image.

    Uses the same PPP scaling convention as the single-frame simulation
    (``factor = target_ppp / 3.5``).
    """
    exposure_factor = target_ppp / 3.5
    linear = srgb_to_linearrgb(gt_rgb_float01)
    photon_frame = emulate_spc(linear, factor=exposure_factor)
    return photon_frame.astype(np.float32)


def _tensor_to_uint8_image(x_bchw: torch.Tensor) -> np.ndarray:
    """Convert the first item of a BCHW tensor into an HWC uint8 image.

    Values are clamped to [0, 1], scaled by 255 and truncated by the cast.
    """
    first_item = x_bchw.detach().cpu().clamp(0.0, 1.0)[0]
    hwc = first_item.permute(1, 2, 0).numpy()
    return (hwc * 255.0).astype(np.uint8)


def _encode_prompt(tokenizer: CLIPTokenizer, text_encoder: CLIPTextModel, prompt: str, bs: int, device: str) -> torch.Tensor:
    """Tokenise ``prompt`` repeated ``bs`` times and run it through the text encoder.

    Prompts are padded / truncated to the tokenizer's maximum length. The
    first element of the encoder output is returned.
    """
    batch_of_prompts = [prompt] * bs
    encoded = tokenizer(
        batch_of_prompts,
        max_length=tokenizer.model_max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    token_ids = encoded.input_ids.to(device)
    return text_encoder(token_ids)[0]


def differentiable_warp(x: torch.Tensor, flow: torch.Tensor) -> torch.Tensor:
    """Warp ``x`` by a per-pixel displacement field using bilinear sampling.

    Args:
        x: Tensor of shape (B, C, H, W) to sample from.
        flow: Tensor of shape (B, 2, H, W); channel 0 is the x (column)
            displacement and channel 1 the y (row) displacement, in pixels.

    Returns:
        A (B, C, H, W) tensor where output pixel (r, c) samples ``x`` at
        (r + flow_y, c + flow_x). Out-of-range samples use border padding.
    """
    b, _, h, w = x.size()

    # Build the pixel-coordinate grid directly on the target device. The
    # explicit "ij" indexing matches the previous implicit default and avoids
    # the deprecation warning for calling meshgrid without it.
    rows, cols = torch.meshgrid(
        torch.arange(h, device=x.device),
        torch.arange(w, device=x.device),
        indexing="ij",
    )
    base_grid = torch.stack((cols, rows), 2).float()  # (H, W, 2) as (x, y)
    base_grid = base_grid.unsqueeze(0).expand(b, -1, -1, -1)

    sample_grid = base_grid + flow.permute(0, 2, 3, 1)

    # Normalise to [-1, 1] as grid_sample expects with align_corners=True.
    # A dimension of size 1 would divide by zero; with the guard, coordinate 0
    # maps to -1, which is exactly that single pixel.
    sample_grid[..., 0] = 2.0 * sample_grid[..., 0] / max(w - 1, 1) - 1.0
    sample_grid[..., 1] = 2.0 * sample_grid[..., 1] / max(h - 1, 1) - 1.0
    return F.grid_sample(x, sample_grid, align_corners=True, padding_mode="border")


class SingleColorPipeline:
    """Single-frame reconstruction pipeline backed by ``SD2Enhancer``.

    The model is built lazily by :meth:`load` from the YAML config at
    ``config_path``. ``pipeline_type`` selects which checkpoints are resolved
    (it is forwarded to ``_prepare_single_cfg_paths``).
    """

    def __init__(self, config_path: Path, device: str, pipeline_type: str):
        """Store the settings; no model is created until :meth:`load` runs."""
        self.config_path = config_path
        self.device = device
        self.pipeline_type = pipeline_type
        # Element 0 is the target longest side; the product is the pixel budget.
        self.max_size = DEFAULT_MAX_SIZE
        self.model: Optional[SD2Enhancer] = None

    def load(self) -> None:
        """Build and initialise the enhancer; a no-op once it is loaded.

        Raises:
            ValueError: If the config's ``base_model_type`` is not ``"sd2"``.
        """
        if self.model is not None:
            return
        cfg = OmegaConf.load(str(self.config_path))
        cfg = _prepare_single_cfg_paths(cfg, self.pipeline_type)
        if cfg.base_model_type != "sd2":
            raise ValueError(f"Unsupported base_model_type for single pipeline: {cfg.base_model_type}")
        model = SD2Enhancer(
            base_model_path=cfg.base_model_path,
            weight_path=cfg.weight_path,
            lora_modules=cfg.lora_modules,
            lora_rank=cfg.lora_rank,
            model_t=cfg.model_t,
            coeff_t=cfg.coeff_t,
            vae_cfg=cfg.model.vae_cfg,
            device=self.device,
        )
        model.init_models()
        # Publish the model only after init_models() succeeded. Assigning it
        # earlier would make a failed initialisation look "loaded", and every
        # later load() call would return without retrying.
        self.model = model

    def _enhance(self, lq_rgb_float01: np.ndarray, prompt: str, only_vae_output: bool, seed: int) -> tuple[np.ndarray, int]:
        """Run the enhancer on one low-quality HxWx3 float frame.

        Args:
            lq_rgb_float01: Input frame; its pixel count must not exceed the
                ``max_size`` budget.
            prompt: Text prompt forwarded to the enhancer.
            only_vae_output: Forwarded to ``SD2Enhancer.enhance``.
            seed: RNG seed; ``-1`` draws a random 32-bit seed.

        Returns:
            ``(reconstruction as HxWx3 uint8, seed actually used)``.

        Raises:
            ValueError: If the frame exceeds the pixel budget.
        """
        if self.model is None:
            self.load()
        if seed == -1:
            seed = random.randint(0, 2**32 - 1)
        set_seed(seed)

        out_h, out_w = lq_rgb_float01.shape[:2]
        if out_h * out_w > self.max_size[0] * self.max_size[1]:
            raise ValueError(
                f"Resolution {out_h}x{out_w} exceeds max pixel budget "
                f"{self.max_size[0]}x{self.max_size[1]}."
            )

        image_tensor = TO_TENSOR(lq_rgb_float01).unsqueeze(0)
        pil_img = self.model.enhance(
            lq=image_tensor,
            prompt=prompt,
            upscale=1,
            return_type="pil",
            only_vae_output=only_vae_output,
            save_Gprocessed_latents=False,
            fname="",
        )[0]
        return np.asarray(pil_img.convert("RGB"), dtype=np.uint8), seed

    def reconstruct_from_gt(
        self,
        gt_image_np: np.ndarray,
        prompt: str,
        target_ppp: float,
        only_vae_output: bool,
        seed: int,
        simulate_color_mosaic: bool = True,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, str]:
        """Simulate a 3-bit SPAD frame from a ground-truth image and reconstruct it.

        The image is normalised, resized so its longest side is ``max_size[0]``
        (sides rounded to multiples of 8), degraded with the colour-mosaic or
        the mono simulation, and passed to :meth:`_enhance`.

        Returns:
            ``(ground truth uint8, simulated input uint8, reconstruction uint8,
            status message)``.
        """
        gt_rgb = _normalize_float01(_ensure_rgb_image(gt_image_np))
        gt_rgb = _resize_frame_rgb(gt_rgb, self.max_size[0], multiple_of=8)
        if simulate_color_mosaic:
            lq_rgb = _simulate_single_3bit_from_gt(gt_rgb, target_ppp=target_ppp)
        else:
            lq_rgb = _simulate_single_3bit_from_gt_mono(gt_rgb, target_ppp=target_ppp)
        recon_uint8, used_seed = self._enhance(lq_rgb, prompt, only_vae_output, seed)
        status = f"Single reconstruction complete (mode=GT simulation, seed={used_seed}, PPP={target_ppp:.2f})."
        return _to_uint8(gt_rgb), _to_uint8(lq_rgb), recon_uint8, status

    def reconstruct_from_real_spad(
        self,
        lq_image_np: np.ndarray,
        prompt: str,
        only_vae_output: bool,
        seed: int,
    ) -> tuple[np.ndarray, np.ndarray, str]:
        """Reconstruct from an already captured low-quality frame.

        The frame is normalised and resized exactly as in
        :meth:`reconstruct_from_gt`, but no degradation is simulated.

        Returns:
            ``(input uint8, reconstruction uint8, status message)``.
        """
        lq_rgb = _normalize_float01(_ensure_rgb_image(lq_image_np))
        lq_rgb = _resize_frame_rgb(lq_rgb, self.max_size[0], multiple_of=8)
        recon_uint8, used_seed = self._enhance(lq_rgb, prompt, only_vae_output, seed)
        status = f"Single reconstruction complete (mode=real SPAD frame, seed={used_seed})."
        return _to_uint8(lq_rgb), recon_uint8, status


class BurstColorPipeline:
    """Burst reconstruction pipeline: qVAE encode, flow alignment, fusion, UNet refine.

    All components are created lazily by :meth:`load`. ``self.vae`` doubles as
    the "fully loaded" sentinel checked by :meth:`load` and
    :meth:`_ensure_loaded`.
    """

    def __init__(self, config_path: Path, device: str, pipeline_type: str):
        """Store the settings; no model is created until :meth:`load` runs."""
        self.config_path = config_path
        self.device = device
        self.pipeline_type = pipeline_type
        self.cfg = None
        # Replaced by cfg.dataset.val.params.out_size once the config is loaded.
        self.out_size = 512
        # bfloat16 for the text encoder / UNet on CUDA devices, float32 otherwise.
        self.weight_dtype = torch.bfloat16 if str(device).startswith("cuda") else torch.float32

        self.vae: Optional[AutoencoderKL] = None
        self.raft_model: Optional[RAFT] = None
        self.fusion_vit: Optional[LightweightHybrid3DFusion] = None
        self.tokenizer: Optional[CLIPTokenizer] = None
        self.text_encoder: Optional[CLIPTextModel] = None
        self.scheduler: Optional[DDPMScheduler] = None
        self.ls_burst_unet: Optional[UNet2DConditionModel] = None

    def load(self) -> None:
        """Load every model component; a no-op once loading has completed.

        Raises:
            RuntimeError: If the LoRA checkpoint keys do not exactly match the
                UNet's LoRA parameters.
        """
        if self.vae is not None:
            return

        cfg = OmegaConf.load(str(self.config_path))
        cfg = _prepare_burst_cfg_paths(cfg, self.pipeline_type)
        self.cfg = cfg
        self.out_size = int(cfg.dataset.val.params.out_size)

        # qVAE: copy only the checkpoint entries whose keys exist in the model.
        vae = AutoencoderKL(cfg.model.vae_cfg.ddconfig, cfg.model.vae_cfg.embed_dim)
        da_vae = torch.load(cfg.qvae_path, map_location="cpu")
        init_vae = {}
        scratch = vae.state_dict()
        for key in scratch:
            if key in da_vae:
                init_vae[key] = da_vae[key].clone()
        vae.load_state_dict(init_vae, strict=True)
        vae.requires_grad_(False)
        vae.eval().to(self.device)

        class RAFTArgs:
            mixed_precision = True
            small = False
            alternate_corr = True
            dropout = False

        # Optical-flow model; weights are read from a fixed path under APP_ROOT.
        raft_model = RAFT(RAFTArgs())
        raft_path = APP_ROOT / "pretrained_ckpts" / "models" / "raft-things.pth"
        raft_dict = torch.load(str(raft_path), map_location="cpu")
        corrected = {}
        for k, v in raft_dict.items():
            # Drop the first dotted component of each key (presumably a
            # DataParallel-style "module." prefix - TODO confirm).
            k2 = ".".join(k.split(".")[1:]) if "." in k else k
            corrected[k2] = v
        raft_model.load_state_dict(corrected)
        raft_model.eval().requires_grad_(False).to(self.device)
        self.raft_model = raft_model

        fusion_vit = LightweightHybrid3DFusion()
        fusion_ckpt = torch.load(cfg.fusion_vit_weight_path, map_location="cpu")
        fusion_vit.load_state_dict(fusion_ckpt)
        fusion_vit.eval().requires_grad_(False).to(self.device)
        self.fusion_vit = fusion_vit

        self.tokenizer = CLIPTokenizer.from_pretrained(cfg.base_model_path, subfolder="tokenizer")
        self.text_encoder = CLIPTextModel.from_pretrained(
            cfg.base_model_path,
            subfolder="text_encoder",
            torch_dtype=self.weight_dtype,
        ).to(self.device)
        self.text_encoder.eval().requires_grad_(False)

        self.scheduler = DDPMScheduler.from_pretrained(cfg.base_model_path, subfolder="scheduler")
        ls_burst_unet = UNet2DConditionModel.from_pretrained(
            cfg.base_model_path,
            subfolder="unet",
            torch_dtype=self.weight_dtype,
        ).to(self.device)

        lora_cfg = LoraConfig(
            r=cfg.lora_rank,
            lora_alpha=cfg.lora_rank,
            init_lora_weights="gaussian",
            target_modules=cfg.lora_modules,
        )
        ls_burst_unet.add_adapter(lora_cfg)

        # Fall back for torch versions whose torch.load has no weights_only argument.
        try:
            state_dict = torch.load(cfg.unet_weight_path, map_location="cpu", weights_only=False)
        except TypeError:
            state_dict = torch.load(cfg.unet_weight_path, map_location="cpu")

        # The checkpoint must contain exactly the UNet's LoRA parameters.
        ls_burst_unet.load_state_dict(state_dict, strict=False)
        required_keys = {k for k in ls_burst_unet.state_dict().keys() if "lora" in k}
        input_keys = set(state_dict.keys())
        if required_keys != input_keys:
            missing = required_keys - input_keys
            unexpected = input_keys - required_keys
            raise RuntimeError(f"LoRA key mismatch. Missing={len(missing)} Unexpected={len(unexpected)}")

        ls_burst_unet.eval().requires_grad_(False)
        self.ls_burst_unet = ls_burst_unet

        # Publish the VAE last: it is the "loaded" sentinel, so setting it
        # before the other components are ready would turn any later failure
        # into a permanently half-loaded pipeline that load() never retries.
        self.vae = vae

    def _ensure_loaded(self) -> None:
        """Load the models on first use."""
        if self.vae is None:
            self.load()

    def reconstruct_from_binary_window(
        self,
        binary_window_77: np.ndarray,
        gt_window_77: Optional[np.ndarray] = None,
    ) -> tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
        """Reconstruct the centre frame of one burst window.

        Args:
            binary_window_77: Frame stack with ``BURST_WINDOW`` frames along
                axis 0; it must be THWC with 3 channels (checked by
                ``_resize_frames_rgb``).
            gt_window_77: Optional ground-truth stack, processed the same way
                and used only to produce the centre ground-truth frame.

        Returns:
            ``(centre input uint8, refined reconstruction uint8,
            centre ground truth uint8 or None)``.

        Raises:
            ValueError: If the window does not have ``BURST_WINDOW`` frames.
        """
        self._ensure_loaded()
        assert self.cfg is not None
        assert self.vae is not None
        assert self.raft_model is not None
        assert self.fusion_vit is not None
        assert self.tokenizer is not None
        assert self.text_encoder is not None
        assert self.scheduler is not None
        assert self.ls_burst_unet is not None

        if binary_window_77.shape[0] != BURST_WINDOW:
            raise ValueError(f"Burst window must have {BURST_WINDOW} frames, got {binary_window_77.shape[0]}")

        binary_window_77 = _resize_frames_rgb(
            _normalize_float01(binary_window_77),
            max_side=self.out_size,
            multiple_of=64,
        )

        # (T, H, W, C) -> (1, T, C, H, W), rescaled from [0, 1] to [-1, 1].
        lqs = torch.from_numpy(binary_window_77).unsqueeze(0).permute(0, 1, 4, 2, 3).to(self.device)
        lqs = (lqs * 2.0) - 1.0

        # Average each full group of 7 consecutive frames (77 frames -> 11).
        lqs_3bit = []
        for i in range(0, lqs.size(1), 7):
            chunk = lqs[:, i : i + 7, ...]
            if chunk.size(1) < 7:
                break
            lqs_3bit.append(torch.mean(chunk, dim=1, keepdim=True))
        lqs = torch.cat(lqs_3bit, dim=1)

        gts = None
        if gt_window_77 is not None:
            gt_window_77 = _resize_frames_rgb(
                _normalize_float01(gt_window_77),
                max_side=self.out_size,
                multiple_of=64,
            )
            gts = torch.from_numpy(gt_window_77).unsqueeze(0).permute(0, 1, 4, 2, 3).to(self.device)
            gts = (gts * 2.0) - 1.0
            # Same group-of-7 averaging as for the input frames.
            gts_3bit = []
            for i in range(0, gts.size(1), 7):
                chunk = gts[:, i : i + 7, ...]
                if chunk.size(1) < 7:
                    break
                gts_3bit.append(torch.mean(chunk, dim=1, keepdim=True))
            gts = torch.cat(gts_3bit, dim=1)

        with torch.inference_mode():
            bs = lqs.size(0)
            t_total = lqs.size(1)
            center_t = t_total // 2

            # Encode every averaged frame; keep the latent and its decoded image.
            latents = []
            decoded_lqs = []
            for t in range(t_total):
                lq_t = lqs[:, t, ...].float()
                z_t = self.vae.encode(lq_t).mode()
                latents.append(z_t)
                decoded_lqs.append(self.vae.decode(z_t).float())

            # Estimate flow between each decoded frame and the centre frame,
            # then rescale it from image resolution to latent resolution.
            y = torch.stack(decoded_lqs, dim=1)
            flow_vectors = []
            for t in range(t_total):
                ls_in = y[:, t, ...].float()
                center_in = y[:, center_t, ...].float()
                # Frames before the centre pass the centre frame as the first
                # RAFT input; the remaining frames pass it as the second.
                if t < center_t:
                    _, flow_bw = self.raft_model(center_in, ls_in, iters=20, test_mode=True)
                else:
                    _, flow_bw = self.raft_model(ls_in, center_in, iters=20, test_mode=True)
                z_h, z_w = latents[t].shape[-2:]
                in_h, in_w = ls_in.shape[-2:]
                flow_bw = F.interpolate(flow_bw, size=(z_h, z_w), mode="bilinear", align_corners=True)
                flow_bw[:, 0] *= float(z_w) / float(in_w)
                flow_bw[:, 1] *= float(z_h) / float(in_h)
                flow_vectors.append(flow_bw)

            # Warp every non-centre latent with its flow; the centre is kept as is.
            aligned_latents = []
            for t in range(t_total):
                latent_t = latents[t]
                if t == center_t:
                    aligned_latents.append(latent_t)
                else:
                    aligned_latents.append(differentiable_warp(latent_t, flow_vectors[t]))

            aligned_latents = torch.stack(aligned_latents, dim=1)
            merged_latent = self.fusion_vit(aligned_latents)

            # One UNet step on the fused latent, scaled by 0.18215 (the value
            # conventionally used as the Stable Diffusion latent scale) and
            # unscaled again before decoding.
            z_in = (merged_latent * 0.18215).to(self.weight_dtype)
            timesteps = torch.full((bs,), int(self.cfg.model_t), dtype=torch.long, device=self.device)
            text_embed = _encode_prompt(self.tokenizer, self.text_encoder, "", bs=bs, device=self.device)
            eps = self.ls_burst_unet(z_in, timesteps, encoder_hidden_states=text_embed).sample
            z = self.scheduler.step(eps, int(self.cfg.coeff_t), z_in).pred_original_sample
            decoded_refined = self.vae.decode(z.float() / 0.18215).float().clamp(0.0, 1.0)

            # Map the centre frames back from [-1, 1] to [0, 1] for display.
            center_input = ((lqs[:, center_t, ...] + 1.0) / 2.0).clamp(0.0, 1.0)
            center_gt = None
            if gts is not None:
                center_gt = ((gts[:, center_t, ...] + 1.0) / 2.0).clamp(0.0, 1.0)

        if torch.cuda.is_available() and str(self.device).startswith("cuda"):
            torch.cuda.empty_cache()

        return (
            _tensor_to_uint8_image(center_input),
            _tensor_to_uint8_image(decoded_refined),
            _tensor_to_uint8_image(center_gt) if center_gt is not None else None,
        )


def _get_single_pipeline(pipeline_type: str) -> SingleColorPipeline:
    """Return the cached, loaded single-frame pipeline for ``pipeline_type``.

    The pipeline is created and loaded on first use while holding
    ``_SINGLE_LOCK``.

    Raises:
        ValueError: If ``pipeline_type`` is not in ``PIPELINE_OPTIONS``.
        RuntimeError: If no single-frame config is registered for it.
    """
    if pipeline_type not in PIPELINE_OPTIONS:
        raise ValueError(f"Unknown pipeline type: {pipeline_type}")
    if pipeline_type not in RUNTIME_SINGLE_CONFIGS:
        raise RuntimeError(f"Single config not initialized for pipeline type: {pipeline_type}")
    with _SINGLE_LOCK:
        pipeline = _SINGLE_PIPELINES.get(pipeline_type)
        if pipeline is None:
            pipeline = SingleColorPipeline(
                RUNTIME_SINGLE_CONFIGS[pipeline_type],
                RUNTIME_DEVICE,
                pipeline_type,
            )
            # Cache only after a successful load: caching first would leave an
            # unloaded instance behind when load() raises, and later callers
            # would then load it outside this lock.
            pipeline.load()
            _SINGLE_PIPELINES[pipeline_type] = pipeline
        return pipeline


def _get_burst_pipeline(pipeline_type: str) -> BurstColorPipeline:
    """Return the cached, loaded burst pipeline for ``pipeline_type``.

    The pipeline is created and loaded on first use while holding
    ``_BURST_LOCK``.

    Raises:
        ValueError: If ``pipeline_type`` is not in ``PIPELINE_OPTIONS``.
        RuntimeError: If no burst config is registered for it.
    """
    if pipeline_type not in PIPELINE_OPTIONS:
        raise ValueError(f"Unknown pipeline type: {pipeline_type}")
    if pipeline_type not in RUNTIME_BURST_CONFIGS:
        raise RuntimeError(f"Burst config not initialized for pipeline type: {pipeline_type}")
    with _BURST_LOCK:
        pipeline = _BURST_PIPELINES.get(pipeline_type)
        if pipeline is None:
            pipeline = BurstColorPipeline(
                RUNTIME_BURST_CONFIGS[pipeline_type],
                RUNTIME_DEVICE,
                pipeline_type,
            )
            # Cache only after a successful load: caching first would leave an
            # unloaded instance behind when load() raises, and later callers
            # would then load it outside this lock.
            pipeline.load()
            _BURST_PIPELINES[pipeline_type] = pipeline
        return pipeline


def _get_runtime_burst_out_size(pipeline_type: str) -> int:
    """Return the burst output size registered for ``pipeline_type``.

    Falls back to the first entry of ``DEFAULT_MAX_SIZE`` when no size has
    been registered for that pipeline.
    """
    if pipeline_type in RUNTIME_BURST_OUT_SIZES:
        return int(RUNTIME_BURST_OUT_SIZES[pipeline_type])
    return DEFAULT_MAX_SIZE[0]


def _resolve_uploaded_path(uploaded_file: Any, local_path: str) -> Optional[str]:
    if isinstance(uploaded_file, str) and uploaded_file:
        return uploaded_file
    if hasattr(uploaded_file, "name") and uploaded_file.name:
        return uploaded_file.name
    if isinstance(uploaded_file, dict) and uploaded_file.get("name"):
        return uploaded_file["name"]

    if local_path and local_path.strip():
        return local_path.strip()
    return None


def _list_image_files(dir_path: str) -> list[str]:
    exts = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff"}
    files = []
    for name in sorted(os.listdir(dir_path)):
        p = os.path.join(dir_path, name)
        if os.path.isfile(p) and Path(name).suffix.lower() in exts:
            files.append(p)
    return files


def _is_video_extension(ext: str) -> bool:
    # Include common public-facing formats. Keep ".wav" for user compatibility;
    # extraction will fail with a clear message since it is audio-only.
    return ext in {".mp4", ".mov", ".m4v", ".avi", ".mkv", ".webm", ".wmv", ".wav"}


def _extract_video_frames_to_temp(video_path: str) -> tuple[str, list[str]]:
    """Decode every frame of ``video_path`` into PNG files in a fresh temp directory.

    Frames are written by ``ffmpeg`` as ``frame_%06d.png`` starting at 0. On
    success the directory is appended to ``_TEMP_VIDEO_DIRS``; on any failure
    (including unexpected ones) the directory is removed before the exception
    propagates, so no partially filled directory is left behind.

    Returns:
        ``(temp_dir, files)`` where ``files`` is the sorted list of frame paths.

    Raises:
        RuntimeError: If the ``ffmpeg`` executable cannot be found.
        ValueError: If ffmpeg fails to decode the input or produces no frames.
    """
    temp_dir = tempfile.mkdtemp(prefix="gqir_video_frames_")
    try:
        out_pattern = os.path.join(temp_dir, "frame_%06d.png")
        cmd = [
            "ffmpeg",
            "-hide_banner",
            "-loglevel",
            "error",
            "-i",
            video_path,
            "-vsync",
            "0",
            "-start_number",
            "0",
            out_pattern,
        ]
        try:
            proc = subprocess.run(cmd, check=True, capture_output=True, text=True)
        except FileNotFoundError as exc:
            raise RuntimeError("ffmpeg is not installed; video input requires ffmpeg.") from exc
        except subprocess.CalledProcessError as exc:
            stderr = (exc.stderr or "").strip()
            if Path(video_path).suffix.lower() == ".wav":
                raise ValueError(
                    "WAV is audio-only and does not contain video frames. "
                    "Please upload MP4/MOV/WMV/AVI/MKV/WebM for GT video input."
                ) from exc
            raise ValueError(f"Failed to decode video with ffmpeg: {stderr or 'unknown ffmpeg error'}") from exc

        files = _list_image_files(temp_dir)
        if not files:
            stderr = (proc.stderr or "").strip()
            raise ValueError(f"No frames were extracted from video. {stderr}".strip())
    except BaseException:
        # Single cleanup point: covers the translated errors above as well as
        # anything unexpected (e.g. PermissionError, KeyboardInterrupt).
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    _TEMP_VIDEO_DIRS.append(temp_dir)
    return temp_dir, files


def _extract_first_array(obj: Any) -> np.ndarray:
    if isinstance(obj, np.ndarray):
        return obj
    if isinstance(obj, torch.Tensor):
        return obj.detach().cpu().numpy()
    if isinstance(obj, (list, tuple)) and obj:
        if all(isinstance(x, (np.ndarray, torch.Tensor)) for x in obj):
            stacked = [x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x for x in obj]
            return np.stack(stacked, axis=0)
        return _extract_first_array(obj[0])
    if isinstance(obj, dict):
        preferred = ["cube", "frames", "lqs", "data", "array"]
        for key in preferred:
            if key in obj:
                return _extract_first_array(obj[key])
        for value in obj.values():
            try:
                return _extract_first_array(value)
            except Exception:
                continue
    raise ValueError("Could not extract ndarray/tensor from input object.")


def _inspect_array_file(path: str) -> tuple[str, Optional[str], int]:
    """Identify an array file and count its frames.

    Returns:
        ``(format, key, frames)`` where ``format`` is ``"npy"``, ``"npz"`` or
        ``"pt"``, ``key`` is the name of the first array for ``.npz`` files
        (``None`` otherwise), and ``frames`` is the leading dimension of the
        array after ``_to_thwc``.

    Raises:
        ValueError: For an unsupported extension or an ``.npz`` with no arrays.
    """
    suffix = Path(path).suffix.lower()

    if suffix in {".pt", ".pth"}:
        # NOTE(security): weights_only=False lets torch.load unpickle arbitrary
        # objects. This path is reachable with user-supplied files, so
        # untrusted uploads can execute code here -- review before exposing
        # the demo publicly.
        try:
            loaded = torch.load(path, map_location="cpu", weights_only=False)
        except TypeError:
            # Retry without the keyword when torch.load rejects it.
            loaded = torch.load(path, map_location="cpu")
        frames = _to_thwc(_extract_first_array(loaded))
        return "pt", None, int(frames.shape[0])

    if suffix == ".npz":
        with np.load(path) as archive:
            names = archive.files
            if not names:
                raise ValueError("NPZ has no arrays.")
            first_name = names[0]
            frames = _to_thwc(archive[first_name])
        return "npz", first_name, int(frames.shape[0])

    if suffix == ".npy":
        # Memory-map so only the header is needed to learn the shape.
        frames = _to_thwc(np.load(path, mmap_mode="r"))
        return "npy", None, int(frames.shape[0])

    raise ValueError(f"Unsupported array file extension: {suffix}")


def _load_array_window(desc: CubeDescriptor, start: int, count: int) -> np.ndarray:
    """Load ``count`` frames starting at ``start`` from an array-backed cube.

    The raw array is sliced along its first axis, then passed through
    ``_to_thwc`` and ``_normalize_float01``.

    Raises:
        ValueError: If ``desc.array_format`` is not ``npy``, ``npz`` or ``pt``.
    """
    assert desc.array_format is not None
    source = desc.path
    kind = desc.array_format
    window = slice(start, start + count)

    if kind == "npy":
        # Memory-mapped so only the requested frames are read from disk.
        raw = np.load(source, mmap_mode="r")[window]
    elif kind == "npz":
        with np.load(source) as archive:
            assert desc.array_key is not None
            raw = archive[desc.array_key][window]
    elif kind == "pt":
        # NOTE(security): weights_only=False unpickles arbitrary objects; see
        # torch.load documentation before accepting untrusted files.
        try:
            loaded = torch.load(source, map_location="cpu", weights_only=False)
        except TypeError:
            # Retry without the keyword when torch.load rejects it.
            loaded = torch.load(source, map_location="cpu")
        raw = _extract_first_array(loaded)[window]
    else:
        raise ValueError(f"Unsupported array format in descriptor: {kind}")

    return _normalize_float01(_to_thwc(raw))


def _load_h5_window(desc: CubeDescriptor, start: int, count: int) -> np.ndarray:
    """Load ``count`` frames starting at ``start`` from an H5 photon cube.

    Every frame consumes four consecutive entries of ``desc.h5_keys`` under
    ``capture_integrated/raw_hdf5``, treated here as the R, G1, B and G2
    planes. The planes are interleaved into a sparse 3-channel mosaic:
    R at (even row, even col), G1 at (even row, odd col), G2 at (odd row,
    even col) and B at (odd row, odd col); all other entries stay zero.

    Raises:
        RuntimeError: If ``h5py`` is not installed.
        ValueError: If fewer than four planes are available for a frame.
    """
    if h5py is None:
        raise RuntimeError("h5py is required for .h5 photon cube loading. Install with: pip install h5py")
    assert desc.h5_keys is not None

    def _read_plane(group, key) -> np.ndarray:
        # Keep the first two axes and drop the trailing two singleton indices.
        return np.asarray(group[key])[:, :, 0, 0].astype(np.float32)

    mosaics = []
    with h5py.File(desc.path, "r") as h5f:
        planes_group = h5f["capture_integrated"]["raw_hdf5"]
        for frame_idx in range(start, start + count):
            plane_keys = desc.h5_keys[frame_idx * 4 : (frame_idx + 1) * 4]
            if len(plane_keys) < 4:
                raise ValueError("H5 does not contain enough raw planes for the requested frame window.")
            red, green1, blue, green2 = (_read_plane(planes_group, key) for key in plane_keys)

            height, width = red.shape
            mosaic = np.zeros((height, width, 3), dtype=np.float32)
            mosaic[0::2, 0::2, 0] = red[0::2, 0::2]
            mosaic[0::2, 1::2, 1] = green1[0::2, 1::2]
            mosaic[1::2, 0::2, 1] = green2[1::2, 0::2]
            mosaic[1::2, 1::2, 2] = blue[1::2, 1::2]
            mosaics.append(mosaic)

    return _normalize_float01(np.stack(mosaics, axis=0))


def _load_window_from_descriptor(
    desc: CubeDescriptor,
    start: int,
    count: int,
    pipeline_type: str = PIPELINE_COLOR,
    resize_for_model: bool = True,
) -> np.ndarray:
    """Load a window of ``count`` frames from any supported cube source as 3-channel frames.

    Image-backed sources (``dir``/``video``) are read as RGB and scaled to
    [0, 1]; ``array`` and ``h5`` sources go through their dedicated loaders.
    Single-channel data is replicated to three channels for GT input or the
    monochrome pipeline, and expanded with
    ``_single_channel_bayer_to_sparse_rgb`` otherwise.

    Args:
        desc: Descriptor of the loaded cube.
        start: Index of the first frame of the window.
        count: Number of frames to load.
        pipeline_type: Pipeline the frames are destined for.
        resize_for_model: When True the frames are resized with
            ``_resize_frames_rgb`` (max side ``desc.out_size``, multiple of
            64); when False they are only normalised.

    Raises:
        ValueError: If the window is out of range, the descriptor kind is
            unknown, or the result does not have three channels.
    """
    stop = start + count
    if start < 0 or stop > desc.total_frames:
        raise ValueError(
            f"Invalid start index {start}. Valid range is [0, {max(desc.total_frames - count, 0)}]."
        )

    source_kind = desc.kind
    if source_kind == "array":
        out = _load_array_window(desc, start, count)
    elif source_kind == "h5":
        out = _load_h5_window(desc, start, count)
    elif source_kind in {"dir", "video"}:
        assert desc.files is not None
        out = np.stack(
            [
                np.asarray(Image.open(frame_path).convert("RGB"), dtype=np.float32) / 255.0
                for frame_path in desc.files[start:stop]
            ],
            axis=0,
        )
    else:
        raise ValueError(f"Unknown descriptor kind: {desc.kind}")

    if out.shape[-1] == 1:
        replicate_gray = desc.source_mode == BURST_MODE_GT or pipeline_type == PIPELINE_MONO
        if replicate_gray:
            out = np.repeat(out, 3, axis=-1)
        else:
            out = _single_channel_bayer_to_sparse_rgb(out)

    if out.shape[-1] != 3:
        raise ValueError(f"Expected 3 channels after conversion, got shape {out.shape}")

    if resize_for_model:
        return _resize_frames_rgb(out, max_side=desc.out_size, multiple_of=64)
    return _normalize_float01(out)


def _build_cube_descriptor(source_mode: str, path: str, out_size: int) -> CubeDescriptor:
    """Inspect ``path`` and describe it as a burst cube source.

    Supported inputs, checked in this order:
      * a directory of image files (``kind="dir"``);
      * a video file, GT mode only, whose frames are extracted to a temp
        directory (``kind="video"``);
      * ``.npy`` / ``.npz`` / ``.pt`` / ``.pth`` arrays (``kind="array"``);
      * ``.h5`` / ``.hdf5`` photon cubes, real mode only (``kind="h5"``),
        where every four raw planes form one frame.

    Args:
        source_mode: Burst input mode the cube is loaded for.
        path: File or directory to inspect.
        out_size: Output size stored on the descriptor.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the input is empty, unsupported, or not allowed for
            ``source_mode``.
        RuntimeError: If an H5 file is given but ``h5py`` is not installed.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"Path does not exist: {path}")

    if p.is_dir():
        files = _list_image_files(path)
        if not files:
            raise ValueError("Directory has no supported image files.")
        return CubeDescriptor(
            source_mode=source_mode,
            kind="dir",
            path=path,
            total_frames=len(files),
            out_size=out_size,
            files=files,
        )

    ext = p.suffix.lower()
    if _is_video_extension(ext):
        if source_mode != BURST_MODE_GT:
            raise ValueError("Video files are currently supported for GT burst mode only.")
        temp_dir, files = _extract_video_frames_to_temp(path)
        return CubeDescriptor(
            source_mode=source_mode,
            kind="video",
            path=path,
            total_frames=len(files),
            out_size=out_size,
            files=files,
            temp_dir=temp_dir,
        )

    if ext in {".npy", ".npz", ".pt", ".pth"}:
        fmt, key, total = _inspect_array_file(path)
        return CubeDescriptor(
            source_mode=source_mode,
            kind="array",
            path=path,
            total_frames=total,
            out_size=out_size,
            array_format=fmt,
            array_key=key,
        )

    if ext in {".h5", ".hdf5"}:
        if source_mode != BURST_MODE_REAL:
            raise ValueError("H5/UBI input is only supported for real photon cube mode.")
        if h5py is None:
            raise RuntimeError("h5py is required for .h5 photon cube loading. Install with: pip install h5py")

        def _plane_order(name: Any) -> tuple[int, int, str]:
            # Numeric names sort by value and ahead of non-numeric ones. The
            # tuple keeps ints and strs from being compared directly, which
            # would raise TypeError when a group mixes both kinds of name.
            text = str(name)
            if text.isdecimal():
                return (0, int(text), "")
            return (1, 0, text)

        with h5py.File(path, "r") as h5f:
            try:
                grp = h5f["capture_integrated"]["raw_hdf5"]
            except Exception as exc:
                raise ValueError("Expected H5 group capture_integrated/raw_hdf5") from exc
            keys = sorted(grp.keys(), key=_plane_order)

        # Four raw planes make up one frame; a trailing partial group is ignored.
        total = len(keys) // 4
        if total <= 0:
            raise ValueError("No usable frame groups found in H5 file.")

        return CubeDescriptor(
            source_mode=source_mode,
            kind="h5",
            path=path,
            total_frames=total,
            out_size=out_size,
            h5_keys=keys,
        )

    raise ValueError(f"Unsupported cube input format: {p.suffix}")


def _single_inputs_visibility(mode: str):
    """Return visibility updates for the single-frame inputs after a mode change.

    The three updates are, in order: GT image (shown in GT mode), real SPAD
    image (shown otherwise) and the target-PPP slider (shown in GT mode).
    """
    show_gt_inputs = mode == SINGLE_MODE_GT
    gt_image_update = gr.update(visible=show_gt_inputs)
    real_image_update = gr.update(visible=not show_gt_inputs)
    ppp_slider_update = gr.update(visible=show_gt_inputs)
    return gt_image_update, real_image_update, ppp_slider_update


def _burst_ppp_interactivity(mode: str):
    """Return an update that makes the burst PPP slider editable only in GT mode."""
    is_gt_mode = mode == BURST_MODE_GT
    return gr.update(interactive=is_gt_mode)

@spaces.GPU
def run_single_reconstruction(
    pipeline_type: str,
    mode: str,
    gt_image: Optional[np.ndarray],
    real_spad_image: Optional[np.ndarray],
    prompt: str,
    target_ppp: float,
    only_vae_output: bool,
    seed: int,
):
    """Run single-frame reconstruction for the UI.

    In GT mode the GT image is handed to ``reconstruct_from_gt`` (with colour
    mosaic simulation enabled for the colour pipeline); in any other mode the
    real SPAD frame is handed to ``reconstruct_from_real_spad``. The
    reconstruction is resized back to the input's height and width, and all
    previews are converted to grayscale for the monochrome pipeline.

    Returns:
        ``(input_preview, reconstruction, model_input_preview, status)``.
        On any failure the three images are ``None`` and ``status`` holds the
        error message followed by a one-frame traceback.
    """
    try:
        pipeline = _get_single_pipeline(pipeline_type)
        prompt = (prompt or "").strip()
        seed = int(seed)

        from_gt = mode == SINGLE_MODE_GT
        if from_gt:
            if gt_image is None:
                raise ValueError("Please provide a GT image.")
            source_image = gt_image
        else:
            if real_spad_image is None:
                raise ValueError("Please provide a real SPAD frame.")
            source_image = real_spad_image

        # Remember the original size so the output can be shown at the input's aspect.
        in_h, in_w = _ensure_rgb_image(source_image).shape[:2]

        if from_gt:
            _, lq_prev, recon, status = pipeline.reconstruct_from_gt(
                gt_image_np=source_image,
                prompt=prompt,
                target_ppp=float(target_ppp),
                only_vae_output=bool(only_vae_output),
                seed=seed,
                simulate_color_mosaic=(pipeline_type == PIPELINE_COLOR),
            )
        else:
            lq_prev, recon, status = pipeline.reconstruct_from_real_spad(
                lq_image_np=source_image,
                prompt=prompt,
                only_vae_output=bool(only_vae_output),
                seed=seed,
            )

        recon = _resize_uint8_to_hw(recon, in_h, in_w)
        input_preview = _to_uint8(_normalize_float01(_ensure_rgb_image(source_image)))
        if pipeline_type == PIPELINE_MONO:
            input_preview = _to_gray_uint8(input_preview)
            lq_prev = _to_gray_uint8(lq_prev)
            recon = _to_gray_uint8(recon)
        return input_preview, recon, lq_prev, status

    except Exception as exc:
        # UI boundary: report the failure in the status box instead of raising.
        failure = f"Single reconstruction failed: {exc}"
        trace = traceback.format_exc(limit=1)
        return None, None, None, f"{failure}\n{trace}"


def load_cube_for_ui(pipeline_type: str, mode: str, cube_file: Any, cube_path: str):
    """Load a burst cube for the UI and prepare the dependent widgets.

    Resolves the input path, builds a ``CubeDescriptor``, checks that it holds
    at least ``BURST_WINDOW`` frames and renders frame 0 as a preview
    (grayscale for the monochrome pipeline).

    Returns:
        ``(descriptor, info_text, slider_update, input_preview,
        model_input_preview, status)``. ``model_input_preview`` is ``None`` in
        GT mode. On failure the descriptor and previews are ``None``, the
        slider is disabled, and both text fields carry the error message.
    """
    descriptor = None
    try:
        path = _resolve_uploaded_path(cube_file, cube_path)
        if not path:
            raise ValueError("Provide a cube file upload or local path.")

        descriptor = _build_cube_descriptor(mode, path, out_size=_get_runtime_burst_out_size(pipeline_type))
        if descriptor.total_frames < BURST_WINDOW:
            raise ValueError(
                f"Cube has {descriptor.total_frames} frames, but gQIR burst requires at least {BURST_WINDOW}."
            )

        max_start = descriptor.total_frames - BURST_WINDOW
        slider_update = gr.update(minimum=0, maximum=max_start, value=0, step=1, interactive=True)

        preview_window = _load_window_from_descriptor(
            descriptor,
            start=0,
            count=1,
            pipeline_type=pipeline_type,
            resize_for_model=False,
        )
        preview = _to_uint8(preview_window[0])
        if pipeline_type == PIPELINE_MONO:
            preview = _to_gray_uint8(preview)

        input_display_preview = preview
        model_input_preview = None if mode == BURST_MODE_GT else preview

        info = (
            f"Loaded cube: {descriptor.path}\n"
            f"Input type: {descriptor.kind}\n"
            f"Frames: {descriptor.total_frames}\n"
            f"Valid start index range: [0, {max_start}]\n"
            f"Window size fixed at {BURST_WINDOW}"
        )
        return descriptor, info, slider_update, input_display_preview, model_input_preview, "Cube loaded successfully."

    except Exception as exc:
        # The descriptor is being discarded, so frames already extracted from a
        # video would otherwise sit on disk until the process exits.
        stale_dir = getattr(descriptor, "temp_dir", None)
        if stale_dir:
            shutil.rmtree(stale_dir, ignore_errors=True)
            try:
                _TEMP_VIDEO_DIRS.remove(stale_dir)
            except ValueError:
                pass  # Already untracked; nothing more to clean up.
        err = f"Cube load failed: {exc}"
        return None, err, gr.update(interactive=False), None, None, err

@spaces.GPU
def run_burst_reconstruction(
    pipeline_type: str,
    mode: str,
    descriptor: Optional[CubeDescriptor],
    start_idx: int,
    target_ppp: float,
):
    """Reconstruct the centre frame of a ``BURST_WINDOW``-frame window for the UI.

    The window is loaded at its native resolution. In GT mode every frame is
    converted to a simulated binary frame at ``target_ppp`` (colour or mono
    simulator depending on the pipeline) and the GT window is passed along;
    otherwise the loaded frames are used directly as the binary input.
    Outputs are resized back to the centre frame's original height and width
    and converted to grayscale for the monochrome pipeline.

    Returns:
        ``(display_input, reconstruction, model_input_center, status)``. On
        any failure the three images are ``None`` and ``status`` holds the
        error message followed by a one-frame traceback.
    """
    try:
        if descriptor is None:
            raise ValueError("Load a burst cube first.")

        start_idx = int(start_idx)
        center = BURST_WINDOW // 2
        raw_window = _load_window_from_descriptor(
            descriptor,
            start=start_idx,
            count=BURST_WINDOW,
            pipeline_type=pipeline_type,
            resize_for_model=False,
        )
        raw_center_h, raw_center_w = raw_window[center].shape[:2]

        gt_window = None
        binary_window = raw_window
        if mode == BURST_MODE_GT:
            gt_window = raw_window
            # Colour and mono differ only in which simulator is applied per frame.
            if pipeline_type == PIPELINE_COLOR:
                simulate_frame = _simulate_binary_burst_frame_from_gt
            else:
                simulate_frame = _simulate_binary_burst_frame_from_gt_mono
            ppp = float(target_ppp)
            binary_window = np.stack(
                [simulate_frame(gt_window[i], target_ppp=ppp) for i in range(BURST_WINDOW)],
                axis=0,
            ).astype(np.float32)

        pipeline = _get_burst_pipeline(pipeline_type)
        # The pipeline's GT centre frame is not displayed, and it is None for
        # real cubes, so it is deliberately left unprocessed.
        center_input, recon, _unused_center_gt = pipeline.reconstruct_from_binary_window(
            binary_window_77=binary_window,
            gt_window_77=gt_window,
        )
        center_input = _resize_uint8_to_hw(center_input, raw_center_h, raw_center_w)
        recon = _resize_uint8_to_hw(recon, raw_center_h, raw_center_w)
        display_input = _to_uint8(raw_window[center])
        display_input = _resize_uint8_to_hw(display_input, raw_center_h, raw_center_w)
        if pipeline_type == PIPELINE_MONO:
            display_input = _to_gray_uint8(display_input)
            center_input = _to_gray_uint8(center_input)
            recon = _to_gray_uint8(recon)

        ppp_status = f"PPP={float(target_ppp):.2f}" if mode == BURST_MODE_GT else "PPP=ignored (real cube input)"
        status = (
            f"Burst reconstruction complete. "
            f"Pipeline={pipeline_type}, "
            f"Input mode={'GT simulation' if mode == BURST_MODE_GT else 'real photon cube'}, "
            f"{ppp_status}, "
            f"window=[{start_idx}, {start_idx + BURST_WINDOW - 1}]."
        )
        return display_input, recon, center_input, status

    except Exception as exc:
        # UI boundary: report the failure in the status box instead of raising.
        msg = f"Burst reconstruction failed: {exc}"
        tb = traceback.format_exc(limit=1)
        return None, None, None, f"{msg}\n{tb}"


def build_demo() -> gr.Blocks:
    """Build the Gradio UI with a single-frame tab and a burst tab.

    Each tab wires its widgets to the matching callback
    (``run_single_reconstruction``, ``load_cube_for_ui``,
    ``run_burst_reconstruction``) and to the helpers that toggle widget
    visibility or interactivity when the input mode changes.

    Returns:
        The assembled ``gr.Blocks`` app (not yet queued or launched).
    """
    # A blank line must follow the HTML header: in CommonMark-style renderers
    # an HTML block only ends at a blank line, so without it the headings and
    # lists below would be emitted as raw text. Blank lines around the other
    # headings keep each section a separate Markdown block.
    markdown = """
<h1 align="center">gQIR: Generative Quanta Image Reconstruction</h1>
<p align="center">
  <a href="https://aryan-garg.github.io/gqir/">Project Page</a> |
  <a href="https://arxiv.org/abs/2602.20417">ArXiv</a> |
  <a href="https://github.com/Aryan-Garg/gQIR">GitHub</a>
</p>

### What You Can Run

- **Single Frame (Stage-2):** Reconstruct one frame from either a clean GT image (internally simulated to SPAD) or a real SPAD frame.
- **Burst (Stage-3):** Reconstruct from a fixed **77-frame** window using either GT videos/cubes or real photon cubes.
- **Pipelines:** Toggle between **Color** and **Monochrome** reconstruction in both tabs.

### Supported Inputs

- **Single GT:** Standard image uploads.
- **Single Real:** Real SPAD frame image uploads.
- **Burst GT:** Public-friendly videos (`.mp4`, `.mov`, `.wmv`, `.avi`, `.mkv`, `.webm`) plus research cube formats (`.npy`, `.npz`, `.pt`) or image folders.
- **Burst Real:** Photon cubes (`.npy`, `.npz`, `.pt`, `.h5`) or image folders.

### Quick Usage

1. Pick pipeline and input mode.
2. Load input and select burst start index (for Stage-3).
3. Set PPP for GT simulation paths, then run reconstruction and compare input vs output side-by-side.
"""

    with gr.Blocks(title="gQIR Demo") as demo:
        gr.Markdown(markdown)

        with gr.Tab("Single Frame (Stage-2)"):
            with gr.Row():
                with gr.Column():
                    single_pipeline_type = gr.Radio(
                        PIPELINE_OPTIONS,
                        value=PIPELINE_COLOR,
                        label="Pipeline",
                    )
                    single_mode = gr.Radio(
                        [SINGLE_MODE_GT, SINGLE_MODE_REAL],
                        value=SINGLE_MODE_GT,
                        label="Input Mode",
                    )
                    gt_image = gr.Image(label="GT Image", type="numpy")
                    # Hidden until the input mode is switched away from GT.
                    real_spad_image = gr.Image(label="Real SPAD Frame", type="numpy", visible=False)
                    prompt = gr.Textbox(label="Prompt (optional)", value="")
                    target_ppp = gr.Slider(
                        minimum=0.25,
                        maximum=5.0,
                        value=3.5,
                        step=0.25,
                        label="Target PPP (GT simulation only)",
                    )
                    only_vae_output = gr.Checkbox(label="Stage 1 (qVAE) output only", value=False)
                    seed = gr.Number(label="Seed (-1 for random)", value=310, precision=0)
                    run_single_btn = gr.Button("Run Single Reconstruction")

                with gr.Column():
                    with gr.Row():
                        single_input_preview = gr.Image(label="Input Frame", type="numpy")
                        single_output_preview = gr.Image(label="Reconstruction (original aspect)", type="numpy")
                    single_model_input_preview = gr.Image(label="Model Input (resized for inference)", type="numpy")
                    single_status = gr.Textbox(label="Status", interactive=False)

            single_mode.change(
                fn=_single_inputs_visibility,
                inputs=[single_mode],
                outputs=[gt_image, real_spad_image, target_ppp],
            )

            run_single_btn.click(
                fn=run_single_reconstruction,
                inputs=[single_pipeline_type, single_mode, gt_image, real_spad_image, prompt, target_ppp, only_vae_output, seed],
                outputs=[single_input_preview, single_output_preview, single_model_input_preview, single_status],
            )

        with gr.Tab("Burst (Stage-3)"):
            # Holds the descriptor returned by load_cube_for_ui between clicks.
            cube_state = gr.State(value=None)

            with gr.Row():
                with gr.Column():
                    burst_pipeline_type = gr.Radio(
                        PIPELINE_OPTIONS,
                        value=PIPELINE_COLOR,
                        label="Pipeline",
                    )
                    burst_mode = gr.Radio(
                        [BURST_MODE_GT, BURST_MODE_REAL],
                        value=BURST_MODE_GT,
                        label="Burst Input Mode",
                    )
                    cube_file = gr.File(
                        label=(
                            "GT video/cube file "
                            "(.mp4/.mov/.wmv/.avi/.mkv/.webm/.npy/.npz/.pt/.h5) "
                            "or image directory path below"
                        ),
                        type="filepath",
                    )
                    cube_path = gr.Textbox(label="Or Local Cube Path (file or folder)", value="")
                    load_cube_btn = gr.Button("Load Cube")
                    cube_info = gr.Textbox(label="Cube Info", interactive=False)
                    # Disabled until a cube is loaded; load_cube_for_ui sets the range.
                    start_idx = gr.Slider(
                        minimum=0,
                        maximum=0,
                        value=0,
                        step=1,
                        interactive=False,
                        label=f"Start Index (window size fixed to {BURST_WINDOW})",
                    )
                    burst_target_ppp = gr.Slider(
                        minimum=0.25,
                        maximum=5.0,
                        value=3.5,
                        step=0.25,
                        label="Target PPP (GT simulation only)",
                    )
                    run_burst_btn = gr.Button("Run Burst Reconstruction")

                with gr.Column():
                    with gr.Row():
                        burst_input_display = gr.Image(label="Input Center Frame", type="numpy")
                        burst_recon = gr.Image(label="Reconstruction (original aspect)", type="numpy")
                    burst_model_input = gr.Image(label="Model Input Center (post-processing)", type="numpy")
                    burst_status = gr.Textbox(label="Status", interactive=False)

            load_cube_btn.click(
                fn=load_cube_for_ui,
                inputs=[burst_pipeline_type, burst_mode, cube_file, cube_path],
                outputs=[cube_state, cube_info, start_idx, burst_input_display, burst_model_input, burst_status],
            )

            burst_mode.change(
                fn=_burst_ppp_interactivity,
                inputs=[burst_mode],
                outputs=[burst_target_ppp],
            )

            run_burst_btn.click(
                fn=run_burst_reconstruction,
                inputs=[burst_pipeline_type, burst_mode, cube_state, start_idx, burst_target_ppp],
                outputs=[burst_input_display, burst_recon, burst_model_input, burst_status],
            )

    return demo


def main() -> None:
    """Parse CLI arguments, publish the runtime globals and launch the demo.

    Config paths are resolved to absolute paths. The colour configs prefer
    ``--single-config`` / ``--burst-config`` when given and fall back to the
    ``*_color`` arguments. The Hugging Face token comes from the arguments,
    then ``HF_TOKEN``, then ``HUGGINGFACE_HUB_TOKEN``. The burst output size
    is read from each burst config at ``dataset.val.params.out_size``.
    """
    global RUNTIME_SINGLE_CONFIGS, RUNTIME_BURST_CONFIGS, RUNTIME_DEVICE, RUNTIME_BURST_OUT_SIZES
    global RUNTIME_HF_REPO_ID, RUNTIME_HF_CACHE_DIR, RUNTIME_HF_TOKEN

    args = parse_args()

    def _absolute(config_arg) -> Path:
        return Path(config_arg).resolve()

    RUNTIME_SINGLE_CONFIGS = {
        PIPELINE_COLOR: _absolute(args.single_config or args.single_config_color),
        PIPELINE_MONO: _absolute(args.single_config_mono),
    }
    RUNTIME_BURST_CONFIGS = {
        PIPELINE_COLOR: _absolute(args.burst_config or args.burst_config_color),
        PIPELINE_MONO: _absolute(args.burst_config_mono),
    }

    RUNTIME_DEVICE = args.device
    RUNTIME_HF_REPO_ID = str(args.hf_repo_id or HF_DEFAULT_REPO_ID).strip()
    RUNTIME_HF_CACHE_DIR = str(Path(args.hf_cache_dir).expanduser()) if args.hf_cache_dir else None
    RUNTIME_HF_TOKEN = (
        args.hf_token
        or os.environ.get("HF_TOKEN")
        or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    )

    RUNTIME_BURST_OUT_SIZES = {
        pipeline_name: int(OmegaConf.load(str(config_file)).dataset.val.params.out_size)
        for pipeline_name, config_file in RUNTIME_BURST_CONFIGS.items()
    }

    build_demo().queue().launch()


# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()