import os
import shutil
import urllib.request
from pathlib import Path
from typing import Dict, Tuple, Any, Optional, List

import numpy as np
import torch
from PIL import Image

import comfy.model_management as model_management

# transformers is required for the depth-estimation pipeline. The import error is
# stashed so the node can load anyway and report a clear message at run time.
try:
    from transformers import pipeline
except Exception as e:
    pipeline = None
    _TRANSFORMERS_IMPORT_ERROR = e

# --------------------------------------------------------------------------------------
# Paths / sources
# --------------------------------------------------------------------------------------
# This file: comfyui-salia_online/nodes/Salia_Depth.py
# Plugin root: comfyui-salia_online/
PLUGIN_ROOT = Path(__file__).resolve().parent.parent

# Requested local path: assets/depth
MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Local file name -> download URL for the bundled depth model.
REQUIRED_FILES = {
    "config.json": "https://huggingface.co/saliacoel/depth/resolve/main/config.json",
    "model.safetensors": "https://huggingface.co/saliacoel/depth/resolve/main/model.safetensors",
    "preprocessor_config.json": "https://huggingface.co/saliacoel/depth/resolve/main/preprocessor_config.json",
}

# "zoe-path" fallback
ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"


# --------------------------------------------------------------------------------------
# Logging helpers
# --------------------------------------------------------------------------------------

def _make_logger() -> Tuple[List[str], Any]:
    """Return (lines, log): log(msg) prints to console and appends to lines.

    The accumulated lines are joined into the node's STRING output so the
    user can read the full trace in the UI.
    """
    lines: List[str] = []

    def log(msg: str):
        # console (print can fail on exotic encodings; best-effort only)
        try:
            print(msg)
        except Exception:
            pass
        # UI string
        lines.append(str(msg))

    return lines, log


def _fmt_bytes(n: Optional[int]) -> str:
    """Format a byte count as a short human-readable string ('?' for None)."""
    if n is None:
        return "?"
    # simple readable
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if n < 1024:
            return f"{n:.0f}{unit}"
        n /= 1024.0
    return f"{n:.1f}PB"


def _file_size(path: Path) -> Optional[int]:
    """Return the file size in bytes, or None if the file cannot be stat'ed."""
    try:
        return path.stat().st_size
    except Exception:
        return None


def _hf_cache_info() -> Dict[str, str]:
    """Collect Hugging Face cache-related env vars / constants for logging."""
    info: Dict[str, str] = {}
    info["env.HF_HOME"] = os.environ.get("HF_HOME", "")
    info["env.HF_HUB_CACHE"] = os.environ.get("HF_HUB_CACHE", "")
    info["env.TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "")
    info["env.HUGGINGFACE_HUB_CACHE"] = os.environ.get("HUGGINGFACE_HUB_CACHE", "")
    try:
        from huggingface_hub import constants as hf_constants
        # These exist in most hub versions:
        info["huggingface_hub.constants.HF_HOME"] = str(getattr(hf_constants, "HF_HOME", ""))
        info["huggingface_hub.constants.HF_HUB_CACHE"] = str(getattr(hf_constants, "HF_HUB_CACHE", ""))
    except Exception:
        pass
    return info


# --------------------------------------------------------------------------------------
# Download helpers
# --------------------------------------------------------------------------------------

def _have_required_files() -> bool:
    """True when all three required model files exist in MODEL_DIR."""
    return all((MODEL_DIR / name).exists() for name in REQUIRED_FILES)


def _download_url_to_file(url: str, dst: Path, timeout: int = 180) -> None:
    """
    Download with atomic temp rename.

    Data is written to '<dst>.tmp' and renamed into place only on success,
    so an interrupted download never leaves a truncated file at dst.
    Raises whatever urllib raises on network/HTTP errors.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    tmp = dst.with_suffix(dst.suffix + ".tmp")
    if tmp.exists():
        try:
            tmp.unlink()
        except Exception:
            pass
    req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-SaliaDepth/1.1"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
            shutil.copyfileobj(r, f)
    except Exception:
        # FIX: don't leave a stale partial .tmp file behind on failure.
        try:
            tmp.unlink()
        except Exception:
            pass
        raise
    tmp.replace(dst)


def ensure_local_model_files(log) -> bool:
    """
    Ensure assets/depth contains the 3 files.
    Returns True if present or downloaded successfully, else False.
    """
    # Always log expected locations + URLs, even if we don't download.
    log("[SaliaDepth] ===== Local model file check =====")
    log(f"[SaliaDepth] Plugin root: {PLUGIN_ROOT}")
    log(f"[SaliaDepth] Local model dir (on drive): {MODEL_DIR}")
    for fname, url in REQUIRED_FILES.items():
        fpath = MODEL_DIR / fname
        exists = fpath.exists()
        size = _file_size(fpath) if exists else None
        log(f"[SaliaDepth] - {fname}")
        log(f"[SaliaDepth] local path: {fpath} exists={exists} size={_fmt_bytes(size)}")
        log(f"[SaliaDepth] remote url : {url}")
    if _have_required_files():
        log("[SaliaDepth] All required local files already exist. No download needed.")
        return True
    log("[SaliaDepth] One or more local files missing. Attempting download...")
    try:
        for fname, url in REQUIRED_FILES.items():
            fpath = MODEL_DIR / fname
            if fpath.exists():
                continue
            log(f"[SaliaDepth] Downloading '{fname}' -> '{fpath}'")
            _download_url_to_file(url, fpath)
            log(f"[SaliaDepth] Downloaded '{fname}' size={_fmt_bytes(_file_size(fpath))}")
        ok = _have_required_files()
        log(f"[SaliaDepth] Download finished. ok={ok}")
        return ok
    except Exception as e:
        log(f"[SaliaDepth] Download failed with error: {repr(e)}")
        return False


# --------------------------------------------------------------------------------------
# Exact Zoe-style preprocessing helpers (copied/adapted from the Zoe snippet)
# --------------------------------------------------------------------------------------

def HWC3(x: np.ndarray) -> np.ndarray:
    """Force a uint8 image to 3-channel HWC.

    Gray -> replicated RGB; RGBA -> composited over a WHITE background.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    # C == 4
    color = x[:, :, 0:3].astype(np.float32)
    alpha = x[:, :, 3:4].astype(np.float32) / 255.0
    y = color * alpha + 255.0 * (1.0 - alpha)  # white background
    y = y.clip(0, 255).astype(np.uint8)
    return y


def pad64(x: int) -> int:
    """Padding needed to round x up to the next multiple of 64."""
    return int(np.ceil(float(x) / 64.0) * 64 - x)


def safer_memory(x: np.ndarray) -> np.ndarray:
    """Return a contiguous, independently-owned copy of x."""
    return np.ascontiguousarray(x.copy()).copy()


def resize_image_with_pad_min_side(
    input_image: np.ndarray,
    resolution: int,
    upscale_method: str = "INTER_CUBIC",
    skip_hwc3: bool = False,
    mode: str = "edge",
    log=None,
) -> Tuple[np.ndarray, Any]:
    """
    EXACT behavior like zoe.transformers.py:
      k = resolution / min(H,W)
      resize to (W_target, H_target)
      pad to multiple of 64
      return padded image and remove_pad() closure
    """
    # prefer cv2 like original for matching results
    cv2 = None
    try:
        import cv2 as _cv2
        cv2 = _cv2
    except Exception:
        cv2 = None
        if log:
            log("[SaliaDepth] WARN: cv2 not available; resizing will use PIL fallback (may change results).")

    if skip_hwc3:
        img = input_image
    else:
        img = HWC3(input_image)
    H_raw, W_raw, _ = img.shape

    if resolution <= 0:
        # keep original, but still pad to 64 (we will handle padding separately for -1 path)
        return img, (lambda x: x)

    k = float(resolution) / float(min(H_raw, W_raw))
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))

    if cv2 is not None:
        upscale_methods = {
            "INTER_NEAREST": cv2.INTER_NEAREST,
            "INTER_LINEAR": cv2.INTER_LINEAR,
            "INTER_AREA": cv2.INTER_AREA,
            "INTER_CUBIC": cv2.INTER_CUBIC,
            "INTER_LANCZOS4": cv2.INTER_LANCZOS4,
        }
        method = upscale_methods.get(upscale_method, cv2.INTER_CUBIC)
        # Upscale uses the requested method; downscale always uses INTER_AREA
        # (matches the original Zoe code).
        img = cv2.resize(img, (W_target, H_target), interpolation=method if k > 1 else cv2.INTER_AREA)
    else:
        # PIL fallback
        pil = Image.fromarray(img)
        resample = Image.BICUBIC if k > 1 else Image.LANCZOS
        pil = pil.resize((W_target, H_target), resample=resample)
        img = np.array(pil, dtype=np.uint8)

    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        return safer_memory(x[:H_target, :W_target, ...])

    return safer_memory(img_padded), remove_pad


def pad_only_to_64(img_u8: np.ndarray, mode: str = "edge") -> Tuple[np.ndarray, Any]:
    """
    For resolution == -1: keep original resolution but still pad to multiples
    of 64, then provide remove_pad that returns original size.
    """
    img = HWC3(img_u8)
    H_raw, W_raw, _ = img.shape
    H_pad, W_pad = pad64(H_raw), pad64(W_raw)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        return safer_memory(x[:H_raw, :W_raw, ...])

    return safer_memory(img_padded), remove_pad


# --------------------------------------------------------------------------------------
# RGBA rules
# --------------------------------------------------------------------------------------

def composite_rgba_over_white_keep_alpha(inp_u8: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    If RGBA: return RGB composited over WHITE + alpha_u8 kept separately.
    If RGB: return input RGB + None alpha.
    """
    if inp_u8.ndim == 3 and inp_u8.shape[2] == 4:
        rgba = inp_u8.astype(np.uint8)
        rgb = rgba[:, :, 0:3].astype(np.float32)
        a = (rgba[:, :, 3:4].astype(np.float32) / 255.0)
        rgb_white = (rgb * a + 255.0 * (1.0 - a)).clip(0, 255).astype(np.uint8)
        alpha_u8 = rgba[:, :, 3].copy()
        return rgb_white, alpha_u8
    # force to RGB
    return HWC3(inp_u8), None


def apply_alpha_then_black_background(depth_rgb_u8: np.ndarray, alpha_u8: np.ndarray) -> np.ndarray:
    """
    Requested output rule:
      - attach alpha to depth (conceptually RGBA)
      - composite over BLACK
      - output RGB
    That is equivalent to depth_rgb * alpha.
    """
    depth_rgb_u8 = HWC3(depth_rgb_u8)
    a = (alpha_u8.astype(np.float32) / 255.0)[:, :, None]
    out = (depth_rgb_u8.astype(np.float32) * a).clip(0, 255).astype(np.uint8)
    return out


# --------------------------------------------------------------------------------------
# ComfyUI conversion helpers
# --------------------------------------------------------------------------------------

def comfy_tensor_to_u8(img: torch.Tensor) -> np.ndarray:
    """
    Comfy IMAGE: float [0..1], shape [H,W,C] or [B,H,W,C]
    Convert to uint8 HWC (first batch item only when batched).
    """
    if img.ndim == 4:
        img = img[0]
    arr = img.detach().cpu().float().clamp(0, 1).numpy()
    u8 = (arr * 255.0).round().astype(np.uint8)
    return u8


def u8_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
    """uint8 HWC image -> Comfy IMAGE tensor [1,H,W,C] in float [0..1]."""
    img_u8 = HWC3(img_u8)
    t = torch.from_numpy(img_u8.astype(np.float32) / 255.0)
    return t.unsqueeze(0)  # [1,H,W,C]


# --------------------------------------------------------------------------------------
# Pipeline loading (local-first, then zoe fallback)
# --------------------------------------------------------------------------------------

# (model_source, device_str) -> pipeline; avoids re-loading weights per run
_PIPE_CACHE: Dict[Tuple[str, str], Any] = {}


def _try_load_pipeline(model_source: str, device: torch.device, log):
    """
    Use transformers.pipeline like the Zoe code does.
    We intentionally do NOT pass device=... here, and instead move the model
    like the Zoe node. Raises on load failure; caller handles fallback.
    """
    if pipeline is None:
        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")
    key = (model_source, str(device))
    if key in _PIPE_CACHE:
        log(f"[SaliaDepth] Using cached pipeline for source='{model_source}' device='{device}'")
        return _PIPE_CACHE[key]
    log(f"[SaliaDepth] Creating pipeline(task='depth-estimation', model='{model_source}')")
    p = pipeline(task="depth-estimation", model=model_source)
    # Try to move model to torch device, like ZoeDetector.to()
    try:
        p.model = p.model.to(device)
        p.device = device  # Zoe code sets this; newer transformers uses torch.device internally
        log(f"[SaliaDepth] Moved pipeline model to device: {device}")
    except Exception as e:
        log(f"[SaliaDepth] WARN: Could not move pipeline model to device {device}: {repr(e)}")
    # Log config info for debugging
    try:
        cfg = p.model.config
        log(f"[SaliaDepth] Model class: {p.model.__class__.__name__}")
        log(f"[SaliaDepth] Config class: {cfg.__class__.__name__}")
        log(f"[SaliaDepth] Config model_type: {getattr(cfg, 'model_type', '')}")
        log(f"[SaliaDepth] Config _name_or_path: {getattr(cfg, '_name_or_path', '')}")
    except Exception as e:
        log(f"[SaliaDepth] WARN: Could not log model config: {repr(e)}")
    _PIPE_CACHE[key] = p
    return p


def get_depth_pipeline(device: torch.device, log):
    """
    1) Ensure assets/depth files exist (download if missing)
    2) Try load local dir
    3) Fallback to Intel/zoedepth-nyu-kitti
    4) If both fail -> None
    """
    # Always log HF cache info (helps locate where fallback downloads go)
    log("[SaliaDepth] ===== Hugging Face cache info (fallback path) =====")
    for k, v in _hf_cache_info().items():
        if v:
            log(f"[SaliaDepth] {k} = {v}")
    log(f"[SaliaDepth] Zoe fallback repo id: {ZOE_FALLBACK_REPO_ID}")

    # Local-first
    local_ok = ensure_local_model_files(log)
    if local_ok:
        try:
            log(f"[SaliaDepth] Trying LOCAL model from directory: {MODEL_DIR}")
            return _try_load_pipeline(str(MODEL_DIR), device, log)
        except Exception as e:
            log(f"[SaliaDepth] Local model load FAILED: {repr(e)}")

    # Fallback
    try:
        log(f"[SaliaDepth] Trying ZOE fallback model: {ZOE_FALLBACK_REPO_ID}")
        return _try_load_pipeline(ZOE_FALLBACK_REPO_ID, device, log)
    except Exception as e:
        log(f"[SaliaDepth] Zoe fallback load FAILED: {repr(e)}")
        return None


# --------------------------------------------------------------------------------------
# Depth inference (Zoe-style)
# --------------------------------------------------------------------------------------

def depth_estimate_zoe_style(
    pipe,
    input_rgb_u8: np.ndarray,
    detect_resolution: int,
    log,
    upscale_method: str = "INTER_CUBIC",
) -> np.ndarray:
    """
    Matches ZoeDetector.__call__ logic very closely.
    Returns uint8 RGB depth map (near = bright, via the final inversion).
    """
    # detect_resolution:
    #   - if -1: keep original but pad-to-64
    #   - else: min-side resize to detect_resolution, then pad-to-64
    if detect_resolution == -1:
        work_img, remove_pad = pad_only_to_64(input_rgb_u8, mode="edge")
        log(f"[SaliaDepth] Preprocess: resolution=-1 (no resize), padded to 64. work={work_img.shape}")
    else:
        work_img, remove_pad = resize_image_with_pad_min_side(
            input_rgb_u8,
            int(detect_resolution),
            upscale_method=upscale_method,
            skip_hwc3=False,
            mode="edge",
            log=log,
        )
        log(f"[SaliaDepth] Preprocess: min-side resized to {detect_resolution}, padded to 64. work={work_img.shape}")

    pil_image = Image.fromarray(work_img)
    with torch.no_grad():
        result = pipe(pil_image)
    # FIX: the original had an isinstance(depth, Image.Image) branch whose two
    # arms were identical; np.array() handles both PIL images and array-likes.
    depth_array = np.array(result["depth"], dtype=np.float32)

    # EXACT normalization like the Zoe code
    vmin = float(np.percentile(depth_array, 2))
    vmax = float(np.percentile(depth_array, 85))
    log(f"[SaliaDepth] Depth raw stats: shape={depth_array.shape} vmin(p2)={vmin:.6f} vmax(p85)={vmax:.6f} mean={float(depth_array.mean()):.6f}")
    depth_array = depth_array - vmin
    denom = (vmax - vmin)
    if abs(denom) < 1e-12:
        # avoid division by zero; log it
        log("[SaliaDepth] WARN: vmax==vmin; forcing denom epsilon to avoid NaNs.")
        denom = 1e-6
    depth_array = depth_array / denom
    # EXACT invert like the Zoe code
    depth_array = 1.0 - depth_array
    depth_image = (depth_array * 255.0).clip(0, 255).astype(np.uint8)
    detected_map = remove_pad(HWC3(depth_image))
    log(f"[SaliaDepth] Output (post-remove_pad): {detected_map.shape} dtype={detected_map.dtype}")
    return detected_map


def resize_to_original(depth_rgb_u8: np.ndarray, w0: int, h0: int, log) -> np.ndarray:
    """
    Resize depth output back to original input size.
    Use cv2 if available, else PIL.
    """
    try:
        import cv2
        out = cv2.resize(depth_rgb_u8, (w0, h0), interpolation=cv2.INTER_LINEAR)
        return out.astype(np.uint8)
    except Exception as e:
        log(f"[SaliaDepth] WARN: cv2 resize failed ({repr(e)}); using PIL.")
        pil = Image.fromarray(depth_rgb_u8)
        pil = pil.resize((w0, h0), resample=Image.BILINEAR)
        return np.array(pil, dtype=np.uint8)


# --------------------------------------------------------------------------------------
# ComfyUI Node
# --------------------------------------------------------------------------------------

class Salia_Depth_Preprocessor:
    """ComfyUI node: Zoe-style depth preprocessor with local-first model files."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                # note: default -1, min -1 (-1 = keep original resolution)
                "resolution": ("INT", {"default": -1, "min": -1, "max": 8192, "step": 1}),
            }
        }

    # 2 outputs: image + log string
    RETURN_TYPES = ("IMAGE", "STRING")
    FUNCTION = "execute"
    CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"

    def execute(self, image, resolution=-1):
        """Run depth estimation on a (possibly batched) Comfy IMAGE tensor.

        Returns (depth IMAGE batch, joined log string). On pipeline failure the
        input is returned unchanged; on a per-item failure that item is passed
        through so the rest of the batch still succeeds.
        """
        lines, log = _make_logger()
        log("[SaliaDepth] ==================================================")
        log("[SaliaDepth] SaliaDepthPreprocessor starting")
        log(f"[SaliaDepth] resolution input = {resolution}")

        # Get torch device
        try:
            device = model_management.get_torch_device()
        except Exception as e:
            device = torch.device("cpu")
            log(f"[SaliaDepth] WARN: model_management.get_torch_device failed: {repr(e)} -> using CPU")
        log(f"[SaliaDepth] torch device = {device}")

        # Load pipeline
        pipe = None
        try:
            pipe = get_depth_pipeline(device, log)
        except Exception as e:
            log(f"[SaliaDepth] ERROR: get_depth_pipeline crashed: {repr(e)}")
            pipe = None
        if pipe is None:
            log("[SaliaDepth] FATAL: No pipeline available. Returning input image unchanged.")
            return (image, "\n".join(lines))

        # Batch support
        if image.ndim == 3:
            image = image.unsqueeze(0)

        outs = []
        for i in range(image.shape[0]):
            try:
                # Original dimensions
                h0 = int(image[i].shape[0])
                w0 = int(image[i].shape[1])
                c0 = int(image[i].shape[2])
                log(f"[SaliaDepth] ---- Batch index {i} input shape = ({h0},{w0},{c0}) ----")
                inp_u8 = comfy_tensor_to_u8(image[i])

                # RGBA rule (pre)
                rgb_for_depth, alpha_u8 = composite_rgba_over_white_keep_alpha(inp_u8)
                had_rgba = alpha_u8 is not None
                log(f"[SaliaDepth] had_rgba={had_rgba}")

                # Run depth (Zoe-style)
                depth_rgb = depth_estimate_zoe_style(
                    pipe=pipe,
                    input_rgb_u8=rgb_for_depth,
                    detect_resolution=int(resolution),
                    log=log,
                    upscale_method="INTER_CUBIC",
                )

                # Resize back to original input size
                depth_rgb = resize_to_original(depth_rgb, w0=w0, h0=h0, log=log)

                # RGBA rule (post)
                if had_rgba:
                    # Use original alpha at original size.
                    # If alpha size differs, resize alpha to match.
                    if alpha_u8.shape[0] != h0 or alpha_u8.shape[1] != w0:
                        log("[SaliaDepth] Alpha size mismatch; resizing alpha to original size.")
                        try:
                            import cv2
                            alpha_u8 = cv2.resize(alpha_u8, (w0, h0), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
                        except Exception:
                            pil_a = Image.fromarray(alpha_u8)
                            pil_a = pil_a.resize((w0, h0), resample=Image.BILINEAR)
                            alpha_u8 = np.array(pil_a, dtype=np.uint8)
                    # "Put alpha on RGB turning it into RGBA, then put BLACK
                    #  background behind it, then back to RGB"
                    depth_rgb = apply_alpha_then_black_background(depth_rgb, alpha_u8)
                    log("[SaliaDepth] Applied RGBA post-step (alpha + black background).")

                outs.append(u8_to_comfy_tensor(depth_rgb))
            except Exception as e:
                log(f"[SaliaDepth] ERROR: Inference failed at batch index {i}: {repr(e)}")
                log("[SaliaDepth] Passing through original input image for this batch item.")
                # NOTE(review): pass-through keeps the item's original channel
                # count; a 4-channel input item could mismatch 3-channel depth
                # outputs at torch.cat — TODO confirm against real batches.
                outs.append(image[i].unsqueeze(0))

        out = torch.cat(outs, dim=0)
        log("[SaliaDepth] Done.")
        return (out, "\n".join(lines))


NODE_CLASS_MAPPINGS = {
    "SaliaDepthPreprocessor": Salia_Depth_Preprocessor
}
# Human-readable title shown for the node in the ComfyUI UI.
NODE_DISPLAY_NAME_MAPPINGS = dict(
    SaliaDepthPreprocessor="Salia Depth (local assets/depth + logs)",
)