saliacoel commited on
Commit
f1929d6
·
verified ·
1 Parent(s): 1cc0ed9

Upload salia_depth.py

Browse files
Files changed (1) hide show
  1. salia_depth.py +483 -221
salia_depth.py CHANGED
@@ -1,10 +1,8 @@
1
- from __future__ import annotations
2
-
3
  import os
4
  import shutil
5
  import urllib.request
6
  from pathlib import Path
7
- from typing import Dict, Tuple, Optional, List
8
 
9
  import numpy as np
10
  import torch
@@ -12,248 +10,492 @@ from PIL import Image
12
 
13
  import comfy.model_management as model_management
14
 
 
15
  try:
16
- import cv2
17
- except Exception:
18
- cv2 = None
 
19
 
20
 
21
- # -----------------------------
22
- # Paths / URLs (per your spec)
23
- # -----------------------------
24
 
25
- # nodes/Salia_Depth.py -> comfyui-salia_online/
26
- PLUGIN_ROOT = Path(__file__).resolve().parents[1]
 
27
 
28
- # MUST be assets/depth (not assets/assets, not assets/)
29
- ASSETS_DEPTH_DIR = PLUGIN_ROOT / "assets" / "depth"
 
30
 
31
- REQUIRED_FILES = ["config.json", "preprocessor_config.json", "model.safetensors"]
32
-
33
- HF_BASE = "https://huggingface.co/saliacoel/depth/resolve/main"
34
- FILE_URLS = {
35
- "config.json": f"{HF_BASE}/config.json",
36
- "preprocessor_config.json": f"{HF_BASE}/preprocessor_config.json",
37
- "model.safetensors": f"{HF_BASE}/model.safetensors",
38
  }
39
 
40
- # Fallback “zoe-path
41
- FALLBACK_ZOE_REPO = "Intel/zoedepth-nyu-kitti"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
 
44
- # -----------------------------
45
- # Global model cache
46
- # -----------------------------
47
- # key: (device_str, source_id) -> (processor, model)
48
- _MODEL_CACHE: Dict[Tuple[str, str], Tuple[object, object]] = {}
49
 
50
 
51
- # -----------------------------
52
- # Utility
53
- # -----------------------------
 
 
 
54
 
55
- def _ensure_dir(p: Path) -> None:
56
- p.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
57
 
 
58
 
59
- def _file_ok(p: Path) -> bool:
60
- # existence + non-empty is a good baseline against partial downloads
61
- return p.exists() and p.is_file() and p.stat().st_size > 0
62
 
 
 
 
63
 
64
- def _have_local_files() -> bool:
65
- return all(_file_ok(ASSETS_DEPTH_DIR / f) for f in REQUIRED_FILES)
66
 
67
 
68
- def _download_file(url: str, dst: Path, timeout: int = 60) -> None:
69
  """
70
- Download url -> dst atomically (tmp + replace).
71
- Raises on failure.
72
  """
73
- _ensure_dir(dst.parent)
74
  tmp = dst.with_suffix(dst.suffix + ".tmp")
75
 
76
- req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-Salia-Depth/1.0"})
 
 
 
 
 
 
77
  with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
78
  shutil.copyfileobj(r, f)
79
 
80
- if not _file_ok(tmp):
81
- raise RuntimeError(f"Downloaded file is empty/corrupt: {tmp}")
82
 
83
- os.replace(tmp, dst)
84
 
85
-
86
- def _ensure_local_model_files() -> bool:
87
  """
88
- Ensure the 3 required files exist in assets/depth.
89
- Returns True if available afterwards, False if download failed.
90
  """
91
- _ensure_dir(ASSETS_DEPTH_DIR)
92
-
93
- # already present
94
- if _have_local_files():
 
 
 
 
 
 
 
 
 
 
 
95
  return True
96
 
97
- # try download missing ones
 
98
  try:
99
- for fname in REQUIRED_FILES:
100
- dst = ASSETS_DEPTH_DIR / fname
101
- if not _file_ok(dst):
102
- _download_file(FILE_URLS[fname], dst)
103
- return _have_local_files()
 
 
 
 
 
 
104
  except Exception as e:
105
- print(f"[SaliaDepth] Download from saliacoel/depth failed: {e}")
106
  return False
107
 
108
 
109
- def _resize_max_side_uint8(img_u8: np.ndarray, max_side: int) -> np.ndarray:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  """
111
- Resize uint8 HWC so that max(H,W) == max_side, keep aspect ratio.
112
- If max_side <= 0 or already matches, returns original.
 
 
 
113
  """
114
- if max_side <= 0:
115
- return img_u8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- h, w = img_u8.shape[:2]
118
- cur_max = max(h, w)
119
- if cur_max == 0 or cur_max == max_side:
120
- return img_u8
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- scale = float(max_side) / float(cur_max)
123
- new_w = max(1, int(round(w * scale)))
124
- new_h = max(1, int(round(h * scale)))
125
 
126
- if cv2 is not None:
127
- interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_CUBIC
128
- return cv2.resize(img_u8, (new_w, new_h), interpolation=interp)
129
 
130
- # PIL fallback
131
- pil = Image.fromarray(img_u8)
132
- resample = Image.Resampling.LANCZOS if scale < 1 else Image.Resampling.BICUBIC
133
- pil = pil.resize((new_w, new_h), resample=resample)
134
- return np.array(pil, dtype=np.uint8)
135
 
136
 
137
- def _depth_to_hint_rgb(depth_2d: np.ndarray) -> np.ndarray:
138
  """
139
- Normalize depth to a ControlNet-style grayscale RGB hint.
140
- Uses percentile normalization (2..85) and inverts.
141
  """
142
- d = depth_2d.astype(np.float32)
143
- if not np.isfinite(d).all():
144
- d = np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0)
 
145
 
146
- vmin = np.percentile(d, 2)
147
- vmax = np.percentile(d, 85)
148
- denom = max(vmax - vmin, 1e-6)
149
 
150
- dn = (d - vmin) / denom
151
- dn = np.clip(dn, 0.0, 1.0)
152
- dn = 1.0 - dn
153
 
154
- u8 = (dn * 255.0).round().clip(0, 255).astype(np.uint8)
155
- return np.stack([u8, u8, u8], axis=-1)
156
 
 
 
 
157
 
158
- def _comfy_tensor_to_uint8_hwc(img: torch.Tensor) -> np.ndarray:
159
  """
160
- ComfyUI IMAGE: float [0..1], shape [H,W,3]
161
- -> uint8 HWC
162
  """
163
- x = img.detach()
164
- if x.is_cuda:
165
- x = x.cpu()
166
- x = x.float().clamp(0, 1).numpy()
167
- return (x * 255.0).round().clip(0, 255).astype(np.uint8)
168
-
169
-
170
- def _uint8_hwc_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
 
 
 
 
171
  """
172
- uint8 HWC -> float32 tensor HWC [0..1]
 
 
 
 
173
  """
174
- return torch.from_numpy(img_u8.astype(np.float32) / 255.0)
 
 
 
175
 
176
 
177
- def _post_process_depth(processor, outputs, target_h: int, target_w: int) -> np.ndarray:
 
 
 
 
178
  """
179
- Transformers API compatibility shim.
180
- Some versions use target_sizes, some source_sizes.
181
- Returns depth as float32 HxW.
182
  """
183
- # Try the most common signature first
184
- try:
185
- post = processor.post_process_depth_estimation(outputs, target_sizes=[(target_h, target_w)])
186
- except TypeError:
187
- post = processor.post_process_depth_estimation(outputs, source_sizes=[(target_h, target_w)])
188
 
189
- # expected: list[{"predicted_depth": tensor[H,W]}]
190
- depth_t = post[0]["predicted_depth"]
191
- return depth_t.detach().float().cpu().numpy()
192
 
 
 
 
 
193
 
194
- def _load_zoedepth_from_local(device: torch.device):
195
- """
196
- Load ZoeDepth from ASSETS_DEPTH_DIR (offline).
197
- """
198
- from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation
199
 
200
- key = (str(device), f"local::{ASSETS_DEPTH_DIR}")
201
- if key in _MODEL_CACHE:
202
- return _MODEL_CACHE[key]
203
 
204
- processor = AutoImageProcessor.from_pretrained(str(ASSETS_DEPTH_DIR), local_files_only=True)
205
- model = ZoeDepthForDepthEstimation.from_pretrained(str(ASSETS_DEPTH_DIR), local_files_only=True)
206
- model.eval().to(device)
207
 
208
- _MODEL_CACHE[key] = (processor, model)
209
- return processor, model
210
 
211
-
212
- def _load_zoedepth_fallback(device: torch.device):
213
  """
214
- Load ZoeDepth from HF (zoe-path fallback).
 
215
  """
216
- from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation
 
 
 
 
 
 
217
 
218
- key = (str(device), f"hf::{FALLBACK_ZOE_REPO}")
219
- if key in _MODEL_CACHE:
220
- return _MODEL_CACHE[key]
 
 
 
 
 
 
 
221
 
222
- processor = AutoImageProcessor.from_pretrained(FALLBACK_ZOE_REPO)
223
- model = ZoeDepthForDepthEstimation.from_pretrained(FALLBACK_ZOE_REPO)
224
- model.eval().to(device)
 
 
 
 
 
 
225
 
226
- _MODEL_CACHE[key] = (processor, model)
227
- return processor, model
228
 
229
 
230
- def _get_model(device: torch.device):
231
  """
232
- 1) Try local assets/depth (download if missing)
233
- 2) If that fails -> zoe-path fallback
234
- 3) If that fails -> return None
 
235
  """
 
 
 
 
 
 
 
236
  # Local-first
 
 
 
 
 
 
 
 
 
237
  try:
238
- if _ensure_local_model_files():
239
- try:
240
- return _load_zoedepth_from_local(device)
241
- except Exception as e:
242
- print(f"[SaliaDepth] Local load failed (assets/depth). Will fallback to zoe-path. Error: {e}")
243
  except Exception as e:
244
- print(f"[SaliaDepth] Local ensure/load unexpected error. Fallback to zoe-path. Error: {e}")
245
 
246
- # Fallback: zoe-path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  try:
248
- return _load_zoedepth_fallback(device)
 
 
249
  except Exception as e:
250
- print(f"[SaliaDepth] Zoe fallback load failed. Will passthrough image. Error: {e}")
251
- return None
 
 
252
 
253
 
254
- # -----------------------------
255
  # ComfyUI Node
256
- # -----------------------------
257
 
258
  class Salia_Depth_Preprocessor:
259
  @classmethod
@@ -261,83 +503,103 @@ class Salia_Depth_Preprocessor:
261
  return {
262
  "required": {
263
  "image": ("IMAGE",),
264
- # note 5: default -1, min -1
265
  "resolution": ("INT", {"default": -1, "min": -1, "max": 8192, "step": 1}),
266
  }
267
  }
268
 
269
- RETURN_TYPES = ("IMAGE",)
 
270
  FUNCTION = "execute"
271
  CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
272
 
273
- def execute(self, image: torch.Tensor, resolution: int = -1):
274
- """
275
- If anything fails:
276
- - return (image,) passthrough
277
- """
278
- # Basic shape validation; if weird, passthrough
 
279
  try:
280
- if image.dim() != 4 or image.shape[-1] != 3:
281
- print(f"[SaliaDepth] Unexpected input IMAGE shape {tuple(image.shape)}; passthrough.")
282
- return (image,)
283
- except Exception:
284
- return (image,)
285
 
286
- device = model_management.get_torch_device()
287
 
288
- model_pack = _get_model(device)
289
- if model_pack is None:
290
- return (image,)
 
 
 
 
291
 
292
- processor, model = model_pack
 
 
293
 
294
- outs: List[torch.Tensor] = []
 
 
295
 
296
- for b in range(image.shape[0]):
 
297
  try:
298
- # input in original size
299
- img_u8 = _comfy_tensor_to_uint8_hwc(image[b])
300
- h0, w0 = img_u8.shape[0], img_u8.shape[1]
301
-
302
- # note 5: if -1, use bigger side (max(w,h))
303
- max_side = max(w0, h0) if resolution == -1 else int(resolution)
304
-
305
- # resize for inference (max side rule)
306
- img_inf = _resize_max_side_uint8(img_u8, max_side=max_side)
307
- pil = Image.fromarray(img_inf)
308
-
309
- # preprocess
310
- inputs = processor(images=pil, return_tensors="pt")
311
- pixel_values = inputs["pixel_values"].to(device)
312
-
313
- with torch.inference_mode():
314
- outputs = model(pixel_values=pixel_values)
315
-
316
- # postprocess back to inference image size
317
- depth_np = _post_process_depth(processor, outputs, pil.height, pil.width)
318
-
319
- # depth -> grayscale RGB hint
320
- hint_rgb = _depth_to_hint_rgb(depth_np)
321
-
322
- # resize hint back to original size
323
- if hint_rgb.shape[0] != h0 or hint_rgb.shape[1] != w0:
324
- if cv2 is not None:
325
- hint_rgb = cv2.resize(hint_rgb, (w0, h0), interpolation=cv2.INTER_CUBIC)
326
- else:
327
- hint_rgb = np.array(
328
- Image.fromarray(hint_rgb).resize((w0, h0), resample=Image.Resampling.BICUBIC),
329
- dtype=np.uint8
330
- )
331
-
332
- outs.append(_uint8_hwc_to_comfy_tensor(hint_rgb))
 
 
 
 
 
 
 
 
 
333
 
334
  except Exception as e:
335
- # Per-image failure -> passthrough that image (keeps batch size consistent)
336
- print(f"[SaliaDepth] Inference failed on batch index {b}; passthrough that frame. Error: {e}")
337
- outs.append(image[b].detach().cpu() if image[b].is_cuda else image[b])
338
 
339
- out_batch = torch.stack(outs, dim=0)
340
- return (out_batch,)
 
341
 
342
 
343
  NODE_CLASS_MAPPINGS = {
@@ -345,5 +607,5 @@ NODE_CLASS_MAPPINGS = {
345
  }
346
 
347
  NODE_DISPLAY_NAME_MAPPINGS = {
348
- "SaliaDepthPreprocessor": "Salia Depth"
349
  }
 
 
 
1
  import os
2
  import shutil
3
  import urllib.request
4
  from pathlib import Path
5
+ from typing import Dict, Tuple, Any, Optional, List
6
 
7
  import numpy as np
8
  import torch
 
10
 
11
  import comfy.model_management as model_management
12
 
13
# transformers is required for the depth-estimation pipeline; defer any
# import failure so the node can report it at run time instead of breaking
# ComfyUI's node discovery.
try:
    from transformers import pipeline
except Exception as e:
    pipeline = None
    _TRANSFORMERS_IMPORT_ERROR = e


# --------------------------------------------------------------------------------------
# Paths / sources
# --------------------------------------------------------------------------------------

# This file: comfyui-salia_online/nodes/Salia_Depth.py
# Plugin root: comfyui-salia_online/
PLUGIN_ROOT = Path(__file__).resolve().parent.parent

# Requested local path: assets/depth
MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Required model files mapped to their download URLs.
REQUIRED_FILES = {
    "config.json": "https://huggingface.co/saliacoel/depth/resolve/main/config.json",
    "model.safetensors": "https://huggingface.co/saliacoel/depth/resolve/main/model.safetensors",
    "preprocessor_config.json": "https://huggingface.co/saliacoel/depth/resolve/main/preprocessor_config.json",
}

# "zoe-path" fallback
ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
41
+
42
+
43
+ # --------------------------------------------------------------------------------------
44
+ # Logging helpers
45
+ # --------------------------------------------------------------------------------------
46
+
47
+ def _make_logger() -> Tuple[List[str], Any]:
48
+ lines: List[str] = []
49
+
50
+ def log(msg: str):
51
+ # console
52
+ try:
53
+ print(msg)
54
+ except Exception:
55
+ pass
56
+ # UI string
57
+ lines.append(str(msg))
58
+
59
+ return lines, log
60
+
61
+
62
+ def _fmt_bytes(n: Optional[int]) -> str:
63
+ if n is None:
64
+ return "?"
65
+ # simple readable
66
+ for unit in ["B", "KB", "MB", "GB", "TB"]:
67
+ if n < 1024:
68
+ return f"{n:.0f}{unit}"
69
+ n /= 1024.0
70
+ return f"{n:.1f}PB"
71
 
72
 
73
+ def _file_size(path: Path) -> Optional[int]:
74
+ try:
75
+ return path.stat().st_size
76
+ except Exception:
77
+ return None
78
 
79
 
80
+ def _hf_cache_info() -> Dict[str, str]:
81
+ info: Dict[str, str] = {}
82
+ info["env.HF_HOME"] = os.environ.get("HF_HOME", "")
83
+ info["env.HF_HUB_CACHE"] = os.environ.get("HF_HUB_CACHE", "")
84
+ info["env.TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "")
85
+ info["env.HUGGINGFACE_HUB_CACHE"] = os.environ.get("HUGGINGFACE_HUB_CACHE", "")
86
 
87
+ try:
88
+ from huggingface_hub import constants as hf_constants
89
+ # These exist in most hub versions:
90
+ info["huggingface_hub.constants.HF_HOME"] = str(getattr(hf_constants, "HF_HOME", ""))
91
+ info["huggingface_hub.constants.HF_HUB_CACHE"] = str(getattr(hf_constants, "HF_HUB_CACHE", ""))
92
+ except Exception:
93
+ pass
94
 
95
+ return info
96
 
 
 
 
97
 
98
+ # --------------------------------------------------------------------------------------
99
+ # Download helpers
100
+ # --------------------------------------------------------------------------------------
101
 
102
def _have_required_files() -> bool:
    """True when every required model file exists locally and is non-empty.

    A bare exists() check would accept zero-byte leftovers from an
    interrupted download; the size > 0 guard (restoring the previous
    revision's `_file_ok` behavior) forces the caller down the download /
    fallback path instead of attempting to load a corrupt local model.
    """
    for name in REQUIRED_FILES:
        fpath = MODEL_DIR / name
        try:
            if not fpath.is_file() or fpath.stat().st_size <= 0:
                return False
        except OSError:
            # stat() race / permission problem -> treat as missing
            return False
    return True
104
 
105
 
106
+ def _download_url_to_file(url: str, dst: Path, timeout: int = 180) -> None:
107
  """
108
+ Download with atomic temp rename.
 
109
  """
110
+ dst.parent.mkdir(parents=True, exist_ok=True)
111
  tmp = dst.with_suffix(dst.suffix + ".tmp")
112
 
113
+ if tmp.exists():
114
+ try:
115
+ tmp.unlink()
116
+ except Exception:
117
+ pass
118
+
119
+ req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-SaliaDepth/1.1"})
120
  with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
121
  shutil.copyfileobj(r, f)
122
 
123
+ tmp.replace(dst)
 
124
 
 
125
 
126
def ensure_local_model_files(log) -> bool:
    """Make sure MODEL_DIR holds the three required model files.

    Always logs the expected local paths and remote URLs, downloads whatever
    is missing, and returns True when all files are present afterwards.
    """
    # Report expected locations + URLs up front, even when nothing gets downloaded.
    log("[SaliaDepth] ===== Local model file check =====")
    log(f"[SaliaDepth] Plugin root: {PLUGIN_ROOT}")
    log(f"[SaliaDepth] Local model dir (on drive): {MODEL_DIR}")

    for fname, url in REQUIRED_FILES.items():
        fpath = MODEL_DIR / fname
        present = fpath.exists()
        size = _file_size(fpath) if present else None
        log(f"[SaliaDepth] - {fname}")
        log(f"[SaliaDepth] local path: {fpath} exists={present} size={_fmt_bytes(size)}")
        log(f"[SaliaDepth] remote url : {url}")

    if _have_required_files():
        log("[SaliaDepth] All required local files already exist. No download needed.")
        return True

    log("[SaliaDepth] One or more local files missing. Attempting download...")

    try:
        for fname, url in REQUIRED_FILES.items():
            fpath = MODEL_DIR / fname
            if fpath.exists():
                # Skip files already on disk; only fetch what is missing.
                continue
            log(f"[SaliaDepth] Downloading '{fname}' -> '{fpath}'")
            _download_url_to_file(url, fpath)
            log(f"[SaliaDepth] Downloaded '{fname}' size={_fmt_bytes(_file_size(fpath))}")

        ok = _have_required_files()
        log(f"[SaliaDepth] Download finished. ok={ok}")
        return ok
    except Exception as e:
        log(f"[SaliaDepth] Download failed with error: {repr(e)}")
        return False
165
 
166
 
167
+ # --------------------------------------------------------------------------------------
168
+ # Exact Zoe-style preprocessing helpers (copied/adapted from your snippet)
169
+ # --------------------------------------------------------------------------------------
170
+
171
def HWC3(x: np.ndarray) -> np.ndarray:
    """Coerce a uint8 image to 3-channel HWC.

    Grayscale is replicated across channels; RGBA is alpha-composited
    over a white background. Asserts mirror the upstream ControlNet helper.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels in (1, 3, 4)
    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x] * 3, axis=2)
    # channels == 4: composite RGBA over white.
    rgb = x[:, :, :3].astype(np.float32)
    a = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * a + 255.0 * (1.0 - a)
    return blended.clip(0, 255).astype(np.uint8)
188
+
189
+
190
def pad64(x: int) -> int:
    """Pixels required to round *x* up to the next multiple of 64."""
    return (64 - int(x) % 64) % 64
192
+
193
+
194
def safer_memory(x: np.ndarray) -> np.ndarray:
    """Return a contiguous, private copy of *x* (defensive, mirrors upstream Zoe code)."""
    contiguous = np.ascontiguousarray(x.copy())
    return contiguous.copy()
196
+
197
+
198
def resize_image_with_pad_min_side(
    input_image: np.ndarray,
    resolution: int,
    upscale_method: str = "INTER_CUBIC",
    skip_hwc3: bool = False,
    mode: str = "edge",
    log=None
) -> Tuple[np.ndarray, Any]:
    """Min-side resize to *resolution*, then pad H/W up to multiples of 64.

    Mirrors the upstream Zoe preprocessor: k = resolution / min(H, W),
    resize to the scaled extent, edge-pad, and return
    (padded_image, remove_pad) where remove_pad crops a same-padded array
    back to the resized (unpadded) extent.
    """
    # cv2 is preferred so results match the reference implementation.
    try:
        import cv2
    except Exception:
        cv2 = None
        if log:
            log("[SaliaDepth] WARN: cv2 not available; resizing will use PIL fallback (may change results).")

    img = input_image if skip_hwc3 else HWC3(input_image)

    H_raw, W_raw, _ = img.shape
    if resolution <= 0:
        # Non-positive resolution: hand the image back untouched with a no-op
        # crop (the -1 "keep original" path pads separately via pad_only_to_64).
        return img, (lambda x: x)

    k = float(resolution) / float(min(H_raw, W_raw))
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))

    if cv2 is not None:
        method_table = {
            "INTER_NEAREST": cv2.INTER_NEAREST,
            "INTER_LINEAR": cv2.INTER_LINEAR,
            "INTER_AREA": cv2.INTER_AREA,
            "INTER_CUBIC": cv2.INTER_CUBIC,
            "INTER_LANCZOS4": cv2.INTER_LANCZOS4,
        }
        chosen = method_table.get(upscale_method, cv2.INTER_CUBIC)
        # Downscales always use INTER_AREA, matching the reference code.
        img = cv2.resize(img, (W_target, H_target), interpolation=chosen if k > 1 else cv2.INTER_AREA)
    else:
        # PIL fallback when cv2 is missing.
        pil = Image.fromarray(img)
        resample = Image.BICUBIC if k > 1 else Image.LANCZOS
        img = np.array(pil.resize((W_target, H_target), resample=resample), dtype=np.uint8)

    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        # Crop back to the resized (pre-padding) size.
        return safer_memory(x[:H_target, :W_target, ...])

    return safer_memory(img_padded), remove_pad
 
 
 
 
261
 
262
 
263
def pad_only_to_64(img_u8: np.ndarray, mode: str = "edge") -> Tuple[np.ndarray, Any]:
    """Keep the original resolution but edge-pad H/W to multiples of 64.

    Used for resolution == -1; the returned remove_pad crops a same-padded
    array back to the original size.
    """
    img = HWC3(img_u8)
    height, width, _ = img.shape
    pad_h, pad_w = pad64(height), pad64(width)
    padded = np.pad(img, [[0, pad_h], [0, pad_w], [0, 0]], mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        return safer_memory(x[:height, :width, ...])

    return safer_memory(padded), remove_pad
 
 
277
 
 
 
278
 
279
+ # --------------------------------------------------------------------------------------
280
+ # RGBA rules (as you requested)
281
+ # --------------------------------------------------------------------------------------
282
 
283
def composite_rgba_over_white_keep_alpha(inp_u8: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """Split an image into (RGB, alpha).

    RGBA input -> RGB composited over a WHITE background plus the alpha plane.
    Anything else -> HWC3-normalized RGB and None for alpha.
    """
    is_rgba = inp_u8.ndim == 3 and inp_u8.shape[2] == 4
    if not is_rgba:
        # Force to RGB; no alpha to carry along.
        return HWC3(inp_u8), None

    rgba = inp_u8.astype(np.uint8)
    color = rgba[:, :, 0:3].astype(np.float32)
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    over_white = (color * alpha + 255.0 * (1.0 - alpha)).clip(0, 255).astype(np.uint8)
    return over_white, rgba[:, :, 3].copy()
297
+
298
+
299
def apply_alpha_then_black_background(depth_rgb_u8: np.ndarray, alpha_u8: np.ndarray) -> np.ndarray:
    """Re-attach *alpha_u8* to the depth map and flatten it onto black.

    Conceptually: depth RGB + alpha -> RGBA -> composite over BLACK -> RGB,
    which reduces to multiplying the depth values by the alpha fraction.
    """
    rgb = HWC3(depth_rgb_u8).astype(np.float32)
    frac = (alpha_u8.astype(np.float32) / 255.0)[:, :, None]
    return (rgb * frac).clip(0, 255).astype(np.uint8)
311
 
312
 
313
+ # --------------------------------------------------------------------------------------
314
+ # ComfyUI conversion helpers
315
+ # --------------------------------------------------------------------------------------
316
+
317
def comfy_tensor_to_u8(img: torch.Tensor) -> np.ndarray:
    """Convert a Comfy IMAGE tensor (float [0..1], [H,W,C] or [B,H,W,C]) to uint8 HWC.

    For batched input only the first frame is converted.
    """
    frame = img[0] if img.ndim == 4 else img
    arr = frame.detach().cpu().float().clamp(0, 1).numpy()
    return (arr * 255.0).round().astype(np.uint8)
327
 
 
 
 
328
 
329
def u8_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
    """uint8 HWC image -> Comfy IMAGE tensor [1,H,W,C], float32 in [0..1]."""
    rgb = HWC3(img_u8)
    scaled = rgb.astype(np.float32) / 255.0
    return torch.from_numpy(scaled)[None, ...]
333
 
 
 
 
 
 
334
 
335
+ # --------------------------------------------------------------------------------------
336
+ # Pipeline loading (local-first, then zoe fallback)
337
+ # --------------------------------------------------------------------------------------
338
 
339
+ _PIPE_CACHE: Dict[Tuple[str, str], Any] = {} # (model_source, device_str) -> pipeline
 
 
340
 
 
 
341
 
342
def _try_load_pipeline(model_source: str, device: torch.device, log):
    """Build (or fetch from cache) a transformers depth-estimation pipeline.

    Mirrors the Zoe node: the pipeline is created WITHOUT device=... and the
    model is moved to *device* afterwards. Raises when transformers failed to
    import or pipeline construction fails.
    """
    if pipeline is None:
        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")

    cache_key = (model_source, str(device))
    cached = _PIPE_CACHE.get(cache_key)
    if cached is not None:
        log(f"[SaliaDepth] Using cached pipeline for source='{model_source}' device='{device}'")
        return cached

    log(f"[SaliaDepth] Creating pipeline(task='depth-estimation', model='{model_source}')")
    pipe = pipeline(task="depth-estimation", model=model_source)

    # Move the model like ZoeDetector.to(); tolerate failure (model stays put).
    try:
        pipe.model = pipe.model.to(device)
        pipe.device = device  # Zoe code sets this; newer transformers uses torch.device internally
        log(f"[SaliaDepth] Moved pipeline model to device: {device}")
    except Exception as e:
        log(f"[SaliaDepth] WARN: Could not move pipeline model to device {device}: {repr(e)}")

    # Dump config details to help debug model-source mixups.
    try:
        cfg = pipe.model.config
        log(f"[SaliaDepth] Model class: {pipe.model.__class__.__name__}")
        log(f"[SaliaDepth] Config class: {cfg.__class__.__name__}")
        log(f"[SaliaDepth] Config model_type: {getattr(cfg, 'model_type', '')}")
        log(f"[SaliaDepth] Config _name_or_path: {getattr(cfg, '_name_or_path', '')}")
    except Exception as e:
        log(f"[SaliaDepth] WARN: Could not log model config: {repr(e)}")

    _PIPE_CACHE[cache_key] = pipe
    return pipe
378
 
379
 
380
def get_depth_pipeline(device: torch.device, log):
    """Resolve a depth pipeline: local assets/depth first, then the Zoe fallback.

    Steps:
      1) ensure local files exist (downloading if missing)
      2) try loading from the local directory
      3) fall back to ZOE_FALLBACK_REPO_ID
      4) return None when everything fails
    """
    # Log HF cache locations first — that is where the fallback repo lands on disk.
    log("[SaliaDepth] ===== Hugging Face cache info (fallback path) =====")
    for name, value in _hf_cache_info().items():
        if value:
            log(f"[SaliaDepth] {name} = {value}")
    log(f"[SaliaDepth] Zoe fallback repo id: {ZOE_FALLBACK_REPO_ID}")

    # Local-first
    if ensure_local_model_files(log):
        try:
            log(f"[SaliaDepth] Trying LOCAL model from directory: {MODEL_DIR}")
            return _try_load_pipeline(str(MODEL_DIR), device, log)
        except Exception as e:
            log(f"[SaliaDepth] Local model load FAILED: {repr(e)}")

    # Fallback
    try:
        log(f"[SaliaDepth] Trying ZOE fallback model: {ZOE_FALLBACK_REPO_ID}")
        return _try_load_pipeline(ZOE_FALLBACK_REPO_ID, device, log)
    except Exception as e:
        log(f"[SaliaDepth] Zoe fallback load FAILED: {repr(e)}")

    return None
411
+
412
+
413
+ # --------------------------------------------------------------------------------------
414
+ # Depth inference (Zoe-style)
415
+ # --------------------------------------------------------------------------------------
416
+
417
def depth_estimate_zoe_style(
    pipe,
    input_rgb_u8: np.ndarray,
    detect_resolution: int,
    log,
    upscale_method: str = "INTER_CUBIC"
) -> np.ndarray:
    """Run the depth pipeline Zoe-style and return a uint8 RGB depth map.

    Preprocessing:
      * detect_resolution == -1 -> keep the original size, pad to multiples of 64
      * otherwise               -> min-side resize to detect_resolution, then pad
    Postprocessing matches the reference Zoe code exactly: percentile (2..85)
    normalization, inversion, byte quantization, then pad removal.
    """
    if detect_resolution == -1:
        work_img, remove_pad = pad_only_to_64(input_rgb_u8, mode="edge")
        log(f"[SaliaDepth] Preprocess: resolution=-1 (no resize), padded to 64. work={work_img.shape}")
    else:
        work_img, remove_pad = resize_image_with_pad_min_side(
            input_rgb_u8,
            int(detect_resolution),
            upscale_method=upscale_method,
            skip_hwc3=False,
            mode="edge",
            log=log
        )
        log(f"[SaliaDepth] Preprocess: min-side resized to {detect_resolution}, padded to 64. work={work_img.shape}")

    pil_image = Image.fromarray(work_img)

    with torch.no_grad():
        result = pipe(pil_image)
        depth = result["depth"]

    # The pipeline may return a PIL image or an array-like; np.array handles
    # both, so the previous isinstance branch (whose two arms were identical)
    # is collapsed into a single conversion.
    depth_array = np.array(depth, dtype=np.float32)

    # EXACT normalization like the reference Zoe code.
    vmin = float(np.percentile(depth_array, 2))
    vmax = float(np.percentile(depth_array, 85))

    log(f"[SaliaDepth] Depth raw stats: shape={depth_array.shape} vmin(p2)={vmin:.6f} vmax(p85)={vmax:.6f} mean={float(depth_array.mean()):.6f}")

    depth_array = depth_array - vmin
    denom = (vmax - vmin)
    if abs(denom) < 1e-12:
        # Flat depth map: avoid division by zero (output becomes uniform).
        log("[SaliaDepth] WARN: vmax==vmin; forcing denom epsilon to avoid NaNs.")
        denom = 1e-6
    depth_array = depth_array / denom

    # EXACT invert like the reference Zoe code: near -> bright.
    depth_array = 1.0 - depth_array

    depth_image = (depth_array * 255.0).clip(0, 255).astype(np.uint8)

    detected_map = remove_pad(HWC3(depth_image))
    log(f"[SaliaDepth] Output (post-remove_pad): {detected_map.shape} dtype={detected_map.dtype}")
    return detected_map
478
+
479
+
480
def resize_to_original(depth_rgb_u8: np.ndarray, w0: int, h0: int, log) -> np.ndarray:
    """Resize the depth output back to the original (w0, h0) input size.

    Uses cv2 when available; otherwise falls back to PIL bilinear.
    """
    try:
        import cv2
        resized = cv2.resize(depth_rgb_u8, (w0, h0), interpolation=cv2.INTER_LINEAR)
        return resized.astype(np.uint8)
    except Exception as e:
        log(f"[SaliaDepth] WARN: cv2 resize failed ({repr(e)}); using PIL.")
        pil = Image.fromarray(depth_rgb_u8).resize((w0, h0), resample=Image.BILINEAR)
        return np.array(pil, dtype=np.uint8)
494
 
495
 
496
+ # --------------------------------------------------------------------------------------
497
  # ComfyUI Node
498
+ # --------------------------------------------------------------------------------------
499
 
500
  class Salia_Depth_Preprocessor:
501
  @classmethod
 
503
  return {
504
  "required": {
505
  "image": ("IMAGE",),
506
+ # note: default -1, min -1
507
  "resolution": ("INT", {"default": -1, "min": -1, "max": 8192, "step": 1}),
508
  }
509
  }
510
 
511
+ # 2 outputs: image + log string
512
+ RETURN_TYPES = ("IMAGE", "STRING")
513
  FUNCTION = "execute"
514
  CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
515
 
516
+ def execute(self, image, resolution=-1):
517
+ lines, log = _make_logger()
518
+ log("[SaliaDepth] ==================================================")
519
+ log("[SaliaDepth] SaliaDepthPreprocessor starting")
520
+ log(f"[SaliaDepth] resolution input = {resolution}")
521
+
522
+ # Get torch device
523
  try:
524
+ device = model_management.get_torch_device()
525
+ except Exception as e:
526
+ device = torch.device("cpu")
527
+ log(f"[SaliaDepth] WARN: model_management.get_torch_device failed: {repr(e)} -> using CPU")
 
528
 
529
+ log(f"[SaliaDepth] torch device = {device}")
530
 
531
+ # Load pipeline
532
+ pipe = None
533
+ try:
534
+ pipe = get_depth_pipeline(device, log)
535
+ except Exception as e:
536
+ log(f"[SaliaDepth] ERROR: get_depth_pipeline crashed: {repr(e)}")
537
+ pipe = None
538
 
539
+ if pipe is None:
540
+ log("[SaliaDepth] FATAL: No pipeline available. Returning input image unchanged.")
541
+ return (image, "\n".join(lines))
542
 
543
+ # Batch support
544
+ if image.ndim == 3:
545
+ image = image.unsqueeze(0)
546
 
547
+ outs = []
548
+ for i in range(image.shape[0]):
549
  try:
550
+ # Original dimensions
551
+ h0 = int(image[i].shape[0])
552
+ w0 = int(image[i].shape[1])
553
+ c0 = int(image[i].shape[2])
554
+ log(f"[SaliaDepth] ---- Batch index {i} input shape = ({h0},{w0},{c0}) ----")
555
+
556
+ inp_u8 = comfy_tensor_to_u8(image[i])
557
+
558
+ # RGBA rule (pre)
559
+ rgb_for_depth, alpha_u8 = composite_rgba_over_white_keep_alpha(inp_u8)
560
+ had_rgba = alpha_u8 is not None
561
+ log(f"[SaliaDepth] had_rgba={had_rgba}")
562
+
563
+ # Run depth (Zoe-style)
564
+ depth_rgb = depth_estimate_zoe_style(
565
+ pipe=pipe,
566
+ input_rgb_u8=rgb_for_depth,
567
+ detect_resolution=int(resolution),
568
+ log=log,
569
+ upscale_method="INTER_CUBIC"
570
+ )
571
+
572
+ # Resize back to original input size
573
+ depth_rgb = resize_to_original(depth_rgb, w0=w0, h0=h0, log=log)
574
+
575
+ # RGBA rule (post)
576
+ if had_rgba:
577
+ # Use original alpha at original size.
578
+ # If alpha size differs, resize alpha to match.
579
+ if alpha_u8.shape[0] != h0 or alpha_u8.shape[1] != w0:
580
+ log("[SaliaDepth] Alpha size mismatch; resizing alpha to original size.")
581
+ try:
582
+ import cv2
583
+ alpha_u8 = cv2.resize(alpha_u8, (w0, h0), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
584
+ except Exception:
585
+ pil_a = Image.fromarray(alpha_u8)
586
+ pil_a = pil_a.resize((w0, h0), resample=Image.BILINEAR)
587
+ alpha_u8 = np.array(pil_a, dtype=np.uint8)
588
+
589
+ # "Put alpha on RGB turning it into RGBA, then put BLACK background behind it, then back to RGB"
590
+ depth_rgb = apply_alpha_then_black_background(depth_rgb, alpha_u8)
591
+ log("[SaliaDepth] Applied RGBA post-step (alpha + black background).")
592
+
593
+ outs.append(u8_to_comfy_tensor(depth_rgb))
594
 
595
  except Exception as e:
596
+ log(f"[SaliaDepth] ERROR: Inference failed at batch index {i}: {repr(e)}")
597
+ log("[SaliaDepth] Passing through original input image for this batch item.")
598
+ outs.append(image[i].unsqueeze(0))
599
 
600
+ out = torch.cat(outs, dim=0)
601
+ log("[SaliaDepth] Done.")
602
+ return (out, "\n".join(lines))
603
 
604
 
605
  NODE_CLASS_MAPPINGS = {
 
607
  }
608
 
609
  NODE_DISPLAY_NAME_MAPPINGS = {
610
+ "SaliaDepthPreprocessor": "Salia Depth (local assets/depth + logs)"
611
  }