saliacoel
/

depth

Safetensors

zoedepth

Model card Files Files and versions

xet

Community

saliacoel committed 8 days ago

Commit

1cc0ed9

verified ·

1 Parent(s): 3d625c7

Upload salia_depth.py

Browse files

Files changed (1) hide show

salia_depth.py +229 -227

salia_depth.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import shutil
 import urllib.request
 from pathlib import Path
-from typing import Dict, Tuple, Any, Optional
 import numpy as np
 import torch
@@ -9,272 +12,248 @@ from PIL import Image
 import comfy.model_management as model_management
-# transformers is required
 try:
-    from transformers import pipeline
-except Exception as e:
-    pipeline = None
-    _TRANSFORMERS_IMPORT_ERROR = e
-# --------------------------------------------------------------------------------------
-# Paths / sources
-# --------------------------------------------------------------------------------------
-# This file: comfyui-salia_online/nodes/Salia_Depth.py
-# Plugin root: comfyui-salia_online/
-PLUGIN_ROOT = Path(__file__).resolve().parent.parent
-# Requested local path: assets/depth
-MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
-MODEL_DIR.mkdir(parents=True, exist_ok=True)
-REQUIRED_FILES = {
-    "config.json": "https://huggingface.co/saliacoel/depth/resolve/main/config.json",
-    "model.safetensors": "https://huggingface.co/saliacoel/depth/resolve/main/model.safetensors",
-    "preprocessor_config.json": "https://huggingface.co/saliacoel/depth/resolve/main/preprocessor_config.json",
 }
-# "zoe-path" fallback (matches what your current ZoeDetector code pulls)
-ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
-# --------------------------------------------------------------------------------------
-# Download + validation helpers
-# --------------------------------------------------------------------------------------
-def _have_required_files() -> bool:
-    return all((MODEL_DIR / name).exists() for name in REQUIRED_FILES.keys())
-def _download_url_to_file(url: str, dst: Path, timeout: int = 120) -> None:
     """
-    Download with an atomic temp file -> rename.
     """
-    dst.parent.mkdir(parents=True, exist_ok=True)
     tmp = dst.with_suffix(dst.suffix + ".tmp")
-    if tmp.exists():
-        try:
-            tmp.unlink()
-        except Exception:
-            pass
-    req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-SaliaDepth/1.0"})
     with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
         shutil.copyfileobj(r, f)
-    tmp.replace(dst)
-def ensure_local_model_files() -> bool:
     """
-    Ensure assets/depth contains config.json, model.safetensors, preprocessor_config.json.
-    Returns True if files are present (either already or downloaded).
-    Returns False if download failed.
     """
-    if _have_required_files():
-        return True
-    print("[SaliaDepth] Local model files missing in:", str(MODEL_DIR))
-    print("[SaliaDepth] Attempting to download required files from saliacoel/depth ...")
     try:
-        for fname, url in REQUIRED_FILES.items():
-            fpath = MODEL_DIR / fname
-            if fpath.exists():
-                continue
-            print(f"[SaliaDepth] Downloading {fname} ...")
-            _download_url_to_file(url, fpath)
-        ok = _have_required_files()
-        print(f"[SaliaDepth] Download complete. ok={ok}")
-        return ok
     except Exception as e:
-        print("[SaliaDepth] Download failed:", repr(e))
         return False
-# --------------------------------------------------------------------------------------
-# Pipeline cache / load
-# --------------------------------------------------------------------------------------
-_PIPE_CACHE: Dict[Tuple[str, str], Any] = {}  # (model_source, device_str) -> pipeline
-def _pipeline_device_arg(device: torch.device) -> int:
-    # transformers.pipeline: device=-1 for CPU, 0..N for CUDA index
-    if device.type == "cuda":
-        return int(device.index) if device.index is not None else 0
-    return -1
-def _try_load_pipeline(model_source: str, device: torch.device):
     """
-    model_source can be:
-      - local directory path (string)
-      - HF repo id
     """
-    if pipeline is None:
-        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")
-    key = (model_source, str(device))
-    if key in _PIPE_CACHE:
-        return _PIPE_CACHE[key]
-    dev_arg = _pipeline_device_arg(device)
-    print(f"[SaliaDepth] Loading depth-estimation pipeline from '{model_source}' (device={dev_arg})")
-    p = pipeline(task="depth-estimation", model=model_source, device=dev_arg)
-    # If Comfy gives MPS (mac), pipeline device arg is -1; try moving model anyway.
-    try:
-        p.model = p.model.to(device)
-    except Exception:
-        pass
-    _PIPE_CACHE[key] = p
-    return p
-def get_depth_pipeline(device: torch.device):
     """
-    1) Try local assets/depth (download if missing)
-    2) Fallback to zoe-path Intel/zoedepth-nyu-kitti
-    3) If both fail -> return None
     """
-    # 1) local-first
-    if ensure_local_model_files():
-        try:
-            return _try_load_pipeline(str(MODEL_DIR), device)
-        except Exception as e:
-            print("[SaliaDepth] Local model load failed:", repr(e))
-    # 2) zoe fallback
-    try:
-        print("[SaliaDepth] Falling back to Zoe path:", ZOE_FALLBACK_REPO_ID)
-        return _try_load_pipeline(ZOE_FALLBACK_REPO_ID, device)
-    except Exception as e:
-        print("[SaliaDepth] Zoe fallback load failed:", repr(e))
-    # 3) total failure
-    return None
-# --------------------------------------------------------------------------------------
-# Image utilities
-# --------------------------------------------------------------------------------------
-def _hwc3(x: np.ndarray) -> np.ndarray:
-    assert x.dtype == np.uint8
-    if x.ndim == 2:
-        x = x[:, :, None]
-    if x.shape[2] == 1:
-        return np.concatenate([x, x, x], axis=2)
-    if x.shape[2] == 3:
-        return x
-    if x.shape[2] == 4:
-        color = x[:, :, 0:3].astype(np.float32)
-        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
-        y = color * alpha + 255.0 * (1.0 - alpha)
-        return y.clip(0, 255).astype(np.uint8)
-    raise ValueError("Unexpected channel count")
-def _pad64(n: int) -> int:
-    return int(np.ceil(float(n) / 64.0) * 64 - n)
-def _resize_long_side(image_u8: np.ndarray, long_side: int) -> np.ndarray:
     """
-    Resize so that max(H,W) == long_side. If long_side equals current long side -> no change.
     """
-    h, w = image_u8.shape[:2]
-    cur_long = max(h, w)
-    if long_side <= 0 or long_side == cur_long:
-        return image_u8
-    scale = float(long_side) / float(cur_long)
-    new_w = int(round(w * scale))
-    new_h = int(round(h * scale))
-    pil = Image.fromarray(image_u8)
-    # Downscale with LANCZOS, upscale with BICUBIC
-    resample = Image.BICUBIC if scale > 1.0 else Image.LANCZOS
-    pil = pil.resize((new_w, new_h), resample=resample)
-    return np.array(pil, dtype=np.uint8)
-def _pad_to_64(image_u8: np.ndarray, mode: str = "edge"):
-    h, w = image_u8.shape[:2]
-    hp = _pad64(h)
-    wp = _pad64(w)
-    padded = np.pad(image_u8, ((0, hp), (0, wp), (0, 0)), mode=mode)
-    def remove_pad(x: np.ndarray) -> np.ndarray:
-        return x[:h, :w, :]
-    return padded, remove_pad
-def _comfy_to_u8(img: torch.Tensor) -> np.ndarray:
     """
-    Comfy IMAGE is float [0..1], shape [H,W,C] or [B,H,W,C]
     """
-    if img.ndim == 4:
-        img = img[0]
-    img = img.detach().cpu().float().clamp(0, 1)
-    arr = (img.numpy() * 255.0).round().astype(np.uint8)
-    return arr
-def _u8_to_comfy(img_u8: np.ndarray) -> torch.Tensor:
-    img_u8 = _hwc3(img_u8)
-    t = torch.from_numpy(img_u8.astype(np.float32) / 255.0)
-    return t.unsqueeze(0)  # [1,H,W,C]
-def _depth_to_uint8(pipe, input_u8: np.ndarray, detect_long_side: int) -> np.ndarray:
     """
-    Run depth estimation:
-    - resize (long side)
-    - pad to 64
-    - infer
-    - normalize (percentiles like your zoe code)
-    - remove pad
-    - return 3-channel uint8
     """
-    input_u8 = _hwc3(input_u8)
-    resized = _resize_long_side(input_u8, detect_long_side)
-    padded, remove_pad = _pad_to_64(resized, mode="edge")
-    pil = Image.fromarray(padded)
-    with torch.no_grad():
-        result = pipe(pil)
-        depth = result["depth"]
-        if isinstance(depth, Image.Image):
-            depth_arr = np.array(depth, dtype=np.float32)
-        else:
-            depth_arr = np.array(depth, dtype=np.float32)
-        vmin = np.percentile(depth_arr, 2)
-        vmax = np.percentile(depth_arr, 85)
-        denom = (vmax - vmin) if (vmax - vmin) > 1e-6 else 1e-6
-        depth_arr = (depth_arr - vmin) / denom
-        depth_arr = 1.0 - depth_arr
-        depth_u8 = (depth_arr * 255.0).clip(0, 255).astype(np.uint8)
-    depth_rgb = _hwc3(depth_u8)
-    depth_rgb = remove_pad(depth_rgb)
-    return depth_rgb
-# --------------------------------------------------------------------------------------
 # ComfyUI Node
-# --------------------------------------------------------------------------------------
 class Salia_Depth_Preprocessor:
     @classmethod
@@ -291,51 +270,74 @@ class Salia_Depth_Preprocessor:
     FUNCTION = "execute"
     CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
-    def execute(self, image, resolution=-1):
         """
-        If everything fails (local model + zoe fallback), return input image unchanged.
         """
         try:
-            device = model_management.get_torch_device()
         except Exception:
-            device = torch.device("cpu")
-        pipe = get_depth_pipeline(device)
-        if pipe is None:
-            # Hard fail: return input image unchanged
-            print("[SaliaDepth] No pipeline available. Returning input image unchanged.")
             return (image,)
-        # Batch support: image is [B,H,W,C]
-        if image.ndim == 3:
-            image = image.unsqueeze(0)
-        outs = []
-        for i in range(image.shape[0]):
-            # original size
-            h0 = int(image[i].shape[0])
-            w0 = int(image[i].shape[1])
-            long_side = max(w0, h0)
-            detect_long_side = long_side if int(resolution) == -1 else int(resolution)
             try:
-                inp_u8 = _comfy_to_u8(image[i])
-                depth_u8 = _depth_to_uint8(pipe, inp_u8, detect_long_side)
-                # resize depth back to original input size
-                pil = Image.fromarray(depth_u8)
-                pil = pil.resize((w0, h0), resample=Image.BILINEAR)
-                depth_u8 = np.array(pil, dtype=np.uint8)
-                outs.append(_u8_to_comfy(depth_u8))
             except Exception as e:
-                # Per-image fail: return that image unchanged
-                print(f"[SaliaDepth] Inference failed for batch index {i}: {repr(e)}. Passing through input.")
-                outs.append(image[i].unsqueeze(0))
-        out = torch.cat(outs, dim=0)
-        return (out,)
 NODE_CLASS_MAPPINGS = {
@@ -343,5 +345,5 @@ NODE_CLASS_MAPPINGS = {
 }
 NODE_DISPLAY_NAME_MAPPINGS = {
-    "SaliaDepthPreprocessor": "Salia Depth (assets/depth local-first)"
 }

+from __future__ import annotations
+import os
 import shutil
 import urllib.request
 from pathlib import Path
+from typing import Dict, Tuple, Optional, List
 import numpy as np
 import torch
 import comfy.model_management as model_management
 try:
+    import cv2
+except Exception:
+    cv2 = None
# ---------------------------------------------------------------
# Model locations: on-disk directory and download endpoints
# ---------------------------------------------------------------
# This module sits in <plugin>/nodes/, so the plugin root is two levels
# above the file itself.
PLUGIN_ROOT = Path(__file__).resolve().parents[1]

# Weights live in exactly <plugin>/assets/depth (not assets/assets, not assets/).
ASSETS_DEPTH_DIR = PLUGIN_ROOT / "assets" / "depth"

# The three files that must all be present before a local load is attempted.
REQUIRED_FILES = ["config.json", "preprocessor_config.json", "model.safetensors"]

# Every required file is served under the same base URL with its own name.
HF_BASE = "https://huggingface.co/saliacoel/depth/resolve/main"
FILE_URLS = {name: f"{HF_BASE}/{name}" for name in REQUIRED_FILES}

# Repository used when the local copy cannot be obtained or loaded ("zoe-path").
FALLBACK_ZOE_REPO = "Intel/zoedepth-nyu-kitti"

# ---------------------------------------------------------------
# Process-wide cache of loaded models
# ---------------------------------------------------------------
# Maps (device_str, source_id) -> (processor, model).
_MODEL_CACHE: Dict[Tuple[str, str], Tuple[object, object]] = {}
+# -----------------------------
+# Utility
+# -----------------------------
+def _ensure_dir(p: Path) -> None:
+    p.mkdir(parents=True, exist_ok=True)
+def _file_ok(p: Path) -> bool:
+    # existence + non-empty is a good baseline against partial downloads
+    return p.exists() and p.is_file() and p.stat().st_size > 0
def _have_local_files() -> bool:
    """Report whether every entry of REQUIRED_FILES passes ``_file_ok`` inside ASSETS_DEPTH_DIR."""
    for fname in REQUIRED_FILES:
        if not _file_ok(ASSETS_DEPTH_DIR / fname):
            return False
    return True
def _download_file(url: str, dst: Path, timeout: int = 60) -> None:
    """
    Download ``url`` to ``dst`` through a sibling ``<dst>.tmp`` file.

    The payload is streamed into the temporary file and only moved over
    ``dst`` with ``os.replace`` once it has been validated, so ``dst`` is never
    left half-written. On any failure the temporary file is deleted rather
    than left behind as a partial download.

    Args:
        url: Address to fetch.
        dst: Final location of the file; its parent directory is created if missing.
        timeout: Timeout in seconds passed to ``urlopen``.

    Raises:
        RuntimeError: if the downloaded file is empty, or its size differs
            from the ``Content-Length`` announced by the server.
        Exception: errors from ``urlopen`` or the file operations propagate
            unchanged.
    """
    _ensure_dir(dst.parent)
    tmp = dst.with_suffix(dst.suffix + ".tmp")
    req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-Salia-Depth/1.0"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
            # Remember the announced size (if any) to detect truncated transfers.
            announced = r.headers.get("Content-Length")
            shutil.copyfileobj(r, f)
        if not _file_ok(tmp):
            raise RuntimeError(f"Downloaded file is empty/corrupt: {tmp}")
        if announced is not None and announced.strip().isdigit():
            received = tmp.stat().st_size
            if received != int(announced):
                raise RuntimeError(
                    f"Downloaded file is truncated ({received} of {int(announced)} bytes): {tmp}"
                )
        os.replace(tmp, dst)
    except BaseException:
        # Best-effort cleanup; the original error is what the caller needs to see.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
def _ensure_local_model_files() -> bool:
    """
    Make sure all REQUIRED_FILES are present in ASSETS_DEPTH_DIR.

    Files that fail ``_file_ok`` are fetched from their FILE_URLS entry.
    Returns True when every file is available afterwards, False when the
    download step raised (the error is printed, not propagated).
    """
    _ensure_dir(ASSETS_DEPTH_DIR)

    # Fast path: nothing to fetch.
    if _have_local_files():
        return True

    try:
        for fname in REQUIRED_FILES:
            target = ASSETS_DEPTH_DIR / fname
            if _file_ok(target):
                continue
            _download_file(FILE_URLS[fname], target)
        complete = _have_local_files()
    except Exception as err:
        print(f"[SaliaDepth] Download from saliacoel/depth failed: {err}")
        return False
    return complete
+def _resize_max_side_uint8(img_u8: np.ndarray, max_side: int) -> np.ndarray:
     """
+    Resize uint8 HWC so that max(H,W) == max_side, keep aspect ratio.
+    If max_side <= 0 or already matches, returns original.
     """
+    if max_side <= 0:
+        return img_u8
+    h, w = img_u8.shape[:2]
+    cur_max = max(h, w)
+    if cur_max == 0 or cur_max == max_side:
+        return img_u8
+    scale = float(max_side) / float(cur_max)
+    new_w = max(1, int(round(w * scale)))
+    new_h = max(1, int(round(h * scale)))
+    if cv2 is not None:
+        interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_CUBIC
+        return cv2.resize(img_u8, (new_w, new_h), interpolation=interp)
+    # PIL fallback
+    pil = Image.fromarray(img_u8)
+    resample = Image.Resampling.LANCZOS if scale < 1 else Image.Resampling.BICUBIC
+    pil = pil.resize((new_w, new_h), resample=resample)
+    return np.array(pil, dtype=np.uint8)
+def _depth_to_hint_rgb(depth_2d: np.ndarray) -> np.ndarray:
     """
+    Normalize depth to a ControlNet-style grayscale RGB hint.
+    Uses percentile normalization (2..85) and inverts.
     """
+    d = depth_2d.astype(np.float32)
+    if not np.isfinite(d).all():
+        d = np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0)
+    vmin = np.percentile(d, 2)
+    vmax = np.percentile(d, 85)
+    denom = max(vmax - vmin, 1e-6)
+    dn = (d - vmin) / denom
+    dn = np.clip(dn, 0.0, 1.0)
+    dn = 1.0 - dn
+    u8 = (dn * 255.0).round().clip(0, 255).astype(np.uint8)
+    return np.stack([u8, u8, u8], axis=-1)
+def _comfy_tensor_to_uint8_hwc(img: torch.Tensor) -> np.ndarray:
+    """
+    ComfyUI IMAGE: float [0..1], shape [H,W,3]
+    -> uint8 HWC
+    """
+    x = img.detach()
+    if x.is_cuda:
+        x = x.cpu()
+    x = x.float().clamp(0, 1).numpy()
+    return (x * 255.0).round().clip(0, 255).astype(np.uint8)
+def _uint8_hwc_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
     """
+    uint8 HWC -> float32 tensor HWC [0..1]
     """
+    return torch.from_numpy(img_u8.astype(np.float32) / 255.0)
+def _post_process_depth(processor, outputs, target_h: int, target_w: int) -> np.ndarray:
+    """
+    Transformers API compatibility shim.
+    Some versions use target_sizes, some source_sizes.
+    Returns depth as float32 HxW.
+    """
+    # Try the most common signature first
+    try:
+        post = processor.post_process_depth_estimation(outputs, target_sizes=[(target_h, target_w)])
+    except TypeError:
+        post = processor.post_process_depth_estimation(outputs, source_sizes=[(target_h, target_w)])
+    # expected: list[{"predicted_depth": tensor[H,W]}]
+    depth_t = post[0]["predicted_depth"]
+    return depth_t.detach().float().cpu().numpy()
def _load_zoedepth_from_local(device: torch.device):
    """
    Load the ZoeDepth processor and model from ASSETS_DEPTH_DIR without network access.

    The pair is cached per device in _MODEL_CACHE; repeated calls return the
    cached (processor, model) tuple. The model is put in eval mode and moved
    to ``device``.
    """
    # Imported lazily so a missing transformers install only fails at load time.
    from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation

    cache_key = (str(device), f"local::{ASSETS_DEPTH_DIR}")
    cached = _MODEL_CACHE.get(cache_key)
    if cached is not None:
        return cached

    model_dir = str(ASSETS_DEPTH_DIR)
    processor = AutoImageProcessor.from_pretrained(model_dir, local_files_only=True)
    model = ZoeDepthForDepthEstimation.from_pretrained(model_dir, local_files_only=True)
    model.eval().to(device)

    pack = (processor, model)
    _MODEL_CACHE[cache_key] = pack
    return pack
def _load_zoedepth_fallback(device: torch.device):
    """
    Load the ZoeDepth processor and model from the FALLBACK_ZOE_REPO repository id.

    The pair is cached per device in _MODEL_CACHE; repeated calls return the
    cached (processor, model) tuple. The model is put in eval mode and moved
    to ``device``.
    """
    # Imported lazily so a missing transformers install only fails at load time.
    from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation

    cache_key = (str(device), f"hf::{FALLBACK_ZOE_REPO}")
    cached = _MODEL_CACHE.get(cache_key)
    if cached is not None:
        return cached

    processor = AutoImageProcessor.from_pretrained(FALLBACK_ZOE_REPO)
    model = ZoeDepthForDepthEstimation.from_pretrained(FALLBACK_ZOE_REPO)
    model.eval().to(device)

    pack = (processor, model)
    _MODEL_CACHE[cache_key] = pack
    return pack
def _get_model(device: torch.device):
    """
    Resolve a (processor, model) pair for ``device``, or None if nothing loads.

    Order of attempts:
      1) the local copy in assets/depth (downloaded first if missing),
      2) the zoe-path fallback repository.
    Failures are printed and never raised to the caller.
    """
    # Stage 1: local copy.
    try:
        if _ensure_local_model_files():
            try:
                return _load_zoedepth_from_local(device)
            except Exception as local_err:
                print(f"[SaliaDepth] Local load failed (assets/depth). Will fallback to zoe-path. Error: {local_err}")
    except Exception as ensure_err:
        print(f"[SaliaDepth] Local ensure/load unexpected error. Fallback to zoe-path. Error: {ensure_err}")

    # Stage 2: zoe-path fallback.
    try:
        pack = _load_zoedepth_fallback(device)
    except Exception as hub_err:
        print(f"[SaliaDepth] Zoe fallback load failed. Will passthrough image. Error: {hub_err}")
        return None
    return pack
+# -----------------------------
 # ComfyUI Node
+# -----------------------------
 class Salia_Depth_Preprocessor:
     @classmethod
     FUNCTION = "execute"
     CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
+    def execute(self, image: torch.Tensor, resolution: int = -1):
         """
+        If anything fails:
+          - return (image,) passthrough
         """
+        # Basic shape validation; if weird, passthrough
         try:
+            if image.dim() != 4 or image.shape[-1] != 3:
+                print(f"[SaliaDepth] Unexpected input IMAGE shape {tuple(image.shape)}; passthrough.")
+                return (image,)
         except Exception:
             return (image,)
+        device = model_management.get_torch_device()
+        model_pack = _get_model(device)
+        if model_pack is None:
+            return (image,)
+        processor, model = model_pack
+        outs: List[torch.Tensor] = []
+        for b in range(image.shape[0]):
             try:
+                # input in original size
+                img_u8 = _comfy_tensor_to_uint8_hwc(image[b])
+                h0, w0 = img_u8.shape[0], img_u8.shape[1]
+                # note 5: if -1, use bigger side (max(w,h))
+                max_side = max(w0, h0) if resolution == -1 else int(resolution)
+                # resize for inference (max side rule)
+                img_inf = _resize_max_side_uint8(img_u8, max_side=max_side)
+                pil = Image.fromarray(img_inf)
+                # preprocess
+                inputs = processor(images=pil, return_tensors="pt")
+                pixel_values = inputs["pixel_values"].to(device)
+                with torch.inference_mode():
+                    outputs = model(pixel_values=pixel_values)
+                # postprocess back to inference image size
+                depth_np = _post_process_depth(processor, outputs, pil.height, pil.width)
+                # depth -> grayscale RGB hint
+                hint_rgb = _depth_to_hint_rgb(depth_np)
+                # resize hint back to original size
+                if hint_rgb.shape[0] != h0 or hint_rgb.shape[1] != w0:
+                    if cv2 is not None:
+                        hint_rgb = cv2.resize(hint_rgb, (w0, h0), interpolation=cv2.INTER_CUBIC)
+                    else:
+                        hint_rgb = np.array(
+                            Image.fromarray(hint_rgb).resize((w0, h0), resample=Image.Resampling.BICUBIC),
+                            dtype=np.uint8
+                        )
+                outs.append(_uint8_hwc_to_comfy_tensor(hint_rgb))
             except Exception as e:
+                # Per-image failure -> passthrough that image (keeps batch size consistent)
+                print(f"[SaliaDepth] Inference failed on batch index {b}; passthrough that frame. Error: {e}")
+                outs.append(image[b].detach().cpu() if image[b].is_cuda else image[b])
+        out_batch = torch.stack(outs, dim=0)
+        return (out_batch,)
 NODE_CLASS_MAPPINGS = {
 }
 NODE_DISPLAY_NAME_MAPPINGS = {
+    "SaliaDepthPreprocessor": "Salia Depth"
 }