saliacoel
/

depth

Safetensors

zoedepth

Model card Files Files and versions

xet

Community

saliacoel committed 8 days ago

Commit

3d625c7

verified ·

1 Parent(s): d02b8fc

Upload salia_depth.py

Browse files

Files changed (1) hide show

salia_depth.py +347 -0

salia_depth.py ADDED Viewed

	@@ -0,0 +1,347 @@

+import shutil
+import urllib.request
+from pathlib import Path
+from typing import Dict, Tuple, Any, Optional
+import numpy as np
+import torch
+from PIL import Image
+import comfy.model_management as model_management
+# transformers is required
+try:
+    from transformers import pipeline
+except Exception as e:
+    pipeline = None
+    _TRANSFORMERS_IMPORT_ERROR = e
+# --------------------------------------------------------------------------------------
+# Paths / sources
+# --------------------------------------------------------------------------------------
# Expected layout (per the author's note):
#   this file   -> comfyui-salia_online/nodes/Salia_Depth.py
#   plugin root -> comfyui-salia_online/
PLUGIN_ROOT = Path(__file__).resolve().parent.parent

# Local model directory (assets/depth under the plugin root); created at import time.
MODEL_DIR = PLUGIN_ROOT.joinpath("assets", "depth")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Required file name -> download URL in the saliacoel/depth repository.
REQUIRED_FILES = {
    fname: f"https://huggingface.co/saliacoel/depth/resolve/main/{fname}"
    for fname in ("config.json", "model.safetensors", "preprocessor_config.json")
}

# "zoe-path" fallback repo id, used when the local model cannot be loaded
# (the author notes it matches what the existing ZoeDetector code pulls).
ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
+# --------------------------------------------------------------------------------------
+# Download + validation helpers
+# --------------------------------------------------------------------------------------
def _have_required_files() -> bool:
    """Return True when every file named in REQUIRED_FILES exists under MODEL_DIR."""
    for filename in REQUIRED_FILES:
        if not (MODEL_DIR / filename).exists():
            return False
    return True
def _download_url_to_file(url: str, dst: Path, timeout: int = 120) -> None:
    """
    Download ``url`` to ``dst`` via a temporary sibling file that is renamed on success.

    The payload is streamed into ``<dst>.tmp`` and only moved over ``dst`` once the
    copy has finished, so ``dst`` never holds a partially written file.

    Args:
        url: Source URL, opened with ``urllib.request.urlopen``.
        dst: Final destination path; its parent directory is created if missing.
        timeout: Timeout in seconds passed to ``urlopen``.

    Raises:
        Whatever ``urlopen``, the file copy, or the final rename raises
        (e.g. ``urllib.error.URLError``, ``OSError``). The temp file is removed
        before the exception propagates.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    tmp = dst.with_suffix(dst.suffix + ".tmp")
    req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-SaliaDepth/1.0"})
    try:
        # Opening with "wb" truncates any stale temp file left by an earlier run.
        with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
            shutil.copyfileobj(r, f)
        tmp.replace(dst)
    finally:
        # After a successful rename the temp file is already gone; after a failure
        # this removes the partial download instead of leaving it on disk.
        try:
            tmp.unlink()
        except OSError:
            # Best-effort cleanup: nothing to remove, or removal is not possible.
            pass
def ensure_local_model_files() -> bool:
    """
    Make sure MODEL_DIR holds every file listed in REQUIRED_FILES.

    Files that are already present are left alone; missing ones are downloaded.
    Returns True when all files are present afterwards (already there or freshly
    downloaded) and False when any download step raised.
    """
    if _have_required_files():
        return True

    print("[SaliaDepth] Local model files missing in:", str(MODEL_DIR))
    print("[SaliaDepth] Attempting to download required files from saliacoel/depth ...")
    try:
        for fname, url in REQUIRED_FILES.items():
            target = MODEL_DIR / fname
            if not target.exists():
                print(f"[SaliaDepth] Downloading {fname} ...")
                _download_url_to_file(url, target)
        ok = _have_required_files()
        print(f"[SaliaDepth] Download complete. ok={ok}")
        return ok
    except Exception as e:
        # Any failure (network, filesystem, ...) is reported as "not available".
        print("[SaliaDepth] Download failed:", repr(e))
        return False
+# --------------------------------------------------------------------------------------
+# Pipeline cache / load
+# --------------------------------------------------------------------------------------
# Loaded pipelines keyed by (model_source, device string).
_PIPE_CACHE: Dict[Tuple[str, str], Any] = dict()
def _pipeline_device_arg(device: torch.device) -> int:
    """
    Translate a torch device into the integer ``device`` argument of transformers.pipeline.

    CUDA devices map to their index (0 when the device carries no explicit index);
    every other device type maps to -1, the pipeline's value for CPU.
    """
    if device.type != "cuda":
        return -1
    index = device.index
    return 0 if index is None else int(index)
def _try_load_pipeline(model_source: str, device: torch.device):
    """
    Load (or fetch from cache) a depth-estimation pipeline.

    Args:
        model_source: Either a local directory path (as a string) or a
            Hugging Face repo id.
        device: Torch device the pipeline should run on.

    Returns:
        The transformers pipeline object; cached per (model_source, str(device)).

    Raises:
        RuntimeError: If the transformers import failed at module load.
        Exception: Whatever ``transformers.pipeline`` raises while loading.
    """
    if pipeline is None:
        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")

    key = (model_source, str(device))
    if key in _PIPE_CACHE:
        return _PIPE_CACHE[key]

    # CUDA and CPU keep the integer convention (index / -1). Any other device type
    # (e.g. MPS on macOS) is handed to the pipeline as a torch.device, so the
    # pipeline places both the model and its inputs there. Building the pipeline
    # for CPU and moving only the model afterwards leaves inputs on the CPU while
    # the weights live on the accelerator.
    if device.type in ("cuda", "cpu"):
        pipe_device = _pipeline_device_arg(device)
    else:
        pipe_device = device

    print(f"[SaliaDepth] Loading depth-estimation pipeline from '{model_source}' (device={pipe_device})")
    p = pipeline(task="depth-estimation", model=model_source, device=pipe_device)
    _PIPE_CACHE[key] = p
    return p
def get_depth_pipeline(device: torch.device):
    """
    Resolve a usable depth pipeline for ``device``.

    Order of attempts:
      1) the local model in assets/depth (its files are downloaded first if missing),
      2) the Zoe fallback repo ZOE_FALLBACK_REPO_ID.
    Returns None when neither can be loaded.
    """
    # Local model first; only attempted when its files are available.
    local_ready = ensure_local_model_files()
    if local_ready:
        try:
            return _try_load_pipeline(str(MODEL_DIR), device)
        except Exception as local_err:
            print("[SaliaDepth] Local model load failed:", repr(local_err))

    # Zoe fallback; a failure here means nothing is usable.
    try:
        print("[SaliaDepth] Falling back to Zoe path:", ZOE_FALLBACK_REPO_ID)
        fallback = _try_load_pipeline(ZOE_FALLBACK_REPO_ID, device)
    except Exception as zoe_err:
        print("[SaliaDepth] Zoe fallback load failed:", repr(zoe_err))
        return None
    return fallback
+# --------------------------------------------------------------------------------------
+# Image utilities
+# --------------------------------------------------------------------------------------
def _hwc3(x: np.ndarray) -> np.ndarray:
    """
    Convert a uint8 image to 3 channels in height-width-channel layout.

    - 2-D input gains a channel axis and is replicated to 3 channels.
    - 1-channel input is replicated to 3 channels.
    - 3-channel input is returned as-is (same object).
    - 4-channel input is alpha-composited over a white background.
    Any other channel count raises ValueError.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]

    channels = x.shape[2]
    if channels == 3:
        return x
    if channels == 1:
        return np.repeat(x, 3, axis=2)
    if channels == 4:
        rgb = x[:, :, 0:3].astype(np.float32)
        opacity = x[:, :, 3:4].astype(np.float32) / 255.0
        # Blend against white (255) wherever the pixel is not fully opaque.
        blended = rgb * opacity + 255.0 * (1.0 - opacity)
        return blended.clip(0, 255).astype(np.uint8)
    raise ValueError("Unexpected channel count")
def _pad64(n: int) -> int:
    """Return how much must be added to ``n`` to reach the next multiple of 64 (0 if already aligned)."""
    blocks = int(np.ceil(n / 64.0))
    return blocks * 64 - n
def _resize_long_side(image_u8: np.ndarray, long_side: int) -> np.ndarray:
    """
    Resize so that max(H, W) == long_side, keeping the aspect ratio.

    The input is returned unchanged when ``long_side`` is not positive, when it
    already equals the current long side, or when the image is empty (a zero-sized
    long side cannot be scaled).

    Args:
        image_u8: uint8 image array whose first two axes are height and width.
        long_side: Target size of the longer image side, in pixels.

    Returns:
        The resized image as a uint8 array, or ``image_u8`` itself when no
        resize is needed.
    """
    h, w = image_u8.shape[:2]
    cur_long = max(h, w)
    if long_side <= 0 or long_side == cur_long or cur_long == 0:
        return image_u8

    scale = float(long_side) / float(cur_long)
    # Clamp to 1 px: with extreme aspect ratios the short side can round to 0,
    # which PIL rejects.
    new_w = max(1, int(round(w * scale)))
    new_h = max(1, int(round(h * scale)))

    pil = Image.fromarray(image_u8)
    # Downscale with LANCZOS, upscale with BICUBIC
    resample = Image.BICUBIC if scale > 1.0 else Image.LANCZOS
    pil = pil.resize((new_w, new_h), resample=resample)
    return np.array(pil, dtype=np.uint8)
def _pad_to_64(image_u8: np.ndarray, mode: str = "edge"):
    """
    Pad the bottom and right of an H x W x C image so H and W become multiples of 64.

    Returns a ``(padded, remove_pad)`` pair, where ``remove_pad(x)`` crops an
    H x W x C array back to the original height and width.
    """
    rows, cols = image_u8.shape[:2]
    # Padding is added only after the existing data; the channel axis is untouched.
    pad_spec = ((0, _pad64(rows)), (0, _pad64(cols)), (0, 0))
    padded = np.pad(image_u8, pad_spec, mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        return x[:rows, :cols, :]

    return padded, remove_pad
def _comfy_to_u8(img: torch.Tensor) -> np.ndarray:
    """
    Convert a Comfy IMAGE tensor to a uint8 numpy array.

    Comfy IMAGE is float [0..1], shape [H,W,C] or [B,H,W,C]; for a batched
    tensor only the first item is converted. Values are clamped to [0, 1],
    scaled to [0, 255] and rounded.
    """
    frame = img[0] if img.ndim == 4 else img
    unit = frame.detach().cpu().float().clamp(0, 1).numpy()
    scaled = unit * 255.0
    return scaled.round().astype(np.uint8)
def _u8_to_comfy(img_u8: np.ndarray) -> torch.Tensor:
    """Convert a uint8 image to a float32 tensor in [0, 1] with shape [1,H,W,C] (3 channels)."""
    rgb = _hwc3(img_u8)
    unit = rgb.astype(np.float32) / 255.0
    return torch.from_numpy(unit)[None, ...]  # add the leading batch axis
def _depth_to_uint8(pipe, input_u8: np.ndarray, detect_long_side: int) -> np.ndarray:
    """
    Run depth estimation on one uint8 image and return a 3-channel uint8 depth map.

    Steps:
    - force 3 channels, resize so the long side is ``detect_long_side``
    - pad height/width up to multiples of 64 (edge replication)
    - run ``pipe`` on the padded image and read its "depth" output
    - normalise between the 2nd and 85th percentiles, then invert
    - convert to 3-channel uint8 and crop the padding away
    """
    rgb = _hwc3(input_u8)
    padded, remove_pad = _pad_to_64(_resize_long_side(rgb, detect_long_side), mode="edge")

    with torch.no_grad():
        prediction = pipe(Image.fromarray(padded))

    depth_arr = np.array(prediction["depth"], dtype=np.float32)

    # Percentile-based normalisation (same percentiles as the author's zoe code).
    lo = np.percentile(depth_arr, 2)
    hi = np.percentile(depth_arr, 85)
    span = hi - lo
    denom = span if span > 1e-6 else 1e-6  # guard against a flat depth map
    depth_arr = (depth_arr - lo) / denom
    depth_arr = 1.0 - depth_arr
    depth_u8 = (depth_arr * 255.0).clip(0, 255).astype(np.uint8)

    return remove_pad(_hwc3(depth_u8))
+# --------------------------------------------------------------------------------------
+# ComfyUI Node
+# --------------------------------------------------------------------------------------
class Salia_Depth_Preprocessor:
    """
    ComfyUI node that turns an IMAGE batch into depth maps.

    The depth pipeline comes from ``get_depth_pipeline`` (local model first, Zoe
    fallback). If no pipeline can be loaded the input is returned unchanged; if
    inference fails for a single frame, that frame is passed through instead of
    a depth map.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: an IMAGE and an integer ``resolution``."""
        return {
            "required": {
                "image": ("IMAGE",),
                # -1 (the default and minimum) means "use the image's own long side".
                "resolution": ("INT", {"default": -1, "min": -1, "max": 8192, "step": 1}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "execute"
    CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"

    @staticmethod
    def _passthrough_frame(frame: torch.Tensor) -> torch.Tensor:
        """
        Shape one input frame [H,W,C] so it can be concatenated with depth outputs.

        Depth outputs are CPU float32 tensors of shape [1,H,W,3]; the frame is
        brought to the same device, dtype and channel count. Extra channels
        (e.g. alpha) are dropped; with fewer than 3 channels the first channel
        is replicated. A 3-channel CPU float32 frame keeps its values.
        """
        out = frame.detach().cpu().float()
        channels = out.shape[-1]
        if channels > 3:
            out = out[..., :3]
        elif channels < 3:
            out = out[..., :1].repeat(1, 1, 3)
        return out.unsqueeze(0)

    def execute(self, image, resolution=-1):
        """
        Compute a depth map for every frame of ``image``.

        Args:
            image: Comfy IMAGE tensor, [H,W,C] or [B,H,W,C].
            resolution: Long-side size used for detection; -1 keeps the input's
                own long side.

        Returns:
            A 1-tuple holding the result batch [B,H,W,3]. If everything fails
            (no pipeline, every frame failed, or the batch is empty) the input
            image is returned instead.
        """
        try:
            device = model_management.get_torch_device()
        except Exception:
            device = torch.device("cpu")

        pipe = get_depth_pipeline(device)
        if pipe is None:
            # Hard fail: return input image unchanged
            print("[SaliaDepth] No pipeline available. Returning input image unchanged.")
            return (image,)

        # Batch support: image is [B,H,W,C]
        if image.ndim == 3:
            image = image.unsqueeze(0)

        requested = int(resolution)
        outs = []
        failed = 0
        for i in range(image.shape[0]):
            frame = image[i]
            # original size
            h0 = int(frame.shape[0])
            w0 = int(frame.shape[1])
            detect_long_side = max(w0, h0) if requested == -1 else requested
            try:
                inp_u8 = _comfy_to_u8(frame)
                depth_u8 = _depth_to_uint8(pipe, inp_u8, detect_long_side)
                # resize depth back to original input size
                pil = Image.fromarray(depth_u8)
                pil = pil.resize((w0, h0), resample=Image.BILINEAR)
                depth_u8 = np.array(pil, dtype=np.uint8)
                outs.append(_u8_to_comfy(depth_u8))
            except Exception as e:
                # Per-image fail: pass that image through. It is normalised so that
                # torch.cat below cannot fail on a channel/dtype/device mismatch
                # with the depth frames.
                print(f"[SaliaDepth] Inference failed for batch index {i}: {repr(e)}. Passing through input.")
                failed += 1
                outs.append(Salia_Depth_Preprocessor._passthrough_frame(frame))

        if failed == len(outs):
            # Nothing succeeded (or the batch is empty): hand back the input as-is.
            return (image,)

        out = torch.cat(outs, dim=0)
        return (out,)
# Node registration tables: node key -> implementing class, and node key -> display name.
NODE_CLASS_MAPPINGS = dict(SaliaDepthPreprocessor=Salia_Depth_Preprocessor)
NODE_DISPLAY_NAME_MAPPINGS = dict(SaliaDepthPreprocessor="Salia Depth (assets/depth local-first)")