| | import os |
| | import shutil |
| | import urllib.request |
| | from pathlib import Path |
| | from typing import Dict, Tuple, Any, Optional, List |
| |
|
| | import numpy as np |
| | import torch |
| | from PIL import Image |
| |
|
| | import comfy.model_management as model_management |
| |
|
| | |
# transformers is optional at import time: if it is missing, the node still
# registers and the import error is surfaced later, when a pipeline is
# actually requested. Pre-initialize the error slot so it is always bound
# (previously it only existed on the failure path, risking NameError).
_TRANSFORMERS_IMPORT_ERROR: Optional[Exception] = None
try:
    from transformers import pipeline
except Exception as e:
    pipeline = None
    _TRANSFORMERS_IMPORT_ERROR = e
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
# Root of the plugin; this file is expected to live one directory below it
# (parent.parent of this module's path).
PLUGIN_ROOT = Path(__file__).resolve().parent.parent

# Local model storage. NOTE: the directory is created eagerly at import time
# (module-level side effect) so later downloads never race a missing folder.
MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Files that must all exist in MODEL_DIR for the local model to be loadable,
# mapped to the URL each one is fetched from when missing.
REQUIRED_FILES = {
    "config.json": "https://huggingface.co/saliacoel/depth/resolve/main/config.json",
    "model.safetensors": "https://huggingface.co/saliacoel/depth/resolve/main/model.safetensors",
    "preprocessor_config.json": "https://huggingface.co/saliacoel/depth/resolve/main/preprocessor_config.json",
}

# Hugging Face repo used as a fallback when the local files are missing or
# fail to load.
ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | def _make_logger() -> Tuple[List[str], Any]: |
| | lines: List[str] = [] |
| |
|
| | def log(msg: str): |
| | |
| | try: |
| | print(msg) |
| | except Exception: |
| | pass |
| | |
| | lines.append(str(msg)) |
| |
|
| | return lines, log |
| |
|
| |
|
| | def _fmt_bytes(n: Optional[int]) -> str: |
| | if n is None: |
| | return "?" |
| | |
| | for unit in ["B", "KB", "MB", "GB", "TB"]: |
| | if n < 1024: |
| | return f"{n:.0f}{unit}" |
| | n /= 1024.0 |
| | return f"{n:.1f}PB" |
| |
|
| |
|
| | def _file_size(path: Path) -> Optional[int]: |
| | try: |
| | return path.stat().st_size |
| | except Exception: |
| | return None |
| |
|
| |
|
| | def _hf_cache_info() -> Dict[str, str]: |
| | info: Dict[str, str] = {} |
| | info["env.HF_HOME"] = os.environ.get("HF_HOME", "") |
| | info["env.HF_HUB_CACHE"] = os.environ.get("HF_HUB_CACHE", "") |
| | info["env.TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "") |
| | info["env.HUGGINGFACE_HUB_CACHE"] = os.environ.get("HUGGINGFACE_HUB_CACHE", "") |
| |
|
| | try: |
| | from huggingface_hub import constants as hf_constants |
| | |
| | info["huggingface_hub.constants.HF_HOME"] = str(getattr(hf_constants, "HF_HOME", "")) |
| | info["huggingface_hub.constants.HF_HUB_CACHE"] = str(getattr(hf_constants, "HF_HUB_CACHE", "")) |
| | except Exception: |
| | pass |
| |
|
| | return info |
| |
|
| |
|
| | |
| | |
| | |
| |
|
def _have_required_files() -> bool:
    """Return True when every file listed in REQUIRED_FILES exists under MODEL_DIR."""
    for name in REQUIRED_FILES:
        if not (MODEL_DIR / name).exists():
            return False
    return True
| |
|
| |
|
def _download_url_to_file(url: str, dst: Path, timeout: int = 180) -> None:
    """
    Download url into dst using a temporary file plus atomic rename, so a
    partial download never appears under the final name.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    tmp = dst.with_suffix(dst.suffix + ".tmp")

    # Drop any stale leftover from a previously aborted download.
    if tmp.exists():
        try:
            tmp.unlink()
        except Exception:
            pass

    request = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-SaliaDepth/1.1"})
    with urllib.request.urlopen(request, timeout=timeout) as response, open(tmp, "wb") as out_file:
        shutil.copyfileobj(response, out_file)

    tmp.replace(dst)
| |
|
| |
|
def ensure_local_model_files(log) -> bool:
    """
    Make sure MODEL_DIR holds every file from REQUIRED_FILES, downloading
    whichever are missing. Returns True when all files are available, False
    when any download failed.
    """
    log("[SaliaDepth] ===== Local model file check =====")
    log(f"[SaliaDepth] Plugin root: {PLUGIN_ROOT}")
    log(f"[SaliaDepth] Local model dir (on drive): {MODEL_DIR}")

    # Report the current on-disk state of every expected file.
    for fname, url in REQUIRED_FILES.items():
        fpath = MODEL_DIR / fname
        present = fpath.exists()
        size = _file_size(fpath) if present else None
        log(f"[SaliaDepth] - {fname}")
        log(f"[SaliaDepth] local path: {fpath} exists={present} size={_fmt_bytes(size)}")
        log(f"[SaliaDepth] remote url : {url}")

    if _have_required_files():
        log("[SaliaDepth] All required local files already exist. No download needed.")
        return True

    log("[SaliaDepth] One or more local files missing. Attempting download...")

    try:
        for fname, url in REQUIRED_FILES.items():
            fpath = MODEL_DIR / fname
            if not fpath.exists():
                log(f"[SaliaDepth] Downloading '{fname}' -> '{fpath}'")
                _download_url_to_file(url, fpath)
                log(f"[SaliaDepth] Downloaded '{fname}' size={_fmt_bytes(_file_size(fpath))}")

        ok = _have_required_files()
        log(f"[SaliaDepth] Download finished. ok={ok}")
        return ok
    except Exception as e:
        log(f"[SaliaDepth] Download failed with error: {repr(e)}")
        return False
| |
|
| |
|
| | |
| | |
| | |
| |
|
def HWC3(x: np.ndarray) -> np.ndarray:
    """
    Normalize a uint8 image to 3-channel HWC.

    2-D input is treated as grayscale and replicated to three channels;
    4-channel input is alpha-composited over a white background; 3-channel
    input is returned unchanged (same object).
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels in (1, 3, 4)
    if channels == 3:
        return x
    if channels == 1:
        return np.repeat(x, 3, axis=2)
    # RGBA: blend over white using the normalized alpha channel.
    rgb = x[:, :, :3].astype(np.float32)
    alpha = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * alpha + 255.0 * (1.0 - alpha)
    return blended.clip(0, 255).astype(np.uint8)
| |
|
| |
|
def pad64(x: int) -> int:
    """
    Return the padding needed to round x up to the next multiple of 64.

    Uses exact integer arithmetic; the previous float-based np.ceil form
    could misround for very large values due to float precision.
    """
    return (64 - int(x) % 64) % 64
| |
|
| |
|
def safer_memory(x: np.ndarray) -> np.ndarray:
    """
    Return an independent, C-contiguous copy of x.

    ndarray.copy(order="C") already guarantees both properties in a single
    allocation; the previous chain (copy -> ascontiguousarray -> copy) made
    up to three copies for the same result.
    """
    return x.copy(order="C")
| |
|
| |
|
def resize_image_with_pad_min_side(
    input_image: np.ndarray,
    resolution: int,
    upscale_method: str = "INTER_CUBIC",
    skip_hwc3: bool = False,
    mode: str = "edge",
    log=None
) -> Tuple[np.ndarray, Any]:
    """
    EXACT behavior like your zoe.transformers.py:
        k = resolution / min(H,W)
        resize to (W_target, H_target)
        pad to multiple of 64
        return padded image and remove_pad() closure

    Args:
        input_image: uint8 image (HWC or 2-D; normalized via HWC3 unless skip_hwc3).
        resolution: target size for the SHORTER side; <= 0 returns the image
            unchanged with an identity remove_pad.
        upscale_method: cv2 interpolation name, used only when upscaling (k > 1).
        skip_hwc3: skip the HWC3 normalization step.
        mode: np.pad mode for the bottom/right padding.
        log: optional callable for warnings.

    Returns:
        (padded_image, remove_pad) where remove_pad crops a same-padded array
        back to the pre-pad target size.
    """
    # cv2 is optional; without it we fall back to PIL resizing, which may
    # produce slightly different pixels than the cv2 reference path.
    cv2 = None
    try:
        import cv2 as _cv2
        cv2 = _cv2
    except Exception:
        cv2 = None
        if log:
            log("[SaliaDepth] WARN: cv2 not available; resizing will use PIL fallback (may change results).")

    if skip_hwc3:
        img = input_image
    else:
        img = HWC3(input_image)

    H_raw, W_raw, _ = img.shape
    if resolution <= 0:
        # No resize requested: identity un-pad closure.
        return img, (lambda x: x)

    # Scale so the shorter side becomes `resolution`.
    k = float(resolution) / float(min(H_raw, W_raw))
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))

    if cv2 is not None:
        upscale_methods = {
            "INTER_NEAREST": cv2.INTER_NEAREST,
            "INTER_LINEAR": cv2.INTER_LINEAR,
            "INTER_AREA": cv2.INTER_AREA,
            "INTER_CUBIC": cv2.INTER_CUBIC,
            "INTER_LANCZOS4": cv2.INTER_LANCZOS4,
        }
        method = upscale_methods.get(upscale_method, cv2.INTER_CUBIC)
        # Upscaling uses the requested method; downscaling always uses
        # INTER_AREA (matches the reference implementation).
        img = cv2.resize(img, (W_target, H_target), interpolation=method if k > 1 else cv2.INTER_AREA)
    else:
        # PIL fallback: BICUBIC up, LANCZOS down (closest analogues).
        pil = Image.fromarray(img)
        resample = Image.BICUBIC if k > 1 else Image.LANCZOS
        pil = pil.resize((W_target, H_target), resample=resample)
        img = np.array(pil, dtype=np.uint8)

    # Pad bottom/right so both sides are multiples of 64.
    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        # Crop back to the pre-pad target size and return an owned copy.
        return safer_memory(x[:H_target, :W_target, ...])

    return safer_memory(img_padded), remove_pad
| |
|
| |
|
def pad_only_to_64(img_u8: np.ndarray, mode: str = "edge") -> Tuple[np.ndarray, Any]:
    """
    For resolution == -1: keep the original resolution but pad bottom/right
    to multiples of 64. The returned remove_pad() crops a same-padded array
    back to the original height/width.
    """
    img = HWC3(img_u8)
    height, width = img.shape[0], img.shape[1]
    pad_h = pad64(height)
    pad_w = pad64(width)
    padded = np.pad(img, [[0, pad_h], [0, pad_w], [0, 0]], mode=mode)

    def remove_pad(x: np.ndarray) -> np.ndarray:
        return safer_memory(x[:height, :width, ...])

    return safer_memory(padded), remove_pad
| |
|
| |
|
| | |
| | |
| | |
| |
|
def composite_rgba_over_white_keep_alpha(inp_u8: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Split alpha handling from depth input.

    RGBA input: returns (RGB composited over WHITE, alpha channel as uint8).
    RGB input: returns (normalized RGB, None).
    """
    is_rgba = inp_u8.ndim == 3 and inp_u8.shape[2] == 4
    if not is_rgba:
        return HWC3(inp_u8), None

    rgba = inp_u8.astype(np.uint8)
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgba[:, :, :3].astype(np.float32) * alpha + 255.0 * (1.0 - alpha)
    rgb_white = blended.clip(0, 255).astype(np.uint8)
    return rgb_white, rgba[:, :, 3].copy()
| |
|
| |
|
def apply_alpha_then_black_background(depth_rgb_u8: np.ndarray, alpha_u8: np.ndarray) -> np.ndarray:
    """
    Attach the alpha channel to the depth map and composite over BLACK,
    returning RGB. Compositing over black is exactly depth_rgb * alpha.
    """
    rgb = HWC3(depth_rgb_u8).astype(np.float32)
    weight = (alpha_u8.astype(np.float32) / 255.0)[:, :, None]
    return (rgb * weight).clip(0, 255).astype(np.uint8)
| |
|
| |
|
| | |
| | |
| | |
| |
|
def comfy_tensor_to_u8(img: torch.Tensor) -> np.ndarray:
    """
    Convert a Comfy IMAGE tensor (float 0..1, [H,W,C] or [B,H,W,C]) into a
    uint8 HWC array. For batched input only the first item is converted.
    """
    single = img[0] if img.ndim == 4 else img
    arr = single.detach().cpu().float().clamp(0, 1).numpy()
    return (arr * 255.0).round().astype(np.uint8)
| |
|
| |
|
def u8_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
    """Convert a uint8 image to a batched Comfy IMAGE tensor [1,H,W,3] in 0..1."""
    rgb = HWC3(img_u8)
    tensor = torch.from_numpy(rgb.astype(np.float32) / 255.0)
    return tensor[None, ...]
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Cache of constructed pipelines keyed by (model_source, device string), so
# repeated node executions reuse the already-loaded model.
_PIPE_CACHE: Dict[Tuple[str, str], Any] = {}
| |
|
| |
|
def _try_load_pipeline(model_source: str, device: torch.device, log):
    """
    Build (or reuse from _PIPE_CACHE) a transformers depth-estimation pipeline.

    Use transformers.pipeline like Zoe code does.
    We intentionally do NOT pass device=... here, and instead move model like Zoe node.

    Raises:
        RuntimeError: when transformers failed to import at module load time.
    """
    if pipeline is None:
        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")

    # One cached pipeline per (source, device) pair.
    key = (model_source, str(device))
    if key in _PIPE_CACHE:
        log(f"[SaliaDepth] Using cached pipeline for source='{model_source}' device='{device}'")
        return _PIPE_CACHE[key]

    log(f"[SaliaDepth] Creating pipeline(task='depth-estimation', model='{model_source}')")
    p = pipeline(task="depth-estimation", model=model_source)

    # Move the underlying model to the target device manually (mirrors the
    # Zoe node); a failed move is logged but not fatal — inference then runs
    # on whatever device the pipeline defaulted to.
    try:
        p.model = p.model.to(device)
        p.device = device
        log(f"[SaliaDepth] Moved pipeline model to device: {device}")
    except Exception as e:
        log(f"[SaliaDepth] WARN: Could not move pipeline model to device {device}: {repr(e)}")

    # Best-effort diagnostics about what was actually loaded.
    try:
        cfg = p.model.config
        log(f"[SaliaDepth] Model class: {p.model.__class__.__name__}")
        log(f"[SaliaDepth] Config class: {cfg.__class__.__name__}")
        log(f"[SaliaDepth] Config model_type: {getattr(cfg, 'model_type', '')}")
        log(f"[SaliaDepth] Config _name_or_path: {getattr(cfg, '_name_or_path', '')}")
    except Exception as e:
        log(f"[SaliaDepth] WARN: Could not log model config: {repr(e)}")

    _PIPE_CACHE[key] = p
    return p
| |
|
| |
|
def get_depth_pipeline(device: torch.device, log):
    """
    Resolve a depth-estimation pipeline.

    Order: ensure the local assets/depth files exist (downloading if needed)
    and try that directory first; on failure fall back to the Zoe hub model;
    if both fail, return None.
    """
    log("[SaliaDepth] ===== Hugging Face cache info (fallback path) =====")
    for key, value in _hf_cache_info().items():
        if value:
            log(f"[SaliaDepth] {key} = {value}")
    log(f"[SaliaDepth] Zoe fallback repo id: {ZOE_FALLBACK_REPO_ID}")

    # Attempt 1: local directory.
    if ensure_local_model_files(log):
        try:
            log(f"[SaliaDepth] Trying LOCAL model from directory: {MODEL_DIR}")
            return _try_load_pipeline(str(MODEL_DIR), device, log)
        except Exception as e:
            log(f"[SaliaDepth] Local model load FAILED: {repr(e)}")

    # Attempt 2: Hugging Face hub fallback.
    try:
        log(f"[SaliaDepth] Trying ZOE fallback model: {ZOE_FALLBACK_REPO_ID}")
        return _try_load_pipeline(ZOE_FALLBACK_REPO_ID, device, log)
    except Exception as e:
        log(f"[SaliaDepth] Zoe fallback load FAILED: {repr(e)}")

    return None
| |
|
| |
|
| | |
| | |
| | |
| |
|
def depth_estimate_zoe_style(
    pipe,
    input_rgb_u8: np.ndarray,
    detect_resolution: int,
    log,
    upscale_method: str = "INTER_CUBIC"
) -> np.ndarray:
    """
    Run depth estimation the way the reference ZoeDetector does.

    Steps: optional min-side resize (detect_resolution == -1 keeps the
    original size), pad to a multiple of 64, run the pipeline, normalize
    with the p2/p85 percentiles, invert, scale to uint8, un-pad.

    Args:
        pipe: a transformers depth-estimation pipeline (callable on a PIL image).
        input_rgb_u8: uint8 RGB input image.
        detect_resolution: target short-side size, or -1 for "no resize".
        log: logging callable.
        upscale_method: cv2 interpolation name used when upscaling.

    Returns:
        uint8 RGB depth map at the working (pre-pad) resolution.
    """
    if detect_resolution == -1:
        work_img, remove_pad = pad_only_to_64(input_rgb_u8, mode="edge")
        log(f"[SaliaDepth] Preprocess: resolution=-1 (no resize), padded to 64. work={work_img.shape}")
    else:
        work_img, remove_pad = resize_image_with_pad_min_side(
            input_rgb_u8,
            int(detect_resolution),
            upscale_method=upscale_method,
            skip_hwc3=False,
            mode="edge",
            log=log
        )
        log(f"[SaliaDepth] Preprocess: min-side resized to {detect_resolution}, padded to 64. work={work_img.shape}")

    pil_image = Image.fromarray(work_img)

    with torch.no_grad():
        result = pipe(pil_image)
        depth = result["depth"]

    # np.array handles both PIL.Image and array-like outputs identically, so
    # the previous isinstance branch (whose two arms were the same) is gone.
    depth_array = np.array(depth, dtype=np.float32)

    # Percentile-based normalization window (robust to outliers), matching
    # the reference implementation.
    vmin = float(np.percentile(depth_array, 2))
    vmax = float(np.percentile(depth_array, 85))

    log(f"[SaliaDepth] Depth raw stats: shape={depth_array.shape} vmin(p2)={vmin:.6f} vmax(p85)={vmax:.6f} mean={float(depth_array.mean()):.6f}")

    depth_array = depth_array - vmin
    denom = (vmax - vmin)
    if abs(denom) < 1e-12:
        # Constant depth map: avoid division by ~0 producing NaN/inf.
        log("[SaliaDepth] WARN: vmax==vmin; forcing denom epsilon to avoid NaNs.")
        denom = 1e-6
    depth_array = depth_array / denom

    # Invert the normalized map (matches the reference implementation's
    # output convention).
    depth_array = 1.0 - depth_array

    depth_image = (depth_array * 255.0).clip(0, 255).astype(np.uint8)

    detected_map = remove_pad(HWC3(depth_image))
    log(f"[SaliaDepth] Output (post-remove_pad): {detected_map.shape} dtype={detected_map.dtype}")
    return detected_map
| |
|
| |
|
def resize_to_original(depth_rgb_u8: np.ndarray, w0: int, h0: int, log) -> np.ndarray:
    """
    Resize the depth map back to the original input size (w0, h0).

    Prefers cv2 INTER_LINEAR; any cv2 failure (missing module or resize
    error) falls back to PIL BILINEAR.
    """
    try:
        import cv2
        resized = cv2.resize(depth_rgb_u8, (w0, h0), interpolation=cv2.INTER_LINEAR)
        return resized.astype(np.uint8)
    except Exception as e:
        log(f"[SaliaDepth] WARN: cv2 resize failed ({repr(e)}); using PIL.")
        pil = Image.fromarray(depth_rgb_u8)
        resized_pil = pil.resize((w0, h0), resample=Image.BILINEAR)
        return np.array(resized_pil, dtype=np.uint8)
| |
|
| |
|
| | |
| | |
| | |
| |
|
class Salia_Depth_Preprocessor:
    """
    ComfyUI node: depth-map preprocessor.

    Loads a depth-estimation pipeline (local assets/depth files first, Zoe
    hub model as fallback), runs it per batch item, and returns the depth
    IMAGE batch plus a STRING containing the collected log lines.
    """

    @classmethod
    def INPUT_TYPES(cls):
        # resolution = -1 keeps the input resolution (pad-to-64 only);
        # positive values resize the shorter side to that many pixels.
        return {
            "required": {
                "image": ("IMAGE",),
                "resolution": ("INT", {"default": -1, "min": -1, "max": 8192, "step": 1}),
            }
        }

    RETURN_TYPES = ("IMAGE", "STRING")  # depth image batch + textual log
    FUNCTION = "execute"
    CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"

    def execute(self, image, resolution=-1):
        """
        Run depth estimation on a Comfy IMAGE batch.

        Returns (depth_images, log_text). If no pipeline can be loaded the
        input is returned unchanged; if a single batch item fails, that item
        is passed through while the rest of the batch is still processed.
        """
        lines, log = _make_logger()
        log("[SaliaDepth] ==================================================")
        log("[SaliaDepth] SaliaDepthPreprocessor starting")
        log(f"[SaliaDepth] resolution input = {resolution}")

        # Prefer Comfy's device choice; degrade to CPU rather than failing.
        try:
            device = model_management.get_torch_device()
        except Exception as e:
            device = torch.device("cpu")
            log(f"[SaliaDepth] WARN: model_management.get_torch_device failed: {repr(e)} -> using CPU")

        log(f"[SaliaDepth] torch device = {device}")

        # Resolve the pipeline; any crash is treated the same as "no pipeline".
        pipe = None
        try:
            pipe = get_depth_pipeline(device, log)
        except Exception as e:
            log(f"[SaliaDepth] ERROR: get_depth_pipeline crashed: {repr(e)}")
            pipe = None

        if pipe is None:
            log("[SaliaDepth] FATAL: No pipeline available. Returning input image unchanged.")
            return (image, "\n".join(lines))

        # Normalize to a batched [B,H,W,C] tensor.
        if image.ndim == 3:
            image = image.unsqueeze(0)

        outs = []
        for i in range(image.shape[0]):
            try:
                h0 = int(image[i].shape[0])
                w0 = int(image[i].shape[1])
                c0 = int(image[i].shape[2])
                log(f"[SaliaDepth] ---- Batch index {i} input shape = ({h0},{w0},{c0}) ----")

                inp_u8 = comfy_tensor_to_u8(image[i])

                # RGBA inputs: run depth on an over-white composite, keep the
                # alpha for the post-step below.
                rgb_for_depth, alpha_u8 = composite_rgba_over_white_keep_alpha(inp_u8)
                had_rgba = alpha_u8 is not None
                log(f"[SaliaDepth] had_rgba={had_rgba}")

                depth_rgb = depth_estimate_zoe_style(
                    pipe=pipe,
                    input_rgb_u8=rgb_for_depth,
                    detect_resolution=int(resolution),
                    log=log,
                    upscale_method="INTER_CUBIC"
                )

                # Bring the depth map back to the input resolution.
                depth_rgb = resize_to_original(depth_rgb, w0=w0, h0=h0, log=log)

                if had_rgba:
                    # The alpha channel must match the output size before the
                    # multiply; resize it if the shapes diverged.
                    if alpha_u8.shape[0] != h0 or alpha_u8.shape[1] != w0:
                        log("[SaliaDepth] Alpha size mismatch; resizing alpha to original size.")
                        try:
                            import cv2
                            alpha_u8 = cv2.resize(alpha_u8, (w0, h0), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
                        except Exception:
                            pil_a = Image.fromarray(alpha_u8)
                            pil_a = pil_a.resize((w0, h0), resample=Image.BILINEAR)
                            alpha_u8 = np.array(pil_a, dtype=np.uint8)

                    depth_rgb = apply_alpha_then_black_background(depth_rgb, alpha_u8)
                    log("[SaliaDepth] Applied RGBA post-step (alpha + black background).")

                outs.append(u8_to_comfy_tensor(depth_rgb))

            except Exception as e:
                # Per-item failure: pass the original image through so the
                # rest of the batch still completes.
                log(f"[SaliaDepth] ERROR: Inference failed at batch index {i}: {repr(e)}")
                log("[SaliaDepth] Passing through original input image for this batch item.")
                outs.append(image[i].unsqueeze(0))

        out = torch.cat(outs, dim=0)
        log("[SaliaDepth] Done.")
        return (out, "\n".join(lines))
| |
|
| |
|
# ComfyUI registration: internal node name -> implementing class.
NODE_CLASS_MAPPINGS = {
    "SaliaDepthPreprocessor": Salia_Depth_Preprocessor
}

# ComfyUI registration: internal node name -> label shown in the UI.
NODE_DISPLAY_NAME_MAPPINGS = {
    "SaliaDepthPreprocessor": "Salia Depth (local assets/depth + logs)"
}