saliacoel committed on
Commit
fc6c8da
·
verified ·
1 Parent(s): 0911006

Upload salia_detailer_ezpz.py

Browse files
Files changed (1) hide show
  1. salia_detailer_ezpz.py +85 -57
salia_detailer_ezpz.py CHANGED
@@ -22,7 +22,7 @@ except Exception as e:
22
 
23
 
24
  # -------------------------------------------------------------------------------------
25
- # Global caches (lazy-load + don't load duplicates across multiple node instances)
26
  # -------------------------------------------------------------------------------------
27
 
28
  _CKPT_CACHE: Dict[str, Tuple[Any, Any, Any]] = {}
@@ -32,19 +32,19 @@ _CN_LOCK = threading.Lock()
32
 
33
 
34
  # -------------------------------------------------------------------------------------
35
- # Plugin root detection (robust against hyphen/underscore module naming)
36
  # -------------------------------------------------------------------------------------
37
 
38
  def _find_plugin_root() -> Path:
39
  """
40
  Walk upwards from this file until we find an 'assets' folder.
41
- This works regardless of how Comfy names the python module.
42
  """
43
  here = Path(__file__).resolve()
44
- for parent in [here.parent] + list(here.parents)[:10]:
45
  if (parent / "assets").is_dir():
46
  return parent
47
- # fallback: typical layout nodes/<thisfile>.py -> plugin root is parent.parent
48
  return here.parent.parent
49
 
50
 
@@ -135,6 +135,48 @@ def _resize_mask_lanczos(mask: torch.Tensor, w: int, h: int) -> torch.Tensor:
135
  return torch.cat(outs, dim=0)
136
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  # -------------------------------------------------------------------------------------
139
  # Core lazy loaders (checkpoint + controlnet), cached globally
140
  # -------------------------------------------------------------------------------------
@@ -202,10 +244,13 @@ def _safe_asset_path(asset_rel_path: str) -> Path:
202
 
203
  base = img_dir.resolve()
204
  rel = Path(asset_rel_path)
 
205
  if rel.is_absolute():
206
  raise ValueError("Absolute paths are not allowed for asset_image.")
207
 
208
  full = (base / rel).resolve()
 
 
209
  if base != full and base not in full.parents:
210
  raise ValueError(f"Invalid asset path (path traversal blocked): {asset_rel_path}")
211
 
@@ -222,37 +267,31 @@ def _load_asset_image_and_mask(asset_rel_path: str) -> Tuple[torch.Tensor, torch
222
  Returns (IMAGE, MASK) in ComfyUI formats.
223
 
224
  Mask semantics: match ComfyUI core LoadImage:
225
- - If PNG has alpha: mask = 1 - alpha
226
- - Else: mask = 0
227
  """
228
  p = _safe_asset_path(asset_rel_path)
229
 
230
  im = Image.open(p)
231
  im = ImageOps.exif_transpose(im)
232
 
233
- had_alpha = ("A" in im.getbands())
234
  rgba = im.convert("RGBA")
235
  rgb = rgba.convert("RGB")
236
 
237
  rgb_arr = np.array(rgb).astype(np.float32) / 255.0 # [H,W,3]
238
  img_t = torch.from_numpy(rgb_arr)[None, ...]
239
 
240
- if had_alpha:
241
- alpha = np.array(rgba.getchannel("A")).astype(np.float32) / 255.0
242
- mask = 1.0 - alpha
243
- else:
244
- h, w = rgb.size[1], rgb.size[0]
245
- mask = np.zeros((h, w), dtype=np.float32)
246
 
247
  mask_t = torch.from_numpy(mask)[None, ...]
248
  return img_t, mask_t
249
 
250
 
251
  # -------------------------------------------------------------------------------------
252
- # Salia_Depth (INLINED: exact logic, no imports from other files)
253
  # -------------------------------------------------------------------------------------
254
 
255
- # Local model path: assets/depth
256
  MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
257
  MODEL_DIR.mkdir(parents=True, exist_ok=True)
258
 
@@ -264,6 +303,9 @@ REQUIRED_FILES = {
264
 
265
  ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
266
 
 
 
 
267
 
268
def _have_required_files() -> bool:
    """Return True when every required model file exists under MODEL_DIR."""
    # Iterating the dict directly yields its keys; `.keys()` is redundant.
    return all((MODEL_DIR / name).exists() for name in REQUIRED_FILES)
@@ -420,26 +462,24 @@ def u8_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
420
  return t.unsqueeze(0) # [1,H,W,C]
421
 
422
 
423
- _PIPE_CACHE: Dict[Tuple[str, str], Any] = {} # (model_source, device_str) -> pipeline
424
-
425
-
426
  def _try_load_pipeline(model_source: str, device: torch.device):
427
  if pipeline is None:
428
  raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")
429
 
430
  key = (model_source, str(device))
431
- if key in _PIPE_CACHE:
432
- return _PIPE_CACHE[key]
 
433
 
434
- p = pipeline(task="depth-estimation", model=model_source)
435
- try:
436
- p.model = p.model.to(device)
437
- p.device = device
438
- except Exception:
439
- pass
440
 
441
- _PIPE_CACHE[key] = p
442
- return p
443
 
444
 
445
  def get_depth_pipeline(device: torch.device):
@@ -511,28 +551,24 @@ def resize_to_original(depth_rgb_u8: np.ndarray, w0: int, h0: int) -> np.ndarray
511
 
512
  def _salia_depth_execute(image: torch.Tensor, resolution: int = -1) -> torch.Tensor:
513
  """
514
- Internal callable version of your Salia_Depth node:
515
  input: IMAGE [B,H,W,3 or 4]
516
  output: IMAGE [B,H,W,3]
517
  """
518
- # Get torch device
519
  try:
520
  device = model_management.get_torch_device()
521
  except Exception:
522
  device = torch.device("cpu")
523
 
524
- # Load pipeline
525
- pipe = None
526
  try:
527
- pipe = get_depth_pipeline(device)
528
  except Exception:
529
- pipe = None
530
 
531
- # If everything fails, pass-through
532
- if pipe is None:
533
  return image
534
 
535
- # Batch support
536
  if image.ndim == 3:
537
  image = image.unsqueeze(0)
538
 
@@ -544,22 +580,18 @@ def _salia_depth_execute(image: torch.Tensor, resolution: int = -1) -> torch.Ten
544
 
545
  inp_u8 = comfy_tensor_to_u8(image[i])
546
 
547
- # RGBA rule (pre)
548
  rgb_for_depth, alpha_u8 = composite_rgba_over_white_keep_alpha(inp_u8)
549
  had_rgba = alpha_u8 is not None
550
 
551
- # Depth
552
  depth_rgb = depth_estimate_zoe_style(
553
- pipe=pipe,
554
  input_rgb_u8=rgb_for_depth,
555
  detect_resolution=int(resolution),
556
  upscale_method="INTER_CUBIC",
557
  )
558
 
559
- # Resize back to original size
560
  depth_rgb = resize_to_original(depth_rgb, w0=w0, h0=h0)
561
 
562
- # RGBA rule (post)
563
  if had_rgba:
564
  if alpha_u8.shape[0] != h0 or alpha_u8.shape[1] != w0:
565
  try:
@@ -602,7 +634,6 @@ def _alpha_over_region(base: torch.Tensor, overlay_rgba: torch.Tensor, x: int, y
602
  if x < 0 or y < 0 or x + s > W or y + s > H:
603
  raise ValueError(f"Square paste out of bounds. base={W}x{H}, paste at ({x},{y}) size={s}")
604
 
605
- # Match batch
606
  if b2 != B:
607
  if b2 == 1 and B > 1:
608
  overlay_rgba = overlay_rgba.expand(B, -1, -1, -1)
@@ -618,7 +649,6 @@ def _alpha_over_region(base: torch.Tensor, overlay_rgba: torch.Tensor, x: int, y
618
  comp_rgb = overlay_rgb * overlay_a + base_rgb * (1.0 - overlay_a)
619
  out[:, y:y + s, x:x + s, 0:3] = comp_rgb
620
 
621
- # If base has alpha, composite alpha too
622
  if C == 4:
623
  base_a = out[:, y:y + s, x:x + s, 3:4].clamp(0, 1)
624
  comp_a = overlay_a + base_a * (1.0 - overlay_a)
@@ -703,6 +733,7 @@ class Salia_OneNode_WorkflowSquare:
703
  scheduler: str,
704
  denoise: float,
705
  ):
 
706
  if image.ndim == 3:
707
  image = image.unsqueeze(0)
708
  if image.ndim != 4:
@@ -719,6 +750,7 @@ class Salia_OneNode_WorkflowSquare:
719
  up = int(upscale_factor)
720
  if up not in (1, 2, 4, 6, 8, 10, 12, 14, 16):
721
  raise ValueError("upscale_factor must be one of: 1,2,4,6,8,10,12,14,16")
 
722
  if s <= 0:
723
  raise ValueError("square_size must be > 0")
724
  if x < 0 or y < 0 or x + s > W or y + s > H:
@@ -727,6 +759,7 @@ class Salia_OneNode_WorkflowSquare:
727
  up_w = s * up
728
  up_h = s * up
729
 
 
730
  if (up_w % 8) != 0 or (up_h % 8) != 0:
731
  raise ValueError("square_size * upscale_factor must be divisible by 8 (required by VAE pipeline).")
732
 
@@ -735,18 +768,18 @@ class Salia_OneNode_WorkflowSquare:
735
  if end_p < start_p:
736
  start_p, end_p = end_p, start_p
737
 
738
- # 1) Crop
739
  crop = image[:, y:y + s, x:x + s, :]
740
  crop_rgb = crop[:, :, :, 0:3].contiguous()
741
 
742
- # 2) Depth (inline Salia_Depth) then Lanczos upscale
743
  depth_small = _salia_depth_execute(crop_rgb, resolution=s)
744
  depth_up = _resize_image_lanczos(depth_small, up_w, up_h)
745
 
746
- # 3) Upscale crop for VAE Encode
747
  crop_up = _resize_image_lanczos(crop_rgb, up_w, up_h)
748
 
749
- # 4) Load asset mask (inline) and resize
750
  if asset_image == "<no pngs found>":
751
  raise FileNotFoundError("No PNGs found in assets/images for this plugin.")
752
  _asset_img_unused, asset_mask = _load_asset_image_and_mask(asset_image)
@@ -826,18 +859,13 @@ class Salia_OneNode_WorkflowSquare:
826
  latent_image=latent,
827
  )
828
 
829
- # 10) VAE Decode
830
  vae_dec = nodes.VAEDecode()
831
  vae_dec_fn = getattr(vae_dec, vae_dec.FUNCTION)
832
  (decoded_rgb,) = vae_dec_fn(samples=sampled_latent, vae=vae)
833
 
834
- # 11) JoinImageWithAlpha
835
- join = nodes.JoinImageWithAlpha()
836
- join_fn = getattr(join, join.FUNCTION)
837
- try:
838
- (rgba_up,) = join_fn(image=decoded_rgb, alpha=asset_mask_up)
839
- except TypeError:
840
- (rgba_up,) = join_fn(image=decoded_rgb, mask=asset_mask_up)
841
 
842
  # 12) Downscale RGBA back to crop size
843
  rgba_square = _resize_image_lanczos(rgba_up, s, s)
 
22
 
23
 
24
  # -------------------------------------------------------------------------------------
25
+ # Global caches (checkpoint + controlnet) so using the node multiple times won't reload
26
  # -------------------------------------------------------------------------------------
27
 
28
  _CKPT_CACHE: Dict[str, Tuple[Any, Any, Any]] = {}
 
32
 
33
 
34
  # -------------------------------------------------------------------------------------
35
+ # Plugin root detection (works whether file is in plugin root or nodes/)
36
  # -------------------------------------------------------------------------------------
37
 
38
  def _find_plugin_root() -> Path:
39
  """
40
  Walk upwards from this file until we find an 'assets' folder.
41
+ Robust against hyphen/underscore package naming and different file placement.
42
  """
43
  here = Path(__file__).resolve()
44
+ for parent in [here.parent] + list(here.parents)[:12]:
45
  if (parent / "assets").is_dir():
46
  return parent
47
+ # fallback: typical nodes/<file>.py
48
  return here.parent.parent
49
 
50
 
 
135
  return torch.cat(outs, dim=0)
136
 
137
 
138
+ # -------------------------------------------------------------------------------------
139
+ # ✅ ComfyUI 0.5.1 FIX: Manual JoinImageWithAlpha equivalent
140
+ # -------------------------------------------------------------------------------------
141
+
142
+ def _rgb_to_rgba_with_comfy_mask(rgb: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
143
+ """
144
+ Make RGBA from:
145
+ rgb: IMAGE [B,H,W,3] float [0..1]
146
+ mask: MASK [B,H,W] float [0..1] (Comfy convention: 1=masked/transparent)
147
+ Output:
148
+ rgba: IMAGE [B,H,W,4] where alpha = 1 - mask (1=opaque, 0=transparent)
149
+ """
150
+ if rgb.ndim == 3:
151
+ rgb = rgb.unsqueeze(0)
152
+ if mask.ndim == 2:
153
+ mask = mask.unsqueeze(0)
154
+
155
+ if rgb.ndim != 4 or rgb.shape[-1] != 3:
156
+ raise ValueError(f"rgb must be [B,H,W,3], got {tuple(rgb.shape)}")
157
+ if mask.ndim != 3:
158
+ raise ValueError(f"mask must be [B,H,W], got {tuple(mask.shape)}")
159
+
160
+ # Batch match
161
+ if mask.shape[0] != rgb.shape[0]:
162
+ if mask.shape[0] == 1 and rgb.shape[0] > 1:
163
+ mask = mask.expand(rgb.shape[0], -1, -1)
164
+ else:
165
+ raise ValueError("Batch mismatch between rgb and mask.")
166
+
167
+ # Size match
168
+ if mask.shape[1] != rgb.shape[1] or mask.shape[2] != rgb.shape[2]:
169
+ raise ValueError(
170
+ f"Mask size mismatch. rgb={rgb.shape[2]}x{rgb.shape[1]} mask={mask.shape[2]}x{mask.shape[1]}"
171
+ )
172
+
173
+ mask = mask.to(device=rgb.device, dtype=rgb.dtype).clamp(0, 1)
174
+ alpha = (1.0 - mask).unsqueeze(-1).clamp(0, 1) # [B,H,W,1]
175
+
176
+ rgba = torch.cat([rgb.clamp(0, 1), alpha], dim=-1) # [B,H,W,4]
177
+ return rgba
178
+
179
+
180
  # -------------------------------------------------------------------------------------
181
  # Core lazy loaders (checkpoint + controlnet), cached globally
182
  # -------------------------------------------------------------------------------------
 
244
 
245
  base = img_dir.resolve()
246
  rel = Path(asset_rel_path)
247
+
248
  if rel.is_absolute():
249
  raise ValueError("Absolute paths are not allowed for asset_image.")
250
 
251
  full = (base / rel).resolve()
252
+
253
+ # path traversal protection
254
  if base != full and base not in full.parents:
255
  raise ValueError(f"Invalid asset path (path traversal blocked): {asset_rel_path}")
256
 
 
267
  Returns (IMAGE, MASK) in ComfyUI formats.
268
 
269
  Mask semantics: match ComfyUI core LoadImage:
270
+ - alpha is RGBA alpha channel normalized to [0..1]
271
+ - mask = 1 - alpha
272
  """
273
  p = _safe_asset_path(asset_rel_path)
274
 
275
  im = Image.open(p)
276
  im = ImageOps.exif_transpose(im)
277
 
 
278
  rgba = im.convert("RGBA")
279
  rgb = rgba.convert("RGB")
280
 
281
  rgb_arr = np.array(rgb).astype(np.float32) / 255.0 # [H,W,3]
282
  img_t = torch.from_numpy(rgb_arr)[None, ...]
283
 
284
+ alpha = np.array(rgba.getchannel("A")).astype(np.float32) / 255.0 # [H,W]
285
+ mask = 1.0 - alpha # Comfy MASK convention
 
 
 
 
286
 
287
  mask_t = torch.from_numpy(mask)[None, ...]
288
  return img_t, mask_t
289
 
290
 
291
  # -------------------------------------------------------------------------------------
292
+ # Salia_Depth (INLINED, no imports from other files)
293
  # -------------------------------------------------------------------------------------
294
 
 
295
  MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
296
  MODEL_DIR.mkdir(parents=True, exist_ok=True)
297
 
 
303
 
304
  ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
305
 
306
+ _PIPE_CACHE: Dict[Tuple[str, str], Any] = {} # (model_source, device_str) -> pipeline
307
+ _PIPE_LOCK = threading.Lock()
308
+
309
 
310
def _have_required_files() -> bool:
    """Return True when every required model file exists under MODEL_DIR."""
    # Iterating the dict directly yields its keys; `.keys()` is redundant.
    return all((MODEL_DIR / name).exists() for name in REQUIRED_FILES)
 
462
  return t.unsqueeze(0) # [1,H,W,C]
463
 
464
 
 
 
 
465
def _try_load_pipeline(model_source: str, device: torch.device):
    """
    Return a depth-estimation pipeline for (model_source, device), creating
    and caching it on first use.

    The whole lookup/create/store sequence runs under _PIPE_LOCK so that
    concurrent node executions never build the same pipeline twice.

    Raises:
        RuntimeError: if the transformers `pipeline` import failed at module load.
    """
    if pipeline is None:
        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")

    cache_key = (model_source, str(device))
    with _PIPE_LOCK:
        if cache_key in _PIPE_CACHE:
            return _PIPE_CACHE[cache_key]

        depth_pipe = pipeline(task="depth-estimation", model=model_source)
        # Best-effort device placement — some transformers versions manage
        # the device themselves and may reject direct assignment.
        try:
            depth_pipe.model = depth_pipe.model.to(device)
            depth_pipe.device = device
        except Exception:
            pass

        _PIPE_CACHE[cache_key] = depth_pipe
        return depth_pipe
483
 
484
 
485
  def get_depth_pipeline(device: torch.device):
 
551
 
552
  def _salia_depth_execute(image: torch.Tensor, resolution: int = -1) -> torch.Tensor:
553
  """
554
+ Internal callable version of Salia_Depth:
555
  input: IMAGE [B,H,W,3 or 4]
556
  output: IMAGE [B,H,W,3]
557
  """
 
558
  try:
559
  device = model_management.get_torch_device()
560
  except Exception:
561
  device = torch.device("cpu")
562
 
563
+ pipe_obj = None
 
564
  try:
565
+ pipe_obj = get_depth_pipeline(device)
566
  except Exception:
567
+ pipe_obj = None
568
 
569
+ if pipe_obj is None:
 
570
  return image
571
 
 
572
  if image.ndim == 3:
573
  image = image.unsqueeze(0)
574
 
 
580
 
581
  inp_u8 = comfy_tensor_to_u8(image[i])
582
 
 
583
  rgb_for_depth, alpha_u8 = composite_rgba_over_white_keep_alpha(inp_u8)
584
  had_rgba = alpha_u8 is not None
585
 
 
586
  depth_rgb = depth_estimate_zoe_style(
587
+ pipe=pipe_obj,
588
  input_rgb_u8=rgb_for_depth,
589
  detect_resolution=int(resolution),
590
  upscale_method="INTER_CUBIC",
591
  )
592
 
 
593
  depth_rgb = resize_to_original(depth_rgb, w0=w0, h0=h0)
594
 
 
595
  if had_rgba:
596
  if alpha_u8.shape[0] != h0 or alpha_u8.shape[1] != w0:
597
  try:
 
634
  if x < 0 or y < 0 or x + s > W or y + s > H:
635
  raise ValueError(f"Square paste out of bounds. base={W}x{H}, paste at ({x},{y}) size={s}")
636
 
 
637
  if b2 != B:
638
  if b2 == 1 and B > 1:
639
  overlay_rgba = overlay_rgba.expand(B, -1, -1, -1)
 
649
  comp_rgb = overlay_rgb * overlay_a + base_rgb * (1.0 - overlay_a)
650
  out[:, y:y + s, x:x + s, 0:3] = comp_rgb
651
 
 
652
  if C == 4:
653
  base_a = out[:, y:y + s, x:x + s, 3:4].clamp(0, 1)
654
  comp_a = overlay_a + base_a * (1.0 - overlay_a)
 
733
  scheduler: str,
734
  denoise: float,
735
  ):
736
+ # Normalize input to [B,H,W,C]
737
  if image.ndim == 3:
738
  image = image.unsqueeze(0)
739
  if image.ndim != 4:
 
750
  up = int(upscale_factor)
751
  if up not in (1, 2, 4, 6, 8, 10, 12, 14, 16):
752
  raise ValueError("upscale_factor must be one of: 1,2,4,6,8,10,12,14,16")
753
+
754
  if s <= 0:
755
  raise ValueError("square_size must be > 0")
756
  if x < 0 or y < 0 or x + s > W or y + s > H:
 
759
  up_w = s * up
760
  up_h = s * up
761
 
762
+ # VAE/UNet path likes multiples of 8
763
  if (up_w % 8) != 0 or (up_h % 8) != 0:
764
  raise ValueError("square_size * upscale_factor must be divisible by 8 (required by VAE pipeline).")
765
 
 
768
  if end_p < start_p:
769
  start_p, end_p = end_p, start_p
770
 
771
+ # 1) Crop square
772
  crop = image[:, y:y + s, x:x + s, :]
773
  crop_rgb = crop[:, :, :, 0:3].contiguous()
774
 
775
+ # 2) Depth (inline Salia_Depth) then upscale with Lanczos
776
  depth_small = _salia_depth_execute(crop_rgb, resolution=s)
777
  depth_up = _resize_image_lanczos(depth_small, up_w, up_h)
778
 
779
+ # 3) Upscale crop for VAE encode
780
  crop_up = _resize_image_lanczos(crop_rgb, up_w, up_h)
781
 
782
+ # 4) Load asset mask and resize
783
  if asset_image == "<no pngs found>":
784
  raise FileNotFoundError("No PNGs found in assets/images for this plugin.")
785
  _asset_img_unused, asset_mask = _load_asset_image_and_mask(asset_image)
 
859
  latent_image=latent,
860
  )
861
 
862
+ # 10) VAE Decode -> RGB
863
  vae_dec = nodes.VAEDecode()
864
  vae_dec_fn = getattr(vae_dec, vae_dec.FUNCTION)
865
  (decoded_rgb,) = vae_dec_fn(samples=sampled_latent, vae=vae)
866
 
867
+ # 11) ✅ Manual "JoinImageWithAlpha"
868
+ rgba_up = _rgb_to_rgba_with_comfy_mask(decoded_rgb, asset_mask_up)
 
 
 
 
 
869
 
870
  # 12) Downscale RGBA back to crop size
871
  rgba_square = _resize_image_lanczos(rgba_up, s, s)