Upload salia_detailer_ezpz.py
salia_detailer_ezpz.py (CHANGED: +403 -162)
@@ -1,12 +1,24 @@
 import hashlib
 import threading
 from typing import Any, Dict, Tuple, Optional

-import torch
 import numpy as np
 from PIL import Image, ImageOps

 import folder_paths


 # -------------------------------------------------------------------------------------
@@ -19,12 +31,31 @@ _CKPT_LOCK = threading.Lock()
 _CN_LOCK = threading.Lock()


 # -------------------------------------------------------------------------------------
 # PIL helpers (Lanczos resize for IMAGE and MASK)
 # -------------------------------------------------------------------------------------

 def _pil_lanczos():
-    # Pillow compatibility
     if hasattr(Image, "Resampling"):
         return Image.Resampling.LANCZOS
     return Image.LANCZOS
@@ -32,14 +63,12 @@ def _pil_lanczos():

 def _image_tensor_to_pil(img: torch.Tensor) -> Image.Image:
     """
-    Comfy IMAGE: [B,H,W,C] or [H,W,C], float [0..1]
-    -> PIL RGB/RGBA
     """
     if img.ndim == 4:
         img = img[0]
     img = img.detach().cpu().float().clamp(0, 1)
     arr = (img.numpy() * 255.0).round().astype(np.uint8)
-
     if arr.shape[-1] == 4:
         return Image.fromarray(arr, mode="RGBA")
     return Image.fromarray(arr, mode="RGB")
@@ -80,11 +109,10 @@ def _pil_to_mask_tensor(pil_l: Image.Image) -> torch.Tensor:

 def _resize_image_lanczos(img: torch.Tensor, w: int, h: int) -> torch.Tensor:
     """
-    Resize Comfy IMAGE [B,H,W,C] with Lanczos via PIL
     """
     if img.ndim != 4:
         raise ValueError("Expected IMAGE tensor with shape [B,H,W,C].")
-
     outs = []
     for i in range(img.shape[0]):
         pil = _image_tensor_to_pil(img[i].unsqueeze(0))
@@ -99,7 +127,6 @@ def _resize_mask_lanczos(mask: torch.Tensor, w: int, h: int) -> torch.Tensor:
     """
     if mask.ndim != 3:
         raise ValueError("Expected MASK tensor with shape [B,H,W].")
-
     outs = []
     for i in range(mask.shape[0]):
         pil = _mask_tensor_to_pil(mask[i].unsqueeze(0))
@@ -121,7 +148,7 @@ def _load_checkpoint_cached(ckpt_name: str):
     if ckpt_name in _CKPT_CACHE:
         return _CKPT_CACHE[ckpt_name]

-    import nodes
     loader = nodes.CheckpointLoaderSimple()
     fn = getattr(loader, loader.FUNCTION)
     model, clip, vae = fn(ckpt_name=ckpt_name)
@@ -139,7 +166,7 @@ def _load_controlnet_cached(control_net_name: str):
     if control_net_name in _CN_CACHE:
         return _CN_CACHE[control_net_name]

-    import nodes
     loader = nodes.ControlNetLoader()
     fn = getattr(loader, loader.FUNCTION)
     (cn,) = fn(control_net_name=control_net_name)
@@ -149,110 +176,60 @@ def _load_controlnet_cached(control_net_name: str):


 # -------------------------------------------------------------------------------------
-# Assets/images dropdown + loader (
 # -------------------------------------------------------------------------------------

-
-


-def _find_assets_images_dir():
-    """
-    Find the plugin's assets/images folder by walking upward from this file.
-    This is robust even if Comfy imports modules in weird ways.
-    """
-    from pathlib import Path
-
-    here = Path(__file__).resolve()
-    # check a few levels up; plugin root should be near
-    for parent in [here.parent] + list(here.parents)[:8]:
-        candidate = parent / "assets" / "images"
-        if candidate.is_dir():
-            return candidate
-    return None
-
-
-def _assets_images_dir():
-    global _ASSETS_DIR_CACHE
-    with _ASSETS_DIR_LOCK:
-        if _ASSETS_DIR_CACHE is not None:
-            # If it was found once, reuse.
-            try:
-                if _ASSETS_DIR_CACHE.is_dir():
-                    return _ASSETS_DIR_CACHE
-            except Exception:
-                pass
-
-        found = _find_assets_images_dir()
-        _ASSETS_DIR_CACHE = found
-        return found
-
-
-def _list_asset_pngs():
-    """
-    List PNGs inside assets/images (recursive), returning paths relative to assets/images.
-    """
     img_dir = _assets_images_dir()
-    if img_dir is None:
         return []
-
     files = []
-
-
-
-
-
-        files.sort()
-        return files
-    except Exception:
-        return []


-def _safe_asset_path(asset_rel_path: str):
-    """
-    Resolve a selected dropdown entry to an actual file path inside assets/images.
-    Prevents path traversal.
-    """
-    from pathlib import Path
-
     img_dir = _assets_images_dir()
-    if img_dir is None:
-        raise FileNotFoundError("assets/images folder not found")

     base = img_dir.resolve()
     rel = Path(asset_rel_path)
-
     if rel.is_absolute():
         raise ValueError("Absolute paths are not allowed for asset_image.")

-    # Resolve and verify containment
     full = (base / rel).resolve()
     if base != full and base not in full.parents:
         raise ValueError(f"Invalid asset path (path traversal blocked): {asset_rel_path}")

     if not full.is_file():
         raise FileNotFoundError(f"Asset PNG not found in assets/images: {asset_rel_path}")
-
     if full.suffix.lower() != ".png":
         raise ValueError(f"Asset is not a PNG: {asset_rel_path}")

     return full


-def _load_asset_image_and_mask(asset_rel_path: str):
     """
-

-
     - If PNG has alpha: mask = 1 - alpha
-    -
     """
     p = _safe_asset_path(asset_rel_path)

     im = Image.open(p)
     im = ImageOps.exif_transpose(im)

-    # Ensure we can extract alpha if present
     had_alpha = ("A" in im.getbands())
     rgba = im.convert("RGBA")
     rgb = rgba.convert("RGB")
@@ -261,8 +238,8 @@ def _load_asset_image_and_mask(asset_rel_path: str):
     img_t = torch.from_numpy(rgb_arr)[None, ...]

     if had_alpha:
-        alpha = np.array(rgba.getchannel("A")).astype(np.float32) / 255.0
-        mask = 1.0 - alpha
     else:
         h, w = rgb.size[1], rgb.size[0]
         mask = np.zeros((h, w), dtype=np.float32)
@@ -272,20 +249,334 @@ def _load_asset_image_and_mask(asset_rel_path: str):


 # -------------------------------------------------------------------------------------
-# Salia_Depth (
 # -------------------------------------------------------------------------------------

-
     """
-
-
     """
-

-
-
-
-


 # -------------------------------------------------------------------------------------
@@ -327,7 +618,7 @@ def _alpha_over_region(base: torch.Tensor, overlay_rgba: torch.Tensor, x: int, y: int):
     comp_rgb = overlay_rgb * overlay_a + base_rgb * (1.0 - overlay_a)
     out[:, y:y + s, x:x + s, 0:3] = comp_rgb

-    # If base has alpha, composite alpha too
     if C == 4:
         base_a = out[:, y:y + s, x:x + s, 3:4].clamp(0, 1)
         comp_a = overlay_a + base_a * (1.0 - overlay_a)
@@ -340,11 +631,7 @@ def _alpha_over_region(base: torch.Tensor, overlay_rgba: torch.Tensor, x: int, y: int):
 # The One-Node Workflow
 # -------------------------------------------------------------------------------------

-class Salia_Detailer_EZPZ:
-    """
-    One node that replicates the workflow you described.
-    """
-
     CATEGORY = "image/salia"
     RETURN_TYPES = ("IMAGE",)
     RETURN_NAMES = ("image",)
@@ -352,12 +639,10 @@ class Salia_Detailer_EZPZ:

     @classmethod
     def INPUT_TYPES(cls):
-        # Dropdowns
         ckpts = folder_paths.get_filename_list("checkpoints") or ["<no checkpoints found>"]
         cns = folder_paths.get_filename_list("controlnet") or ["<no controlnets found>"]
         assets = _list_asset_pngs() or ["<no pngs found>"]

-        # KSampler dropdowns (match comfy-core)
         try:
             import comfy.samplers
             sampler_names = comfy.samplers.KSampler.SAMPLERS
@@ -366,7 +651,6 @@ class Salia_Detailer_EZPZ:
             sampler_names = ["euler"]
             scheduler_names = ["karras"]

-        # Upscale dropdown as requested
         upscale_choices = ["1", "2", "4", "6", "8", "10", "12", "14", "16"]

         return {
@@ -382,17 +666,14 @@ class Salia_Detailer_EZPZ:

                 "upscale_factor": (upscale_choices, {"default": "4"}),

-                # 3 dropdown menus you requested
                 "ckpt_name": (ckpts, {}),
                 "control_net_name": (cns, {}),
                 "asset_image": (assets, {}),

-                # ControlNet params
                 "controlnet_strength": ("FLOAT", {"default": 0.33, "min": 0.00, "max": 10.00, "step": 0.01}),
                 "controlnet_start_percent": ("FLOAT", {"default": 0.00, "min": 0.00, "max": 1.00, "step": 0.01}),
                 "controlnet_end_percent": ("FLOAT", {"default": 1.00, "min": 0.00, "max": 1.00, "step": 0.01}),

-                # KSampler params
                 "steps": ("INT", {"default": 30, "min": 1, "max": 200, "step": 1}),
                 "cfg": ("FLOAT", {"default": 2.6, "min": 0.00, "max": 10.00, "step": 0.05}),
                 "sampler_name": (sampler_names, {"default": "euler"} if "euler" in sampler_names else {}),
|
@@ -409,7 +690,7 @@ class Salia_Detailer_EZPZ:
|
|
| 409 |
square_size: int,
|
| 410 |
positive_prompt: str,
|
| 411 |
negative_prompt: str,
|
| 412 |
-
upscale_factor: str,
|
| 413 |
ckpt_name: str,
|
| 414 |
control_net_name: str,
|
| 415 |
asset_image: str,
|
|
@@ -422,12 +703,8 @@ class Salia_Detailer_EZPZ:
|
|
| 422 |
scheduler: str,
|
| 423 |
denoise: float,
|
| 424 |
):
|
| 425 |
-
# -------------------------
|
| 426 |
-
# Validate / normalize
|
| 427 |
-
# -------------------------
|
| 428 |
if image.ndim == 3:
|
| 429 |
image = image.unsqueeze(0)
|
| 430 |
-
|
| 431 |
if image.ndim != 4:
|
| 432 |
raise ValueError("Input image must be [B,H,W,C].")
|
| 433 |
|
|
@@ -442,56 +719,43 @@ class Salia_Detailer_EZPZ:
|
|
| 442 |
up = int(upscale_factor)
|
| 443 |
if up not in (1, 2, 4, 6, 8, 10, 12, 14, 16):
|
| 444 |
raise ValueError("upscale_factor must be one of: 1,2,4,6,8,10,12,14,16")
|
| 445 |
-
|
| 446 |
if s <= 0:
|
| 447 |
raise ValueError("square_size must be > 0")
|
| 448 |
-
|
| 449 |
if x < 0 or y < 0 or x + s > W or y + s > H:
|
| 450 |
raise ValueError(f"Crop out of bounds. image={W}x{H}, crop at ({x},{y}) size={s}")
|
| 451 |
|
| 452 |
up_w = s * up
|
| 453 |
up_h = s * up
|
| 454 |
|
| 455 |
-
# VAE/UNet path is happiest with multiples of 8
|
| 456 |
if (up_w % 8) != 0 or (up_h % 8) != 0:
|
| 457 |
raise ValueError("square_size * upscale_factor must be divisible by 8 (required by VAE pipeline).")
|
| 458 |
|
| 459 |
-
# Clamp controlnet percent range
|
| 460 |
start_p = float(max(0.0, min(1.0, controlnet_start_percent)))
|
| 461 |
end_p = float(max(0.0, min(1.0, controlnet_end_percent)))
|
| 462 |
if end_p < start_p:
|
| 463 |
start_p, end_p = end_p, start_p
|
| 464 |
|
| 465 |
-
#
|
| 466 |
-
# 1) Crop square (we use it twice internally)
|
| 467 |
-
# -------------------------
|
| 468 |
crop = image[:, y:y + s, x:x + s, :]
|
| 469 |
-
crop_rgb = crop[:, :, :, 0:3].contiguous()
|
| 470 |
|
| 471 |
-
#
|
| 472 |
-
|
| 473 |
-
# -------------------------
|
| 474 |
-
depth_small = _run_salia_depth(crop_rgb, resolution=s)
|
| 475 |
depth_up = _resize_image_lanczos(depth_small, up_w, up_h)
|
| 476 |
|
| 477 |
-
#
|
| 478 |
-
# 3) Generation path: upscale crop with Lanczos then VAE Encode
|
| 479 |
-
# -------------------------
|
| 480 |
crop_up = _resize_image_lanczos(crop_rgb, up_w, up_h)
|
| 481 |
|
| 482 |
-
#
|
| 483 |
-
# 4) Load asset mask (INLINE assets loader) and resize to match upscaled resolution
|
| 484 |
-
# -------------------------
|
| 485 |
if asset_image == "<no pngs found>":
|
| 486 |
raise FileNotFoundError("No PNGs found in assets/images for this plugin.")
|
|
|
|
| 487 |
|
| 488 |
-
_asset_img_unused, asset_mask = _load_asset_image_and_mask(asset_image) # MASK is what we need
|
| 489 |
if asset_mask.ndim == 2:
|
| 490 |
asset_mask = asset_mask.unsqueeze(0)
|
| 491 |
if asset_mask.ndim != 3:
|
| 492 |
raise ValueError("Asset mask must be [B,H,W].")
|
| 493 |
|
| 494 |
-
# Match batch
|
| 495 |
if asset_mask.shape[0] != B:
|
| 496 |
if asset_mask.shape[0] == 1 and B > 1:
|
| 497 |
asset_mask = asset_mask.expand(B, -1, -1)
|
|
@@ -500,37 +764,28 @@ class Salia_Detailer_EZPZ:

         asset_mask_up = _resize_mask_lanczos(asset_mask, up_w, up_h)

-        #
-        # 5) Load checkpoint + controlnet (lazy + cached)
-        # -------------------------
         if ckpt_name == "<no checkpoints found>":
-            raise FileNotFoundError("No checkpoints found in
-
         if control_net_name == "<no controlnets found>":
-            raise FileNotFoundError("No controlnets found in

         model, clip, vae = _load_checkpoint_cached(ckpt_name)
         controlnet = _load_controlnet_cached(control_net_name)

-
-        # 6) Encode prompts (CLIPTextEncode)
-        # -------------------------
-        import nodes  # lazy

         pos_enc = nodes.CLIPTextEncode()
         neg_enc = nodes.CLIPTextEncode()
         pos_fn = getattr(pos_enc, pos_enc.FUNCTION)
         neg_fn = getattr(neg_enc, neg_enc.FUNCTION)
-
         (pos_cond,) = pos_fn(text=str(positive_prompt), clip=clip)
         (neg_cond,) = neg_fn(text=str(negative_prompt), clip=clip)

-        #
-        # 7) Apply ControlNet (ControlNetApplyAdvanced)
-        # -------------------------
         cn_apply = nodes.ControlNetApplyAdvanced()
         cn_fn = getattr(cn_apply, cn_apply.FUNCTION)
-
         pos_cn, neg_cn = cn_fn(
             strength=float(controlnet_strength),
             start_percent=float(start_p),
@@ -542,16 +797,12 @@ class Salia_Detailer_EZPZ:
             vae=vae,
         )

-        #
-        # 8) VAE Encode (crop_up) -> latent
-        # -------------------------
         vae_enc = nodes.VAEEncode()
         vae_enc_fn = getattr(vae_enc, vae_enc.FUNCTION)
         (latent,) = vae_enc_fn(pixels=crop_up, vae=vae)

-        #
-        # 9) KSampler
-        # -------------------------
         seed_material = (
             f"{ckpt_name}|{control_net_name}|{asset_image}|{x}|{y}|{s}|{up}|"
             f"{steps}|{cfg}|{sampler_name}|{scheduler}|{denoise}|"
@@ -575,41 +826,31 @@ class Salia_Detailer_EZPZ:
             latent_image=latent,
         )

-        #
-        # 10) VAE Decode -> RGB image
-        # -------------------------
         vae_dec = nodes.VAEDecode()
         vae_dec_fn = getattr(vae_dec, vae_dec.FUNCTION)
         (decoded_rgb,) = vae_dec_fn(samples=sampled_latent, vae=vae)

-        #
-        # 11) JoinImageWithAlpha (decoded_rgb + asset_mask_up) -> RGBA
-        # -------------------------
         join = nodes.JoinImageWithAlpha()
         join_fn = getattr(join, join.FUNCTION)
-
         try:
             (rgba_up,) = join_fn(image=decoded_rgb, alpha=asset_mask_up)
         except TypeError:
             (rgba_up,) = join_fn(image=decoded_rgb, mask=asset_mask_up)

-        #
-        # 12) Downscale RGBA back to original crop resolution (square_size) with Lanczos
-        # -------------------------
         rgba_square = _resize_image_lanczos(rgba_up, s, s)

-        # -
-        # 13) Paste RGBA square onto original input image at X,Y using alpha-over
-        # -------------------------
         out = _alpha_over_region(image, rgba_square, x=x, y=y)
-
         return (out,)


 NODE_CLASS_MAPPINGS = {
-    "Salia_Detailer_EZPZ": Salia_Detailer_EZPZ,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
-    "Salia_Detailer_EZPZ": "
 }
 import hashlib
+import shutil
 import threading
+import urllib.request
+from pathlib import Path
 from typing import Any, Dict, Tuple, Optional

 import numpy as np
+import torch
 from PIL import Image, ImageOps

 import folder_paths
+import comfy.model_management as model_management
+
+
+# transformers is required for depth-estimation pipeline
+try:
+    from transformers import pipeline
+except Exception as e:
+    pipeline = None
+    _TRANSFORMERS_IMPORT_ERROR = e


 # -------------------------------------------------------------------------------------
 _CN_LOCK = threading.Lock()


+# -------------------------------------------------------------------------------------
+# Plugin root detection (robust against hyphen/underscore module naming)
+# -------------------------------------------------------------------------------------
+
+def _find_plugin_root() -> Path:
+    """
+    Walk upwards from this file until we find an 'assets' folder.
+    This works regardless of how Comfy names the python module.
+    """
+    here = Path(__file__).resolve()
+    for parent in [here.parent] + list(here.parents)[:10]:
+        if (parent / "assets").is_dir():
+            return parent
+    # fallback: typical layout nodes/<thisfile>.py -> plugin root is parent.parent
+    return here.parent.parent
+
+
+PLUGIN_ROOT = _find_plugin_root()
+
+
 # -------------------------------------------------------------------------------------
 # PIL helpers (Lanczos resize for IMAGE and MASK)
 # -------------------------------------------------------------------------------------

 def _pil_lanczos():
     if hasattr(Image, "Resampling"):
         return Image.Resampling.LANCZOS
     return Image.LANCZOS

 def _image_tensor_to_pil(img: torch.Tensor) -> Image.Image:
     """
+    Comfy IMAGE: [B,H,W,C] or [H,W,C], float [0..1] -> PIL RGB/RGBA
     """
     if img.ndim == 4:
         img = img[0]
     img = img.detach().cpu().float().clamp(0, 1)
     arr = (img.numpy() * 255.0).round().astype(np.uint8)
     if arr.shape[-1] == 4:
         return Image.fromarray(arr, mode="RGBA")
     return Image.fromarray(arr, mode="RGB")

 def _resize_image_lanczos(img: torch.Tensor, w: int, h: int) -> torch.Tensor:
     """
+    Resize Comfy IMAGE [B,H,W,C] with Lanczos via PIL.
     """
     if img.ndim != 4:
         raise ValueError("Expected IMAGE tensor with shape [B,H,W,C].")
     outs = []
     for i in range(img.shape[0]):
         pil = _image_tensor_to_pil(img[i].unsqueeze(0))

     """
     if mask.ndim != 3:
         raise ValueError("Expected MASK tensor with shape [B,H,W].")
     outs = []
     for i in range(mask.shape[0]):
         pil = _mask_tensor_to_pil(mask[i].unsqueeze(0))
     if ckpt_name in _CKPT_CACHE:
         return _CKPT_CACHE[ckpt_name]

+    import nodes
     loader = nodes.CheckpointLoaderSimple()
     fn = getattr(loader, loader.FUNCTION)
     model, clip, vae = fn(ckpt_name=ckpt_name)

     if control_net_name in _CN_CACHE:
         return _CN_CACHE[control_net_name]

+    import nodes
     loader = nodes.ControlNetLoader()
     fn = getattr(loader, loader.FUNCTION)
     (cn,) = fn(control_net_name=control_net_name)


 # -------------------------------------------------------------------------------------
+# Assets/images dropdown + loader (inlined; no LoadImage_SaliaOnline_Assets dependency)
 # -------------------------------------------------------------------------------------

+def _assets_images_dir() -> Path:
+    return PLUGIN_ROOT / "assets" / "images"


+def _list_asset_pngs() -> list:
     img_dir = _assets_images_dir()
+    if not img_dir.is_dir():
         return []
     files = []
+    for p in img_dir.rglob("*"):
+        if p.is_file() and p.suffix.lower() == ".png":
+            files.append(p.relative_to(img_dir).as_posix())
+    files.sort()
+    return files


+def _safe_asset_path(asset_rel_path: str) -> Path:
     img_dir = _assets_images_dir()
+    if not img_dir.is_dir():
+        raise FileNotFoundError(f"assets/images folder not found: {img_dir}")

     base = img_dir.resolve()
     rel = Path(asset_rel_path)
     if rel.is_absolute():
         raise ValueError("Absolute paths are not allowed for asset_image.")

     full = (base / rel).resolve()
     if base != full and base not in full.parents:
         raise ValueError(f"Invalid asset path (path traversal blocked): {asset_rel_path}")

     if not full.is_file():
         raise FileNotFoundError(f"Asset PNG not found in assets/images: {asset_rel_path}")
     if full.suffix.lower() != ".png":
         raise ValueError(f"Asset is not a PNG: {asset_rel_path}")

     return full

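The containment check in `_safe_asset_path` resolves the joined path and then requires the `assets/images` base to be the result itself or one of its parents. A minimal sketch of what that guards against (hypothetical paths, not from this repo):

    from pathlib import Path

    base = Path("/plugins/salia/assets/images")
    full = (base / "../../secret.png").resolve()   # -> /plugins/salia/secret.png
    escaped = base != full and base not in full.parents
    assert escaped  # _safe_asset_path would raise ValueError here,
                    # while "subdir/mask.png" resolves under base and passes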
+def _load_asset_image_and_mask(asset_rel_path: str) -> Tuple[torch.Tensor, torch.Tensor]:
     """
+    Returns (IMAGE, MASK) in ComfyUI formats.

+    Mask semantics match ComfyUI core LoadImage:
     - If PNG has alpha: mask = 1 - alpha
+    - Else: mask = 0
     """
     p = _safe_asset_path(asset_rel_path)

     im = Image.open(p)
     im = ImageOps.exif_transpose(im)

     had_alpha = ("A" in im.getbands())
     rgba = im.convert("RGBA")
     rgb = rgba.convert("RGB")

     img_t = torch.from_numpy(rgb_arr)[None, ...]

     if had_alpha:
+        alpha = np.array(rgba.getchannel("A")).astype(np.float32) / 255.0
+        mask = 1.0 - alpha
     else:
         h, w = rgb.size[1], rgb.size[0]
         mask = np.zeros((h, w), dtype=np.float32)
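The `1 - alpha` inversion follows core LoadImage's convention: opaque pixels become 0 ("keep") and transparent pixels become 1 ("replace"). A tiny worked example with made-up values:

    import numpy as np

    alpha = np.array([[1.0, 0.5, 0.0]], dtype=np.float32)  # opaque, half, transparent
    mask = 1.0 - alpha                                     # [[0.0, 0.5, 1.0]]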


 # -------------------------------------------------------------------------------------
+# Salia_Depth (INLINED: exact logic, no imports from other files)
 # -------------------------------------------------------------------------------------

+# Local model path: assets/depth
+MODEL_DIR = PLUGIN_ROOT / "assets" / "depth"
+MODEL_DIR.mkdir(parents=True, exist_ok=True)
+
+REQUIRED_FILES = {
+    "config.json": "https://huggingface.co/saliacoel/depth/resolve/main/config.json",
+    "model.safetensors": "https://huggingface.co/saliacoel/depth/resolve/main/model.safetensors",
+    "preprocessor_config.json": "https://huggingface.co/saliacoel/depth/resolve/main/preprocessor_config.json",
+}
+
+ZOE_FALLBACK_REPO_ID = "Intel/zoedepth-nyu-kitti"
+
+
+def _have_required_files() -> bool:
+    return all((MODEL_DIR / name).exists() for name in REQUIRED_FILES.keys())
+
+
+def _download_url_to_file(url: str, dst: Path, timeout: int = 180) -> None:
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    tmp = dst.with_suffix(dst.suffix + ".tmp")
+
+    if tmp.exists():
+        try:
+            tmp.unlink()
+        except Exception:
+            pass
+
+    req = urllib.request.Request(url, headers={"User-Agent": "ComfyUI-SaliaDepth/1.1"})
+    with urllib.request.urlopen(req, timeout=timeout) as r, open(tmp, "wb") as f:
+        shutil.copyfileobj(r, f)
+
+    tmp.replace(dst)
+
+
+def ensure_local_model_files() -> bool:
+    if _have_required_files():
+        return True
+    try:
+        for fname, url in REQUIRED_FILES.items():
+            fpath = MODEL_DIR / fname
+            if fpath.exists():
+                continue
+            _download_url_to_file(url, fpath)
+        return _have_required_files()
+    except Exception:
+        return False
+
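`_download_url_to_file` streams into a `.tmp` sidecar and only `Path.replace()`s it over the destination once the transfer completes, so an interrupted download can never leave a truncated `model.safetensors` that `_have_required_files()` would mistake for a valid file. The same pattern in isolation (hypothetical URL and destination):

    import shutil, urllib.request
    from pathlib import Path

    def fetch_atomic(url: str, dst: Path, timeout: int = 60) -> None:
        tmp = dst.with_suffix(dst.suffix + ".tmp")
        with urllib.request.urlopen(url, timeout=timeout) as r, open(tmp, "wb") as f:
            shutil.copyfileobj(r, f)  # stream to the sidecar
        tmp.replace(dst)              # rename is atomic on the same filesystem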
+def HWC3(x: np.ndarray) -> np.ndarray:
+    assert x.dtype == np.uint8
+    if x.ndim == 2:
+        x = x[:, :, None]
+    assert x.ndim == 3
+    H, W, C = x.shape
+    assert C == 1 or C == 3 or C == 4
+    if C == 3:
+        return x
+    if C == 1:
+        return np.concatenate([x, x, x], axis=2)
+    # C == 4
+    color = x[:, :, 0:3].astype(np.float32)
+    alpha = x[:, :, 3:4].astype(np.float32) / 255.0
+    y = color * alpha + 255.0 * (1.0 - alpha)  # white background
+    y = y.clip(0, 255).astype(np.uint8)
+    return y
+
+
+def pad64(x: int) -> int:
+    return int(np.ceil(float(x) / 64.0) * 64 - x)
+
+
+def safer_memory(x: np.ndarray) -> np.ndarray:
+    return np.ascontiguousarray(x.copy()).copy()
+
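`pad64` computes how many rows/columns to append so a dimension reaches the next multiple of 64, presumably to satisfy the depth model's stride requirements. Worked arithmetic:

    pad64(512)  # ceil(512/64)*64 - 512 = 512 - 512 = 0
    pad64(600)  # ceil(600/64)*64 - 600 = 640 - 600 = 40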
+
+def resize_image_with_pad_min_side(
+    input_image: np.ndarray,
+    resolution: int,
+    upscale_method: str = "INTER_CUBIC",
+    skip_hwc3: bool = False,
+    mode: str = "edge",
+) -> Tuple[np.ndarray, Any]:
+    cv2 = None
+    try:
+        import cv2 as _cv2
+        cv2 = _cv2
+    except Exception:
+        cv2 = None
+
+    img = input_image if skip_hwc3 else HWC3(input_image)
+
+    H_raw, W_raw, _ = img.shape
+    if resolution <= 0:
+        return img, (lambda x: x)
+
+    k = float(resolution) / float(min(H_raw, W_raw))
+    H_target = int(np.round(float(H_raw) * k))
+    W_target = int(np.round(float(W_raw) * k))
+
+    if cv2 is not None:
+        upscale_methods = {
+            "INTER_NEAREST": cv2.INTER_NEAREST,
+            "INTER_LINEAR": cv2.INTER_LINEAR,
+            "INTER_AREA": cv2.INTER_AREA,
+            "INTER_CUBIC": cv2.INTER_CUBIC,
+            "INTER_LANCZOS4": cv2.INTER_LANCZOS4,
+        }
+        method = upscale_methods.get(upscale_method, cv2.INTER_CUBIC)
+        img = cv2.resize(img, (W_target, H_target), interpolation=method if k > 1 else cv2.INTER_AREA)
+    else:
+        pil = Image.fromarray(img)
+        resample = Image.BICUBIC if k > 1 else Image.LANCZOS
+        pil = pil.resize((W_target, H_target), resample=resample)
+        img = np.array(pil, dtype=np.uint8)
+
+    H_pad, W_pad = pad64(H_target), pad64(W_target)
+    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)
+
+    def remove_pad(x: np.ndarray) -> np.ndarray:
+        return safer_memory(x[:H_target, :W_target, ...])
+
+    return safer_memory(img_padded), remove_pad
+
+
+def pad_only_to_64(img_u8: np.ndarray, mode: str = "edge") -> Tuple[np.ndarray, Any]:
+    img = HWC3(img_u8)
+    H_raw, W_raw, _ = img.shape
+    H_pad, W_pad = pad64(H_raw), pad64(W_raw)
+    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)
+
+    def remove_pad(x: np.ndarray) -> np.ndarray:
+        return safer_memory(x[:H_raw, :W_raw, ...])
+
+    return safer_memory(img_padded), remove_pad
+
+
+def composite_rgba_over_white_keep_alpha(inp_u8: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
+    if inp_u8.ndim == 3 and inp_u8.shape[2] == 4:
+        rgba = inp_u8.astype(np.uint8)
+        rgb = rgba[:, :, 0:3].astype(np.float32)
+        a = (rgba[:, :, 3:4].astype(np.float32) / 255.0)
+        rgb_white = (rgb * a + 255.0 * (1.0 - a)).clip(0, 255).astype(np.uint8)
+        alpha_u8 = rgba[:, :, 3].copy()
+        return rgb_white, alpha_u8
+    return HWC3(inp_u8), None
+
+
+def apply_alpha_then_black_background(depth_rgb_u8: np.ndarray, alpha_u8: np.ndarray) -> np.ndarray:
+    depth_rgb_u8 = HWC3(depth_rgb_u8)
+    a = (alpha_u8.astype(np.float32) / 255.0)[:, :, None]
+    out = (depth_rgb_u8.astype(np.float32) * a).clip(0, 255).astype(np.uint8)
+    return out
+
+
+def comfy_tensor_to_u8(img: torch.Tensor) -> np.ndarray:
+    if img.ndim == 4:
+        img = img[0]
+    arr = img.detach().cpu().float().clamp(0, 1).numpy()
+    u8 = (arr * 255.0).round().astype(np.uint8)
+    return u8
+
+
+def u8_to_comfy_tensor(img_u8: np.ndarray) -> torch.Tensor:
+    img_u8 = HWC3(img_u8)
+    t = torch.from_numpy(img_u8.astype(np.float32) / 255.0)
+    return t.unsqueeze(0)  # [1,H,W,C]
+
+
+_PIPE_CACHE: Dict[Tuple[str, str], Any] = {}  # (model_source, device_str) -> pipeline
+
+
+def _try_load_pipeline(model_source: str, device: torch.device):
+    if pipeline is None:
+        raise RuntimeError(f"transformers import failed: {_TRANSFORMERS_IMPORT_ERROR}")
+
+    key = (model_source, str(device))
+    if key in _PIPE_CACHE:
+        return _PIPE_CACHE[key]
+
+    p = pipeline(task="depth-estimation", model=model_source)
+    try:
+        p.model = p.model.to(device)
+        p.device = device
+    except Exception:
+        pass
+
+    _PIPE_CACHE[key] = p
+    return p
+
+
+def get_depth_pipeline(device: torch.device):
+    if ensure_local_model_files():
+        try:
+            return _try_load_pipeline(str(MODEL_DIR), device)
+        except Exception:
+            pass
+    try:
+        return _try_load_pipeline(ZOE_FALLBACK_REPO_ID, device)
+    except Exception:
+        return None
+
+
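`get_depth_pipeline` prefers the locally downloaded `assets/depth` model and only falls back to the `Intel/zoedepth-nyu-kitti` Hub id if that fails; results are cached per `(model_source, device)` so repeated node executions reuse one pipeline. A minimal usage sketch:

    import torch

    pipe = get_depth_pipeline(torch.device("cpu"))  # may download on first call
    if pipe is None:
        # both the local files and the Hub fallback failed;
        # _salia_depth_execute() copes by returning its input unchanged
        pass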
+def depth_estimate_zoe_style(
+    pipe,
+    input_rgb_u8: np.ndarray,
+    detect_resolution: int,
+    upscale_method: str = "INTER_CUBIC",
+) -> np.ndarray:
+    if detect_resolution == -1:
+        work_img, remove_pad = pad_only_to_64(input_rgb_u8, mode="edge")
+    else:
+        work_img, remove_pad = resize_image_with_pad_min_side(
+            input_rgb_u8,
+            int(detect_resolution),
+            upscale_method=upscale_method,
+            skip_hwc3=False,
+            mode="edge",
+        )
+
+    pil_image = Image.fromarray(work_img)
+
+    with torch.no_grad():
+        result = pipe(pil_image)
+        depth = result["depth"]
+
+    depth_array = np.array(depth, dtype=np.float32)  # handles PIL Image or ndarray
+
+    vmin = float(np.percentile(depth_array, 2))
+    vmax = float(np.percentile(depth_array, 85))
+
+    depth_array = depth_array - vmin
+    denom = (vmax - vmin)
+    if abs(denom) < 1e-12:
+        denom = 1e-6
+    depth_array = depth_array / denom
+
+    depth_array = 1.0 - depth_array
+    depth_image = (depth_array * 255.0).clip(0, 255).astype(np.uint8)
+
+    detected_map = remove_pad(HWC3(depth_image))
+    return detected_map
+
+
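The 2nd/85th-percentile window discards outliers before scaling, and the final `1.0 - ...` flips the map so near regions render bright, the convention depth ControlNets typically expect. With made-up values:

    import numpy as np

    d = np.array([2.0, 43.5, 85.0], dtype=np.float32)  # hypothetical raw depths
    vmin, vmax = 2.0, 85.0                             # 2nd / 85th percentiles
    norm = (d - vmin) / (vmax - vmin)                  # [0.0, 0.5, 1.0]
    inv = 1.0 - norm                                   # [1.0, 0.5, 0.0]: near = bright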
+def resize_to_original(depth_rgb_u8: np.ndarray, w0: int, h0: int) -> np.ndarray:
+    try:
+        import cv2
+        out = cv2.resize(depth_rgb_u8, (w0, h0), interpolation=cv2.INTER_LINEAR)
+        return out.astype(np.uint8)
+    except Exception:
+        pil = Image.fromarray(depth_rgb_u8)
+        pil = pil.resize((w0, h0), resample=Image.BILINEAR)
+        return np.array(pil, dtype=np.uint8)
+
+
+def _salia_depth_execute(image: torch.Tensor, resolution: int = -1) -> torch.Tensor:
     """
+    Internal callable version of the Salia_Depth node:
+        input:  IMAGE [B,H,W,3 or 4]
+        output: IMAGE [B,H,W,3]
     """
+    # Get torch device
+    try:
+        device = model_management.get_torch_device()
+    except Exception:
+        device = torch.device("cpu")
+
+    # Load pipeline
+    try:
+        pipe = get_depth_pipeline(device)
+    except Exception:
+        pipe = None
+
+    # If everything fails, pass the input through unchanged
+    if pipe is None:
+        return image
+
+    # Batch support
+    if image.ndim == 3:
+        image = image.unsqueeze(0)

+    outs = []
+    for i in range(image.shape[0]):
+        try:
+            h0 = int(image[i].shape[0])
+            w0 = int(image[i].shape[1])
+
+            inp_u8 = comfy_tensor_to_u8(image[i])
+
+            # RGBA rule (pre): composite over white, keep alpha for later
+            rgb_for_depth, alpha_u8 = composite_rgba_over_white_keep_alpha(inp_u8)
+            had_rgba = alpha_u8 is not None
+
+            # Depth
+            depth_rgb = depth_estimate_zoe_style(
+                pipe=pipe,
+                input_rgb_u8=rgb_for_depth,
+                detect_resolution=int(resolution),
+                upscale_method="INTER_CUBIC",
+            )
+
+            # Resize back to original size
+            depth_rgb = resize_to_original(depth_rgb, w0=w0, h0=h0)
+
+            # RGBA rule (post): zero out transparent areas (black background)
+            if had_rgba:
+                if alpha_u8.shape[0] != h0 or alpha_u8.shape[1] != w0:
+                    try:
+                        import cv2
+                        alpha_u8 = cv2.resize(alpha_u8, (w0, h0), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
+                    except Exception:
+                        pil_a = Image.fromarray(alpha_u8)
+                        pil_a = pil_a.resize((w0, h0), resample=Image.BILINEAR)
+                        alpha_u8 = np.array(pil_a, dtype=np.uint8)
+
+                depth_rgb = apply_alpha_then_black_background(depth_rgb, alpha_u8)
+
+            outs.append(u8_to_comfy_tensor(depth_rgb))
+        except Exception:
+            outs.append(image[i].unsqueeze(0))
+
+    return torch.cat(outs, dim=0)

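A shape-level smoke test of the inlined depth path (it holds whether or not a pipeline loads, because the fallback returns the input unchanged):

    import torch

    dummy = torch.rand(1, 64, 64, 3)  # Comfy IMAGE layout [B,H,W,C]
    depth = _salia_depth_execute(dummy, resolution=64)
    assert depth.shape[0] == 1 and depth.shape[-1] == 3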


 # -------------------------------------------------------------------------------------

     comp_rgb = overlay_rgb * overlay_a + base_rgb * (1.0 - overlay_a)
     out[:, y:y + s, x:x + s, 0:3] = comp_rgb

+    # If base has alpha, composite alpha too
     if C == 4:
         base_a = out[:, y:y + s, x:x + s, 3:4].clamp(0, 1)
         comp_a = overlay_a + base_a * (1.0 - overlay_a)
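This is standard alpha-over compositing. With made-up numbers: an overlay value of 0.8 at alpha 0.25 over a base value of 0.4 gives 0.8*0.25 + 0.4*0.75 = 0.5, and an overlay alpha of 0.25 over a base alpha of 0.6 combines to 0.25 + 0.6*0.75 = 0.7.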
 # The One-Node Workflow
 # -------------------------------------------------------------------------------------

+class Salia_OneNode_WorkflowSquare:
     CATEGORY = "image/salia"
     RETURN_TYPES = ("IMAGE",)
     RETURN_NAMES = ("image",)

     @classmethod
     def INPUT_TYPES(cls):
         ckpts = folder_paths.get_filename_list("checkpoints") or ["<no checkpoints found>"]
         cns = folder_paths.get_filename_list("controlnet") or ["<no controlnets found>"]
         assets = _list_asset_pngs() or ["<no pngs found>"]

         try:
             import comfy.samplers
             sampler_names = comfy.samplers.KSampler.SAMPLERS

             sampler_names = ["euler"]
             scheduler_names = ["karras"]

         upscale_choices = ["1", "2", "4", "6", "8", "10", "12", "14", "16"]

         return {

                 "upscale_factor": (upscale_choices, {"default": "4"}),

                 "ckpt_name": (ckpts, {}),
                 "control_net_name": (cns, {}),
                 "asset_image": (assets, {}),

                 "controlnet_strength": ("FLOAT", {"default": 0.33, "min": 0.00, "max": 10.00, "step": 0.01}),
                 "controlnet_start_percent": ("FLOAT", {"default": 0.00, "min": 0.00, "max": 1.00, "step": 0.01}),
                 "controlnet_end_percent": ("FLOAT", {"default": 1.00, "min": 0.00, "max": 1.00, "step": 0.01}),

                 "steps": ("INT", {"default": 30, "min": 1, "max": 200, "step": 1}),
                 "cfg": ("FLOAT", {"default": 2.6, "min": 0.00, "max": 10.00, "step": 0.05}),
                 "sampler_name": (sampler_names, {"default": "euler"} if "euler" in sampler_names else {}),
         square_size: int,
         positive_prompt: str,
         negative_prompt: str,
+        upscale_factor: str,
         ckpt_name: str,
         control_net_name: str,
         asset_image: str,

         scheduler: str,
         denoise: float,
     ):
         if image.ndim == 3:
             image = image.unsqueeze(0)
         if image.ndim != 4:
             raise ValueError("Input image must be [B,H,W,C].")

         up = int(upscale_factor)
         if up not in (1, 2, 4, 6, 8, 10, 12, 14, 16):
             raise ValueError("upscale_factor must be one of: 1,2,4,6,8,10,12,14,16")
         if s <= 0:
             raise ValueError("square_size must be > 0")
         if x < 0 or y < 0 or x + s > W or y + s > H:
             raise ValueError(f"Crop out of bounds. image={W}x{H}, crop at ({x},{y}) size={s}")

         up_w = s * up
         up_h = s * up

         if (up_w % 8) != 0 or (up_h % 8) != 0:
             raise ValueError("square_size * upscale_factor must be divisible by 8 (required by VAE pipeline).")

         start_p = float(max(0.0, min(1.0, controlnet_start_percent)))
         end_p = float(max(0.0, min(1.0, controlnet_end_percent)))
         if end_p < start_p:
             start_p, end_p = end_p, start_p

+        # 1) Crop
         crop = image[:, y:y + s, x:x + s, :]
+        crop_rgb = crop[:, :, :, 0:3].contiguous()

+        # 2) Depth (inline Salia_Depth) then Lanczos upscale
+        depth_small = _salia_depth_execute(crop_rgb, resolution=s)
         depth_up = _resize_image_lanczos(depth_small, up_w, up_h)

+        # 3) Upscale crop for VAE Encode
         crop_up = _resize_image_lanczos(crop_rgb, up_w, up_h)

+        # 4) Load asset mask (inline) and resize
         if asset_image == "<no pngs found>":
             raise FileNotFoundError("No PNGs found in assets/images for this plugin.")
+        _asset_img_unused, asset_mask = _load_asset_image_and_mask(asset_image)

         if asset_mask.ndim == 2:
             asset_mask = asset_mask.unsqueeze(0)
         if asset_mask.ndim != 3:
             raise ValueError("Asset mask must be [B,H,W].")

         if asset_mask.shape[0] != B:
             if asset_mask.shape[0] == 1 and B > 1:
                 asset_mask = asset_mask.expand(B, -1, -1)

         asset_mask_up = _resize_mask_lanczos(asset_mask, up_w, up_h)

+        # 5) Load checkpoint + controlnet (cached)
         if ckpt_name == "<no checkpoints found>":
+            raise FileNotFoundError("No checkpoints found in models/checkpoints.")
         if control_net_name == "<no controlnets found>":
+            raise FileNotFoundError("No controlnets found in models/controlnet.")

         model, clip, vae = _load_checkpoint_cached(ckpt_name)
         controlnet = _load_controlnet_cached(control_net_name)

+        import nodes

+        # 6) CLIP encodes
         pos_enc = nodes.CLIPTextEncode()
         neg_enc = nodes.CLIPTextEncode()
         pos_fn = getattr(pos_enc, pos_enc.FUNCTION)
         neg_fn = getattr(neg_enc, neg_enc.FUNCTION)
         (pos_cond,) = pos_fn(text=str(positive_prompt), clip=clip)
         (neg_cond,) = neg_fn(text=str(negative_prompt), clip=clip)

+        # 7) Apply ControlNet
         cn_apply = nodes.ControlNetApplyAdvanced()
         cn_fn = getattr(cn_apply, cn_apply.FUNCTION)
         pos_cn, neg_cn = cn_fn(
             strength=float(controlnet_strength),
             start_percent=float(start_p),

             vae=vae,
         )

+        # 8) VAE Encode
         vae_enc = nodes.VAEEncode()
         vae_enc_fn = getattr(vae_enc, vae_enc.FUNCTION)
         (latent,) = vae_enc_fn(pixels=crop_up, vae=vae)

+        # 9) KSampler (deterministic seed derived from inputs)
         seed_material = (
             f"{ckpt_name}|{control_net_name}|{asset_image}|{x}|{y}|{s}|{up}|"
             f"{steps}|{cfg}|{sampler_name}|{scheduler}|{denoise}|"

             latent_image=latent,
         )

+        # 10) VAE Decode
         vae_dec = nodes.VAEDecode()
         vae_dec_fn = getattr(vae_dec, vae_dec.FUNCTION)
         (decoded_rgb,) = vae_dec_fn(samples=sampled_latent, vae=vae)

+        # 11) JoinImageWithAlpha
         join = nodes.JoinImageWithAlpha()
         join_fn = getattr(join, join.FUNCTION)
         try:
             (rgba_up,) = join_fn(image=decoded_rgb, alpha=asset_mask_up)
         except TypeError:
             (rgba_up,) = join_fn(image=decoded_rgb, mask=asset_mask_up)

+        # 12) Downscale RGBA back to crop size
         rgba_square = _resize_image_lanczos(rgba_up, s, s)

+        # 13) Paste back onto original at X,Y (alpha-over)
         out = _alpha_over_region(image, rgba_square, x=x, y=y)
         return (out,)

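The sampler seed is derived by hashing `seed_material` (hence the `hashlib` import at the top of the file), so identical inputs reproduce identical samples. The exact lines fall outside this hunk's context window, but a plausible sketch of the pattern, assuming sha256:

    import hashlib

    seed = int.from_bytes(hashlib.sha256(seed_material.encode("utf-8")).digest()[:8], "big")
    # any stable reduction to an int works; the point is: same inputs -> same seed
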
 NODE_CLASS_MAPPINGS = {
+    "Salia_OneNode_WorkflowSquare": Salia_OneNode_WorkflowSquare,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
+    "Salia_OneNode_WorkflowSquare": "Salia One-Node Workflow (Crop+Depth+CN+Sample+Paste)",
 }
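ComfyUI discovers the node by importing the custom-node package and reading these two dicts. If this file sits inside a package, the usual pattern is to re-export them (sketch of a hypothetical `__init__.py`):

    from .salia_detailer_ezpz import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS

    __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]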