saliacoel
/

MyCustomNodes

+import hashlib
+import threading
+from typing import Any, Dict, Tuple, Optional
+import torch
+import numpy as np
+from PIL import Image
+import folder_paths
+# -------------------------------------------------------------------------------------
+# Global caches (lazy-load + don't load duplicates across multiple node instances)
+# -------------------------------------------------------------------------------------
# Checkpoint file name -> (model, clip, vae) tuple stored by _load_checkpoint_cached.
_CKPT_CACHE: Dict[str, Tuple[Any, Any, Any]] = dict()
# ControlNet file name -> loaded ControlNet object stored by _load_controlnet_cached.
_CN_CACHE: Dict[str, Any] = dict()
# One lock per cache; each loader holds its lock for the whole lookup-or-load step.
_CKPT_LOCK, _CN_LOCK = threading.Lock(), threading.Lock()
+# -------------------------------------------------------------------------------------
+# PIL helpers (Lanczos resize for IMAGE and MASK)
+# -------------------------------------------------------------------------------------
def _pil_lanczos():
    """Return Pillow's Lanczos resampling filter constant.

    Uses ``Image.Resampling.LANCZOS`` when the ``Resampling`` namespace exists
    on the ``Image`` module, and the module-level ``Image.LANCZOS`` otherwise.
    """
    return Image.Resampling.LANCZOS if hasattr(Image, "Resampling") else Image.LANCZOS
def _image_tensor_to_pil(img: torch.Tensor) -> Image.Image:
    """Convert one Comfy IMAGE frame to a PIL image.

    Accepts ``[B,H,W,C]`` (only the first batch entry is used) or ``[H,W,C]``.
    Values are clamped to ``[0, 1]``, scaled to 8 bits and rounded. A
    4-channel input yields an ``RGBA`` image; anything else is passed to PIL
    as ``RGB``.
    """
    frame = img[0] if img.ndim == 4 else img
    frame = frame.detach().cpu().float().clamp(0, 1)
    pixels = (frame.numpy() * 255.0).round().astype(np.uint8)
    pil_mode = "RGBA" if pixels.shape[-1] == 4 else "RGB"
    return Image.fromarray(pixels, mode=pil_mode)
def _pil_to_image_tensor(pil: Image.Image) -> torch.Tensor:
    """Convert a PIL image to a Comfy IMAGE tensor ``[1,H,W,C]`` in ``[0, 1]``.

    Images that are not already ``RGB``/``RGBA`` are converted first: to
    ``RGBA`` when they carry an ``A`` band, otherwise to ``RGB``.
    """
    if pil.mode not in ("RGB", "RGBA"):
        target_mode = "RGBA" if "A" in pil.getbands() else "RGB"
        pil = pil.convert(target_mode)
    pixels = np.array(pil).astype(np.float32) / 255.0  # [H,W,C]
    return torch.from_numpy(pixels).unsqueeze(0)
def _mask_tensor_to_pil(mask: torch.Tensor) -> Image.Image:
    """Convert one Comfy MASK to an 8-bit grayscale (``L``) PIL image.

    Accepts ``[B,H,W]`` (only the first batch entry is used) or ``[H,W]``.
    Values are clamped to ``[0, 1]`` before being scaled to 0..255 and rounded.
    """
    plane = mask[0] if mask.ndim == 3 else mask
    plane = plane.detach().cpu().float().clamp(0, 1)
    levels = (plane.numpy() * 255.0).round().astype(np.uint8)
    return Image.fromarray(levels, mode="L")
def _pil_to_mask_tensor(pil_l: Image.Image) -> torch.Tensor:
    """Convert a PIL image to a Comfy MASK tensor ``[1,H,W]`` in ``[0, 1]``.

    Any image whose mode is not ``L`` is converted to ``L`` first.
    """
    gray = pil_l if pil_l.mode == "L" else pil_l.convert("L")
    levels = np.array(gray).astype(np.float32) / 255.0  # [H,W]
    return torch.from_numpy(levels).unsqueeze(0)
def _resize_image_lanczos(img: torch.Tensor, w: int, h: int) -> torch.Tensor:
    """Resize every frame of a Comfy IMAGE batch to ``w`` x ``h`` with Lanczos.

    Each frame makes a round trip through PIL (so it is quantised to 8 bits)
    and the resized frames are concatenated back along the batch axis.

    Raises:
        ValueError: if ``img`` is not a 4-D ``[B,H,W,C]`` tensor.
    """
    if img.ndim != 4:
        raise ValueError("Expected IMAGE tensor with shape [B,H,W,C].")

    def _resize_frame(index: int) -> torch.Tensor:
        as_pil = _image_tensor_to_pil(img[index].unsqueeze(0))
        as_pil = as_pil.resize((int(w), int(h)), resample=_pil_lanczos())
        return _pil_to_image_tensor(as_pil)

    batch_size = img.shape[0]
    return torch.cat([_resize_frame(index) for index in range(batch_size)], dim=0)
def _resize_mask_lanczos(mask: torch.Tensor, w: int, h: int) -> torch.Tensor:
    """Resize every entry of a Comfy MASK batch to ``w`` x ``h`` with Lanczos.

    Each mask makes a round trip through an 8-bit ``L`` PIL image and the
    resized masks are concatenated back along the batch axis.

    Raises:
        ValueError: if ``mask`` is not a 3-D ``[B,H,W]`` tensor.
    """
    if mask.ndim != 3:
        raise ValueError("Expected MASK tensor with shape [B,H,W].")

    def _resize_plane(index: int) -> torch.Tensor:
        as_pil = _mask_tensor_to_pil(mask[index].unsqueeze(0))
        as_pil = as_pil.resize((int(w), int(h)), resample=_pil_lanczos())
        return _pil_to_mask_tensor(as_pil)

    batch_size = mask.shape[0]
    return torch.cat([_resize_plane(index) for index in range(batch_size)], dim=0)
+# -------------------------------------------------------------------------------------
+# Core lazy loaders (checkpoint + controlnet), cached globally
+# -------------------------------------------------------------------------------------
def _load_checkpoint_cached(ckpt_name: str):
    """Load a checkpoint through ``nodes.CheckpointLoaderSimple``, once per name.

    The first call for a given ``ckpt_name`` runs the loader and stores its
    ``(model, clip, vae)`` result in ``_CKPT_CACHE``; later calls return the
    stored tuple. The whole lookup-or-load runs while holding ``_CKPT_LOCK``.

    Returns:
        A ``(model, clip, vae)`` tuple.
    """
    with _CKPT_LOCK:
        try:
            return _CKPT_CACHE[ckpt_name]
        except KeyError:
            pass  # not loaded yet -> fall through to the real load

        import nodes  # imported lazily, only on a cache miss

        checkpoint_loader = nodes.CheckpointLoaderSimple()
        load = getattr(checkpoint_loader, checkpoint_loader.FUNCTION)
        model, clip, vae = load(ckpt_name=ckpt_name)
        entry = (model, clip, vae)
        _CKPT_CACHE[ckpt_name] = entry
        return entry
def _load_controlnet_cached(control_net_name: str):
    """Load a ControlNet through ``nodes.ControlNetLoader``, once per name.

    The first call for a given ``control_net_name`` runs the loader and stores
    the result in ``_CN_CACHE``; later calls return the stored object. The
    whole lookup-or-load runs while holding ``_CN_LOCK``.

    Returns:
        The loaded ControlNet object.
    """
    with _CN_LOCK:
        try:
            return _CN_CACHE[control_net_name]
        except KeyError:
            pass  # not loaded yet -> fall through to the real load

        import nodes  # imported lazily, only on a cache miss

        controlnet_loader = nodes.ControlNetLoader()
        load = getattr(controlnet_loader, controlnet_loader.FUNCTION)
        (loaded,) = load(control_net_name=control_net_name)
        _CN_CACHE[control_net_name] = loaded
        return loaded
+# -------------------------------------------------------------------------------------
+# Asset dropdown support (from comfyui-salia_online assets/images)
+# (We still lazy-call the user's LoadImage_SaliaOnline_Assets for consistent mask behavior.)
+# -------------------------------------------------------------------------------------
def _list_asset_pngs_fallback():
    """List ``*.png`` file names found in ``<plugin root>/assets/images``.

    The plugin root is taken to be two directory levels above this file.
    Best-effort: returns a sorted list of names, or an empty list when the
    directory is missing or anything goes wrong while scanning.
    """
    try:
        from pathlib import Path

        images_dir = Path(__file__).resolve().parent.parent / "assets" / "images"
        if images_dir.exists():
            return sorted([entry.name for entry in images_dir.glob("*.png")])
        return []
    except Exception:
        return []
def _list_asset_pngs():
    """Return the PNG asset names offered in the asset dropdown.

    Tries the plugin's own ``utils.io.list_pngs`` first; if importing or
    calling it fails for any reason, falls back to scanning the assets folder
    directly. A falsy result from ``list_pngs`` is normalised to ``[]``.
    """
    try:
        from ..utils.io import list_pngs  # type: ignore

        names = list_pngs()
        return names if names else []
    except Exception:
        return _list_asset_pngs_fallback()
def _load_asset_mask(asset_name: str):
    """Run ``LoadImage_SaliaOnline_Assets`` for ``asset_name`` and return its mask.

    The node is imported inside the function so that importing this module
    does not pull it in. The node's ``run`` returns an ``(image, mask)`` pair;
    only the mask is used here.
    """
    from .salia_loadimage_assets import LoadImage_SaliaOnline_Assets

    _unused_image, mask = LoadImage_SaliaOnline_Assets().run(asset_name)
    return mask
def _run_salia_depth(image: torch.Tensor, resolution: int) -> torch.Tensor:
    """Run the ``Salia_Depth`` node on ``image`` and return its single output.

    The node module is imported inside the function so the dependency is only
    loaded when a depth map is actually requested. ``resolution`` is coerced
    to ``int`` before being handed to the node.
    """
    from .salia_depth import Salia_Depth

    depth_node = Salia_Depth()
    entry_point = getattr(depth_node, depth_node.FUNCTION)
    outputs = entry_point(image=image, resolution=int(resolution))
    (depth_img,) = outputs
    return depth_img
+# -------------------------------------------------------------------------------------
+# Alpha-over paste (RGBA square onto base at X,Y)
+# -------------------------------------------------------------------------------------
def _alpha_over_region(base: torch.Tensor, overlay_rgba: torch.Tensor, x: int, y: int) -> torch.Tensor:
    """Alpha-composite a square RGBA overlay onto a copy of ``base`` at ``(x, y)``.

    Args:
        base: ``[B,H,W,C]`` image with 3 or 4 channels, values in ``[0, 1]``.
        overlay_rgba: ``[B,s,s,4]`` (or ``[1,s,s,4]``, broadcast over the base
            batch) square RGBA patch, values in ``[0, 1]``.
        x: left column of the paste region in ``base``.
        y: top row of the paste region in ``base``.

    Returns:
        A new tensor shaped like ``base``, clamped to ``[0, 1]``; ``base``
        itself is not modified.

    Raises:
        ValueError: for non-4-D inputs, a non-RGBA or non-square overlay, a
            paste region outside the base, or incompatible batch sizes.
    """
    if not (base.ndim == 4 and overlay_rgba.ndim == 4):
        raise ValueError("base and overlay must be [B,H,W,C].")

    B, H, W, C = base.shape
    overlay_batch, overlay_h, overlay_w, overlay_channels = overlay_rgba.shape
    if overlay_channels != 4:
        raise ValueError("overlay_rgba must have 4 channels (RGBA).")
    if overlay_h != overlay_w:
        raise ValueError("overlay must be square.")

    s = overlay_h
    inside = x >= 0 and y >= 0 and x + s <= W and y + s <= H
    if not inside:
        raise ValueError(f"Square paste out of bounds. base={W}x{H}, paste at ({x},{y}) size={s}")

    # A single overlay is shared by every base frame; any other mismatch is an error.
    if overlay_batch != B:
        if not (overlay_batch == 1 and B > 1):
            raise ValueError("Batch mismatch between base and overlay.")
        overlay_rgba = overlay_rgba.expand(B, -1, -1, -1)

    rows = slice(y, y + s)
    cols = slice(x, x + s)
    result = base.clone()

    colour = overlay_rgba[..., 0:3].clamp(0, 1)
    alpha = overlay_rgba[..., 3:4].clamp(0, 1)

    # Standard "over" operator on the colour channels.
    under_colour = result[:, rows, cols, 0:3]
    result[:, rows, cols, 0:3] = colour * alpha + under_colour * (1.0 - alpha)

    # When the base carries alpha too, combine coverage the same way.
    if C == 4:
        under_alpha = result[:, rows, cols, 3:4].clamp(0, 1)
        result[:, rows, cols, 3:4] = alpha + under_alpha * (1.0 - alpha)

    return result.clamp(0, 1)
+# -------------------------------------------------------------------------------------
+# The One-Node Workflow
+# -------------------------------------------------------------------------------------
class Salia_Detailer_EZPZ:
    """Single-node "detailer" workflow for a square region of an image.

    ``run`` crops a square from the input, builds a depth map of the crop,
    upscales both with Lanczos, samples the upscaled crop with a
    depth-conditioned ControlNet, attaches an asset mask as alpha, scales the
    result back down and alpha-composites it over the original image.
    """

    CATEGORY = "image/salia"
    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "run"

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node inputs (dropdowns, numeric ranges and defaults)."""
        # Dropdowns; a placeholder entry is shown when a folder is empty and
        # is rejected again in run().
        ckpts = folder_paths.get_filename_list("checkpoints") or ["<no checkpoints found>"]
        cns = folder_paths.get_filename_list("controlnet") or ["<no controlnets found>"]
        assets = _list_asset_pngs() or ["<no pngs found>"]
        # KSampler dropdowns (match comfy-core), with a minimal fallback.
        try:
            import comfy.samplers
            sampler_names = comfy.samplers.KSampler.SAMPLERS
            scheduler_names = comfy.samplers.KSampler.SCHEDULERS
        except Exception:
            sampler_names = ["euler"]
            scheduler_names = ["karras"]
        # Dropdown values are strings; run() converts the choice with int().
        upscale_choices = ["1", "2", "4", "6", "8", "10", "12", "14", "16"]
        return {
            "required": {
                "image": ("IMAGE",),
                "X_coord": ("INT", {"default": 0, "min": 0, "max": 16384, "step": 1}),
                "Y_coord": ("INT", {"default": 0, "min": 0, "max": 16384, "step": 1}),
                "square_size": ("INT", {"default": 384, "min": 8, "max": 8192, "step": 1}),
                "positive_prompt": ("STRING", {"default": "", "multiline": True}),
                "negative_prompt": ("STRING", {"default": "", "multiline": True}),
                "upscale_factor": (upscale_choices, {"default": "4"}),
                # Model / asset dropdowns
                "ckpt_name": (ckpts, {}),
                "control_net_name": (cns, {}),
                "asset_image": (assets, {}),
                # ControlNet params
                "controlnet_strength": ("FLOAT", {"default": 0.33, "min": 0.00, "max": 10.00, "step": 0.01}),
                "controlnet_start_percent": ("FLOAT", {"default": 0.00, "min": 0.00, "max": 1.00, "step": 0.01}),
                "controlnet_end_percent": ("FLOAT", {"default": 1.00, "min": 0.00, "max": 1.00, "step": 0.01}),
                # KSampler params
                "steps": ("INT", {"default": 30, "min": 1, "max": 200, "step": 1}),
                "cfg": ("FLOAT", {"default": 2.6, "min": 0.00, "max": 10.00, "step": 0.05}),
                "sampler_name": (sampler_names, {"default": "euler"} if "euler" in sampler_names else {}),
                "scheduler": (scheduler_names, {"default": "karras"} if "karras" in scheduler_names else {}),
                "denoise": ("FLOAT", {"default": 0.35, "min": 0.00, "max": 1.00, "step": 0.01}),
            }
        }

    @staticmethod
    def _lookup_node_class(nodes_module, class_name: str):
        """Find a node class by name on ``nodes_module``.

        Looks for a module attribute first and falls back to the module's
        ``NODE_CLASS_MAPPINGS`` registry, so nodes that are registered but not
        defined as module attributes can still be resolved.

        Raises:
            RuntimeError: if the class is found in neither place.
        """
        node_cls = getattr(nodes_module, class_name, None)
        if node_cls is None:
            registry = getattr(nodes_module, "NODE_CLASS_MAPPINGS", None) or {}
            node_cls = registry.get(class_name)
        if node_cls is None:
            raise RuntimeError(f"ComfyUI node '{class_name}' is not available in this installation.")
        return node_cls

    def run(
        self,
        image: torch.Tensor,
        X_coord: int,
        Y_coord: int,
        square_size: int,
        positive_prompt: str,
        negative_prompt: str,
        upscale_factor: str,  # dropdown returns str
        ckpt_name: str,
        control_net_name: str,
        asset_image: str,
        controlnet_strength: float,
        controlnet_start_percent: float,
        controlnet_end_percent: float,
        steps: int,
        cfg: float,
        sampler_name: str,
        scheduler: str,
        denoise: float,
    ):
        """Re-generate the selected square of ``image`` and paste it back.

        Args:
            image: ``[B,H,W,C]`` (or ``[H,W,C]``) tensor with 3 or 4 channels.
            X_coord, Y_coord: top-left corner of the square crop.
            square_size: side length of the crop in pixels.
            positive_prompt, negative_prompt: text fed to ``CLIPTextEncode``.
            upscale_factor: one of the dropdown strings ("1" .. "16").
            ckpt_name, control_net_name, asset_image: dropdown selections.
            controlnet_strength, controlnet_start_percent,
            controlnet_end_percent: ControlNet parameters; the percentages
                are clamped to ``[0, 1]`` and swapped if given in reverse.
            steps, cfg, sampler_name, scheduler, denoise: KSampler parameters.

        Returns:
            A 1-tuple holding the composited image tensor.

        Raises:
            ValueError: for malformed image shapes, an unsupported upscale
                factor, an out-of-bounds crop, an upscaled size that is not a
                multiple of 8, or mismatching mask batch sizes.
            FileNotFoundError: when a dropdown still holds its "<no ... found>"
                placeholder.
            RuntimeError: when the JoinImageWithAlpha node cannot be located.
        """
        # -------------------------
        # Validate / normalize
        # -------------------------
        if image.ndim == 3:
            image = image.unsqueeze(0)
        if image.ndim != 4:
            raise ValueError("Input image must be [B,H,W,C].")
        B, H, W, C = image.shape
        if C not in (3, 4):
            raise ValueError("Input image must have 3 (RGB) or 4 (RGBA) channels.")
        x = int(X_coord)
        y = int(Y_coord)
        s = int(square_size)
        up = int(upscale_factor)
        if up not in (1, 2, 4, 6, 8, 10, 12, 14, 16):
            raise ValueError("upscale_factor must be one of: 1,2,4,6,8,10,12,14,16")
        if s <= 0:
            raise ValueError("square_size must be > 0")
        if x < 0 or y < 0 or x + s > W or y + s > H:
            raise ValueError(f"Crop out of bounds. image={W}x{H}, crop at ({x},{y}) size={s}")
        up_w = s * up
        up_h = s * up
        # VAE/UNet path is happiest with multiples of 8
        if (up_w % 8) != 0 or (up_h % 8) != 0:
            raise ValueError("square_size * upscale_factor must be divisible by 8 (required by VAE pipeline).")
        # Reject placeholder dropdown entries up front, before the depth model
        # or any resize runs, so a misconfigured node fails immediately.
        if asset_image == "<no pngs found>":
            raise FileNotFoundError("No PNGs found in comfyui-salia_online/assets/images")
        if ckpt_name == "<no checkpoints found>":
            raise FileNotFoundError("No checkpoints found in your ComfyUI models/checkpoints folder.")
        if control_net_name == "<no controlnets found>":
            raise FileNotFoundError("No controlnets found in your ComfyUI models/controlnet folder.")
        # Clamp controlnet percent range and make sure start <= end
        start_p = float(max(0.0, min(1.0, controlnet_start_percent)))
        end_p = float(max(0.0, min(1.0, controlnet_end_percent)))
        if end_p < start_p:
            start_p, end_p = end_p, start_p
        # -------------------------
        # 1) Crop square (used by both the depth and the generation path)
        # -------------------------
        crop = image[:, y:y + s, x:x + s, :]
        crop_rgb = crop[:, :, :, 0:3].contiguous()  # force RGB for model/depth
        # -------------------------
        # 2) Depth path: Salia_Depth(crop) then upscale depth with Lanczos
        # -------------------------
        depth_small = _run_salia_depth(crop_rgb, resolution=s)
        depth_up = _resize_image_lanczos(depth_small, up_w, up_h)
        # -------------------------
        # 3) Generation path: upscale crop with Lanczos (VAE-encoded in step 8)
        # -------------------------
        crop_up = _resize_image_lanczos(crop_rgb, up_w, up_h)
        # -------------------------
        # 4) Load asset mask (dropdown) and resize it to match upscaled resolution
        # -------------------------
        asset_mask = _load_asset_mask(asset_image)  # MASK
        if asset_mask.ndim == 2:
            asset_mask = asset_mask.unsqueeze(0)
        if asset_mask.ndim != 3:
            raise ValueError("Asset mask must be [B,H,W].")
        # Match batch: a single mask is shared by every frame
        if asset_mask.shape[0] != B:
            if asset_mask.shape[0] == 1 and B > 1:
                asset_mask = asset_mask.expand(B, -1, -1)
            else:
                raise ValueError("Batch mismatch for asset mask.")
        asset_mask_up = _resize_mask_lanczos(asset_mask, up_w, up_h)
        # -------------------------
        # 5) Load checkpoint + controlnet (lazy + cached)
        # -------------------------
        model, clip, vae = _load_checkpoint_cached(ckpt_name)
        controlnet = _load_controlnet_cached(control_net_name)
        # -------------------------
        # 6) Encode prompts (CLIPTextEncode)
        # -------------------------
        import nodes  # lazy
        pos_enc = nodes.CLIPTextEncode()
        neg_enc = nodes.CLIPTextEncode()
        pos_fn = getattr(pos_enc, pos_enc.FUNCTION)
        neg_fn = getattr(neg_enc, neg_enc.FUNCTION)
        (pos_cond,) = pos_fn(text=str(positive_prompt), clip=clip)
        (neg_cond,) = neg_fn(text=str(negative_prompt), clip=clip)
        # -------------------------
        # 7) Apply ControlNet (ControlNetApplyAdvanced)
        # -------------------------
        cn_apply = nodes.ControlNetApplyAdvanced()
        cn_fn = getattr(cn_apply, cn_apply.FUNCTION)
        pos_cn, neg_cn = cn_fn(
            strength=float(controlnet_strength),
            start_percent=float(start_p),
            end_percent=float(end_p),
            positive=pos_cond,
            negative=neg_cond,
            control_net=controlnet,
            image=depth_up,
            vae=vae,
        )
        # -------------------------
        # 8) VAE Encode (crop_up) -> latent
        # -------------------------
        vae_enc = nodes.VAEEncode()
        vae_enc_fn = getattr(vae_enc, vae_enc.FUNCTION)
        (latent,) = vae_enc_fn(pixels=crop_up, vae=vae)
        # -------------------------
        # 9) KSampler
        # -------------------------
        # There is no seed input: derive a stable 64-bit seed from the other
        # inputs so that changing any of them changes the seed.
        seed_material = (
            f"{ckpt_name}|{control_net_name}|{asset_image}|{x}|{y}|{s}|{up}|"
            f"{steps}|{cfg}|{sampler_name}|{scheduler}|{denoise}|"
            f"{controlnet_strength}|{start_p}|{end_p}|"
            f"{positive_prompt}|{negative_prompt}"
        ).encode("utf-8", errors="ignore")
        seed64 = int(hashlib.sha256(seed_material).hexdigest()[:16], 16)
        ksampler = nodes.KSampler()
        k_fn = getattr(ksampler, ksampler.FUNCTION)
        (sampled_latent,) = k_fn(
            seed=seed64,
            steps=int(steps),
            cfg=float(cfg),
            sampler_name=str(sampler_name),
            scheduler=str(scheduler),
            denoise=float(denoise),
            model=model,
            positive=pos_cn,
            negative=neg_cn,
            latent_image=latent,
        )
        # -------------------------
        # 10) VAE Decode -> RGB image
        # -------------------------
        vae_dec = nodes.VAEDecode()
        vae_dec_fn = getattr(vae_dec, vae_dec.FUNCTION)
        (decoded_rgb,) = vae_dec_fn(samples=sampled_latent, vae=vae)
        # -------------------------
        # 11) JoinImageWithAlpha (decoded_rgb + asset_mask_up) -> RGBA
        # -------------------------
        # In stock ComfyUI this node lives in comfy_extras and is reachable via
        # nodes.NODE_CLASS_MAPPINGS rather than as an attribute of `nodes`, so
        # resolve it through the helper instead of `nodes.JoinImageWithAlpha`.
        join = self._lookup_node_class(nodes, "JoinImageWithAlpha")()
        join_fn = getattr(join, join.FUNCTION)
        # Some Comfy versions name the mask input "alpha", others "mask".
        try:
            (rgba_up,) = join_fn(image=decoded_rgb, alpha=asset_mask_up)
        except TypeError:
            (rgba_up,) = join_fn(image=decoded_rgb, mask=asset_mask_up)
        # -------------------------
        # 12) Downscale RGBA back to original crop resolution (square_size) with Lanczos
        # -------------------------
        rgba_square = _resize_image_lanczos(rgba_up, s, s)
        # -------------------------
        # 13) Paste RGBA square onto original input image at X,Y using alpha-over
        # -------------------------
        out = _alpha_over_region(image, rgba_square, x=x, y=y)
        return (out,)
# Node registration tables: node id -> class, and node id -> display name.
# The id and the display name are both the class name itself.
NODE_CLASS_MAPPINGS = {Salia_Detailer_EZPZ.__name__: Salia_Detailer_EZPZ}
NODE_DISPLAY_NAME_MAPPINGS = {node_id: node_id for node_id in NODE_CLASS_MAPPINGS}