saliacoel commited on
Commit
7049a6e
·
verified ·
1 Parent(s): 663cd18

Upload salia_turn_to_pixelart.py

Browse files
Files changed (1) hide show
  1. salia_turn_to_pixelart.py +476 -0
salia_turn_to_pixelart.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from typing import Dict, Tuple
5
+
6
+ import numpy as np
7
+ import torch
8
+
9
+ # --- OpenCV is required for the perspective warp used by the GIMP-like tilt.
10
+ try:
11
+ import cv2 # type: ignore
12
+ except Exception:
13
+ cv2 = None
14
+
15
+ # ComfyUI internals (available inside ComfyUI runtime)
16
+ from comfy.utils import common_upscale
17
+ from comfy import model_management
18
+
19
+
20
+ # -----------------------------
21
+ # GIMP-like 3D Transform (Tilt)
22
+ # -----------------------------
23
+
24
def _interp_flag(name: str) -> int:
    """Translate a user-facing interpolation name into the matching cv2 flag.

    Unknown, empty, or ``None`` names fall back to ``cv2.INTER_LINEAR``.
    """
    lookup = {
        "nearest": cv2.INTER_NEAREST,
        "linear": cv2.INTER_LINEAR,
        "cubic": cv2.INTER_CUBIC,
        "lanczos4": cv2.INTER_LANCZOS4,
    }
    return lookup.get((name or "").lower(), cv2.INTER_LINEAR)
36
+
37
+
38
+ _H_CACHE: Dict[Tuple[int, int, float, float, float, float, float, float, float, str], np.ndarray] = {}
39
+
40
+
41
def _compute_homography(
    w: int,
    h: int,
    *,
    angle_x_deg: float,
    fov_deg: float,
    offset_x: float,
    offset_y: float,
    offset_z: float,
    vp_x_frac: float,
    vp_y_frac: float,
    fov_basis: str,
) -> np.ndarray:
    """Build (and cache) the 3x3 homography for a GIMP-like 3D tilt.

    The image is treated as a plane centered at the origin (+Y up), rotated
    around the X axis by ``angle_x_deg``, translated by the pixel offsets, and
    perspective-projected with a focal length derived from ``fov_deg`` over the
    dimension selected by ``fov_basis`` ("width" default, or "height"/"max"/
    "min"/"diagonal"). The projected corners define the destination quad for
    ``cv2.getPerspectiveTransform``.

    Returns the homography mapping source pixel coords to output pixel coords;
    results are memoized in the module-level ``_H_CACHE``.
    """
    # Cache key: every parameter that influences the resulting matrix.
    key = (
        w,
        h,
        float(angle_x_deg),
        float(fov_deg),
        float(offset_x),
        float(offset_y),
        float(offset_z),
        float(vp_x_frac),
        float(vp_y_frac),
        str(fov_basis),
    )
    cached = _H_CACHE.get(key)
    if cached is not None:
        return cached

    fov_rad = math.radians(float(fov_deg))
    # avoid singularities (tan(0) and tan(90°) blow up the focal length)
    fov_rad = min(max(fov_rad, math.radians(1e-3)), math.radians(179.999))

    # Derive focal length in pixels (approx. like "Angle of view" in GIMP's tool).
    if fov_basis == "height":
        ref = (h - 1)
    elif fov_basis == "max":
        ref = max(w - 1, h - 1)
    elif fov_basis == "min":
        ref = min(w - 1, h - 1)
    elif fov_basis == "diagonal":
        ref = math.hypot(w - 1, h - 1)
    else:
        # default: width
        ref = (w - 1)

    f = 0.5 * float(ref) / math.tan(fov_rad / 2.0)
    cam_dist = f  # chosen so that with 0 rotation, the result is identity (no scaling)

    # Half-extents of the plane in pixels.
    hw = (w - 1) / 2.0
    hh = (h - 1) / 2.0

    # Plane corners in 3D, centered at origin (local object space).
    # y is "up" here, so top edge has +hh.
    corners = np.array(
        [
            [-hw, +hh, 0.0],  # top-left
            [+hw, +hh, 0.0],  # top-right
            [+hw, -hh, 0.0],  # bottom-right
            [-hw, -hh, 0.0],  # bottom-left
        ],
        dtype=np.float32,
    )

    ax = math.radians(float(angle_x_deg))
    c, s = math.cos(ax), math.sin(ax)

    x = corners[:, 0]
    y = corners[:, 1]
    z = corners[:, 2]

    # Rotation around X axis
    y_r = y * c - z * s
    z_r = y * s + z * c
    x_r = x

    # Translation in object space (pixels)
    x_r = x_r + float(offset_x)
    y_r = y_r + float(offset_y)
    z_r = z_r + float(offset_z)

    # Vanishing point in output pixel coords
    vp_x = float(vp_x_frac) * (w - 1)
    vp_y = float(vp_y_frac) * (h - 1)

    # Perspective projection to 2D; clamp depth so corners behind the camera
    # cannot produce a division by ~0.
    depth = cam_dist + z_r
    depth = np.maximum(depth, 1e-4).astype(np.float32)

    # Note the minus on Y: object space is +Y-up, image space is +Y-down.
    dst = np.stack(
        [
            vp_x + (x_r * f) / depth,
            vp_y - (y_r * f) / depth,
        ],
        axis=1,
    ).astype(np.float32)

    # Source quad: the full image rectangle, same corner order as `corners`.
    src = np.array(
        [
            [0.0, 0.0],
            [w - 1.0, 0.0],
            [w - 1.0, h - 1.0],
            [0.0, h - 1.0],
        ],
        dtype=np.float32,
    )

    Hm = cv2.getPerspectiveTransform(src, dst)
    _H_CACHE[key] = Hm
    return Hm
151
+
152
+
153
def _warp_rgba(
    img_rgba: np.ndarray,
    Hm: np.ndarray,
    *,
    interpolation: int,
) -> np.ndarray:
    """Apply homography ``Hm`` to an HxWx4 float32 RGBA image in [0..1].

    Resampling happens in premultiplied alpha to avoid dark fringes at
    transparent edges; the result is un-premultiplied before returning.
    Out-of-frame areas become fully transparent black.
    """
    height, width = img_rgba.shape[:2]

    source = np.clip(img_rgba.astype(np.float32), 0.0, 1.0)

    # Premultiply alpha before resampling.
    alpha_in = source[..., 3:4]
    premultiplied = source.copy()
    premultiplied[..., :3] *= alpha_in

    warped = cv2.warpPerspective(
        premultiplied,
        Hm,
        (width, height),
        flags=interpolation,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0, 0),
    )
    warped = np.clip(warped, 0.0, 1.0)

    # Un-premultiply; pixels with (near-)zero alpha are forced to black.
    alpha_out = warped[..., 3:4]
    visible = alpha_out > 1e-6
    safe_alpha = np.where(visible, alpha_out, 1.0)
    rgb = np.where(visible, warped[..., :3] / safe_alpha, np.zeros_like(warped[..., :3]))

    return np.concatenate([np.clip(rgb, 0.0, 1.0), alpha_out], axis=2)
190
+
191
+
192
+ # -----------------------------
193
+ # Resize (KJ Resize Image v2-like, simplified for fixed use)
194
+ # -----------------------------
195
+
196
+ def _round_down_divisible(x: int, divisible_by: int) -> int:
197
+ if divisible_by and divisible_by > 1:
198
+ return int(x - (x % divisible_by))
199
+ return int(x)
200
+
201
+
202
def _resize_like_kj_v2_stretch_rgba(
    image_rgba: torch.Tensor,
    *,
    width: int,
    height: int,
    upscale_method: str,
    divisible_by: int = 2,
    device: str = "gpu",
) -> torch.Tensor:
    """
    Mimics KJ ImageResizeKJv2 for the specific case we need:
      - keep_proportion == "stretch"
      - crop_position == "center" (irrelevant in stretch)
      - pad_color == "0,0,0" (irrelevant in stretch)
      - device == "gpu"
    Supports RGBA by treating it as 4 channels in IMAGE tensor.

    Args:
        image_rgba: [B,H,W,C] tensor with C >= 4 (RGBA).
        width/height: target size; 0 keeps the source dimension (KJ convention).
        upscale_method: method string understood by comfy's common_upscale.
        divisible_by: target dims are rounded down to a multiple of this.
        device: "gpu" routes through model_management's torch device, else CPU.

    Returns:
        [B,height,width,C] float32 tensor on the selected device.
    """
    if not isinstance(image_rgba, torch.Tensor):
        raise TypeError("image must be a torch.Tensor")
    if image_rgba.ndim != 4:
        raise ValueError(f"Expected image shape [B,H,W,C], got {tuple(image_rgba.shape)}")

    _, src_h, src_w, channels = image_rgba.shape
    if channels < 4:
        raise ValueError(f"Expected RGBA (4 channels). Got C={channels}.")

    # KJ behavior: a 0 dimension means "keep the original size".
    target_w = int(src_w) if width == 0 else int(width)
    target_h = int(src_h) if height == 0 else int(height)

    target_w = _round_down_divisible(target_w, int(divisible_by))
    target_h = _round_down_divisible(target_h, int(divisible_by))

    # Select device (KJ: GPU uses model_management.get_torch_device()).
    target_dev = (
        model_management.get_torch_device()
        if device.lower() == "gpu"
        else torch.device("cpu")
    )

    # common_upscale expects [B,C,H,W], so swap channel position around the call.
    chw = image_rgba.to(target_dev, dtype=torch.float32).movedim(-1, 1)
    resized = common_upscale(chw, target_w, target_h, upscale_method, crop="disabled")
    return resized.movedim(1, -1)
251
+
252
+
253
+ # -----------------------------
254
+ # Alpha compositing utilities
255
+ # -----------------------------
256
+
257
+ def _alpha_composite_over(
258
+ base: torch.Tensor,
259
+ over: torch.Tensor,
260
+ *,
261
+ opacity: float = 1.0,
262
+ eps: float = 1e-6,
263
+ ) -> torch.Tensor:
264
+ """
265
+ Porter-Duff 'over' with layer opacity.
266
+ base, over: [B,H,W,4] in un-premultiplied RGBA, float in [0..1]
267
+ opacity multiplies the over layer's alpha (like a layer opacity slider).
268
+ """
269
+ if base.shape != over.shape:
270
+ raise ValueError(f"Composite requires same shape. base={tuple(base.shape)} over={tuple(over.shape)}")
271
+
272
+ base = base.to(dtype=torch.float32)
273
+ over = over.to(dtype=torch.float32)
274
+
275
+ b_rgb = base[..., :3]
276
+ b_a = base[..., 3:4].clamp(0.0, 1.0)
277
+
278
+ o_rgb = over[..., :3]
279
+ o_a = (over[..., 3:4].clamp(0.0, 1.0) * float(opacity)).clamp(0.0, 1.0)
280
+
281
+ # Premultiply
282
+ b_rgb_p = b_rgb * b_a
283
+ o_rgb_p = o_rgb * o_a
284
+
285
+ out_a = o_a + b_a * (1.0 - o_a)
286
+ out_rgb_p = o_rgb_p + b_rgb_p * (1.0 - o_a)
287
+
288
+ # Un-premultiply (vectorized)
289
+ out_a_safe = out_a.clamp(min=eps)
290
+ out_rgb = out_rgb_p / out_a_safe
291
+ out_rgb = torch.where(out_a > eps, out_rgb, torch.zeros_like(out_rgb_p))
292
+
293
+ out = torch.cat([out_rgb.clamp(0.0, 1.0), out_a.clamp(0.0, 1.0)], dim=-1)
294
+ return out
295
+
296
+
297
+ # -----------------------------
298
+ # The ComfyUI node
299
+ # -----------------------------
300
+
301
class SaliaTurnToPixelart:
    """
    ComfyUI node: salia_turn_to_pixelart

    Input : IMAGE (RGB or RGBA; RGB is auto-upgraded to RGBA with alpha=1)
    Output: IMAGE (RGBA)

    Single image behavior:
    - Matches the original pipeline.

    Batch behavior:
    - Each image in the batch is processed fully one-by-one through the same pipeline.
    - If input batch size > 1, outputs are concatenated horizontally into ONE single image:
      [1, H, W*B, 4]
      with index 0 on the left, then 1 attached to the right, etc.

    Pipeline per image (see apply/_process_one):
      1. GIMP-like 3D tilt via a cached homography + cv2 perspective warp.
      2. Four stretch resizes of the tilted image to a fixed target size
         (area / bicubic / nearest-exact / lanczos).
      3. The 'area' layer is composited onto itself 5x.
      4. bicubic @ 55%, nearest @ 50%, lanczos @ 50% are layered on top.
    """

    @classmethod
    def INPUT_TYPES(cls):
        # Single required IMAGE input; all pipeline parameters are fixed class constants.
        return {"required": {"image": ("IMAGE",)}}

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "apply"
    CATEGORY = "salia"

    # Fixed pipeline constants
    _TILT_ANGLE_X_DEG = -21.0
    _TILT_FOV_DEG = 80.0
    _TILT_OFFSET_X_PX = 0.0
    _TILT_OFFSET_Y_PX = 100.0  # positive is "down" in typical image coordinates
    _TILT_OFFSET_Z_PX = 0.0
    _TILT_VP_X = 0.5  # vanishing point as a fraction of width
    _TILT_VP_Y = 0.5  # vanishing point as a fraction of height
    _TILT_FOV_BASIS = "max"
    _TILT_INTERP = "lanczos4"

    _RESIZE_W = 134
    _RESIZE_H = 165
    _DIVISIBLE_BY = 2
    _DEVICE = "gpu"

    def apply(self, image: torch.Tensor):
        """Run the fixed pixelart pipeline on a [B,H,W,3|4] IMAGE tensor.

        Returns a 1-tuple with a [1,H2,W2,4] tensor for B == 1, or a
        horizontally concatenated [1,H2,W2*B,4] strip for B > 1.

        Raises:
            RuntimeError: if OpenCV (cv2) failed to import at module load.
            TypeError / ValueError: on non-tensor input or unexpected shape.
        """
        if cv2 is None:
            raise RuntimeError(
                "salia_turn_to_pixelart requires OpenCV (cv2). "
                "Install it in your ComfyUI environment (e.g. pip install opencv-python)."
            )

        if not isinstance(image, torch.Tensor):
            raise TypeError("image must be a torch.Tensor")

        if image.ndim != 4:
            raise ValueError(f"Expected image shape [B,H,W,C], got {tuple(image.shape)}")

        b, h, w, c = image.shape
        if c not in (3, 4):
            raise ValueError(f"Expected 3 or 4 channels (RGB/RGBA), got {c}")

        # Ensure float32 in [0..1]
        # NOTE(review): values are converted but not clamped here; downstream
        # steps clip, so out-of-range inputs are tolerated.
        img = image.detach().to(dtype=torch.float32)

        # Force RGBA
        if c == 3:
            alpha = torch.ones((b, h, w, 1), dtype=img.dtype, device=img.device)
            rgba = torch.cat([img, alpha], dim=-1)
        else:
            rgba = img[..., :4]

        # Our 3D math uses +Y as "up", while image UIs usually treat +Y as "down".
        # Convert a "downward" pixel offset into the +Y-up coordinate system.
        offset_y_internal = -float(self._TILT_OFFSET_Y_PX)

        # Precompute homography once (same for all images in the batch)
        Hm = _compute_homography(
            int(w),
            int(h),
            angle_x_deg=float(self._TILT_ANGLE_X_DEG),
            fov_deg=float(self._TILT_FOV_DEG),
            offset_x=float(self._TILT_OFFSET_X_PX),
            offset_y=float(offset_y_internal),
            offset_z=float(self._TILT_OFFSET_Z_PX),
            vp_x_frac=float(self._TILT_VP_X),
            vp_y_frac=float(self._TILT_VP_Y),
            fov_basis=str(self._TILT_FOV_BASIS),
        )
        interp = _interp_flag(self._TILT_INTERP)

        def _process_one(rgba_one: torch.Tensor) -> torch.Tensor:
            """
            rgba_one: [1,H,W,4] float32, any device.
            returns: [1,H2,W2,4] float32 on CPU (clamped).
            """
            # ----- Step 1: GIMP 3D Tilt (cv2 on CPU numpy) -----
            rgba_cpu = rgba_one.cpu().numpy()  # [1,H,W,4]
            out_np = _warp_rgba(rgba_cpu[0], Hm, interpolation=interp)  # [H,W,4]
            tilted = torch.from_numpy(out_np).unsqueeze(0).to(dtype=torch.float32)  # [1,H,W,4], CPU

            # ----- Step 2: Resize (4 ways) -----
            # Same tilted source resized with four different filters; they are
            # blended below to get the final pixelart look.
            area = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="area",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )
            bicubic = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="bicubic",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )
            nearest = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="nearest-exact",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )
            lanczos = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="lanczos",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )

            # Ensure everything is on the same device for compositing
            dev = area.device
            bicubic = bicubic.to(dev)
            nearest = nearest.to(dev)
            lanczos = lanczos.to(dev)

            # ----- Step 3: 5x area (stack 5 identical layers) -----
            # Compositing a layer over itself repeatedly densifies its alpha.
            area_5x = area
            for _ in range(4):  # base + 4 overlays = 5 total
                area_5x = _alpha_composite_over(area_5x, area, opacity=1.0)

            # ----- Step 4: Composite final layers (bottom -> top) -----
            # 1) bottom: 5x_area (100%)
            final = area_5x
            # 2) bicubic @ 55%
            final = _alpha_composite_over(final, bicubic, opacity=0.55)
            # 3) nearest @ 50%
            final = _alpha_composite_over(final, nearest, opacity=0.50)
            # 4) lanczos @ 50% (topmost)
            final = _alpha_composite_over(final, lanczos, opacity=0.50)

            return final.clamp(0.0, 1.0).cpu()

        # Process each image fully one-by-one
        outputs_hw4 = []
        for i in range(b):
            out_i = _process_one(rgba[i : i + 1])  # [1,H2,W2,4]
            outputs_hw4.append(out_i[0])  # [H2,W2,4]

        # If single image, preserve original output shape [1,H2,W2,4]
        if b == 1:
            return (outputs_hw4[0].unsqueeze(0).contiguous(),)

        # If batch, concatenate horizontally into ONE long image [1,H2,W2*B,4]
        strip = torch.cat(outputs_hw4, dim=1).unsqueeze(0).contiguous()
        return (strip,)
468
+
469
+
470
# ComfyUI registration: maps the node's unique key to its implementing class.
NODE_CLASS_MAPPINGS = {
    "salia_turn_to_pixelart": SaliaTurnToPixelart,
}

# Display name shown in the ComfyUI node picker (kept identical to the key).
NODE_DISPLAY_NAME_MAPPINGS = {
    "salia_turn_to_pixelart": "salia_turn_to_pixelart",
}