saliacoel committed on
Commit
cf1376b
·
verified ·
1 Parent(s): 71aa491

Upload salia_compare_img.py

Browse files
Files changed (1) hide show
  1. salia_compare_img.py +304 -0
salia_compare_img.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ # -----------------------------
5
+ # Helpers
6
+ # -----------------------------
7
+
8
+ def _bhwc_to_nchw(img: torch.Tensor) -> torch.Tensor:
9
+ # ComfyUI IMAGE is usually float32 in [0,1], shape [B,H,W,C]
10
+ if img.dim() != 4:
11
+ raise ValueError(f"Expected IMAGE tensor with 4 dims [B,H,W,C], got {img.shape}")
12
+ return img.permute(0, 3, 1, 2).contiguous()
13
+
14
+ def _drop_alpha_if_any(x: torch.Tensor) -> torch.Tensor:
15
+ # If RGBA, keep RGB
16
+ if x.shape[1] > 3:
17
+ return x[:, :3, :, :].contiguous()
18
+ return x
19
+
20
+ def _to_luma(x: torch.Tensor) -> torch.Tensor:
21
+ # x: [B,C,H,W], expects C=1 or C=3
22
+ if x.shape[1] == 1:
23
+ return x
24
+ r = x[:, 0:1, :, :]
25
+ g = x[:, 1:2, :, :]
26
+ b = x[:, 2:3, :, :]
27
+ # Standard-ish luma weights
28
+ return (0.2989 * r + 0.5870 * g + 0.1140 * b)
29
+
30
+ def _resize_max(x: torch.Tensor, max_size: int) -> torch.Tensor:
31
+ if max_size <= 0:
32
+ return x
33
+ b, c, h, w = x.shape
34
+ m = max(h, w)
35
+ if m <= max_size:
36
+ return x
37
+ scale = max_size / float(m)
38
+ nh = max(1, int(round(h * scale)))
39
+ nw = max(1, int(round(w * scale)))
40
+ return F.interpolate(x, size=(nh, nw), mode="bilinear", align_corners=False)
41
+
42
+ def _gaussian_blur(x: torch.Tensor, sigma: float) -> torch.Tensor:
43
+ if sigma <= 0:
44
+ return x
45
+
46
+ # radius ~ 3*sigma
47
+ radius = int(max(1, round(3.0 * sigma)))
48
+ ksize = 2 * radius + 1
49
+ device = x.device
50
+ dtype = x.dtype
51
+
52
+ coords = torch.arange(-radius, radius + 1, device=device, dtype=dtype)
53
+ kernel1d = torch.exp(-(coords * coords) / (2.0 * sigma * sigma))
54
+ kernel1d = kernel1d / (kernel1d.sum() + 1e-12)
55
+
56
+ c = x.shape[1]
57
+
58
+ # separable conv: horizontal then vertical
59
+ kh = kernel1d.view(1, 1, 1, ksize).repeat(c, 1, 1, 1)
60
+ kv = kernel1d.view(1, 1, ksize, 1).repeat(c, 1, 1, 1)
61
+
62
+ out = F.conv2d(x, kh, padding=(0, radius), groups=c)
63
+ out = F.conv2d(out, kv, padding=(radius, 0), groups=c)
64
+ return out
65
+
66
+ def _sobel_edges(y: torch.Tensor) -> torch.Tensor:
67
+ # y: [B,1,H,W] or [B,C,H,W]
68
+ device = y.device
69
+ dtype = y.dtype
70
+ c = y.shape[1]
71
+
72
+ kx = torch.tensor(
73
+ [[-1, 0, 1],
74
+ [-2, 0, 2],
75
+ [-1, 0, 1]],
76
+ device=device, dtype=dtype
77
+ ) / 8.0
78
+
79
+ ky = torch.tensor(
80
+ [[-1, -2, -1],
81
+ [ 0, 0, 0],
82
+ [ 1, 2, 1]],
83
+ device=device, dtype=dtype
84
+ ) / 8.0
85
+
86
+ kx = kx.view(1, 1, 3, 3).repeat(c, 1, 1, 1)
87
+ ky = ky.view(1, 1, 3, 3).repeat(c, 1, 1, 1)
88
+
89
+ gx = F.conv2d(y, kx, padding=1, groups=c)
90
+ gy = F.conv2d(y, ky, padding=1, groups=c)
91
+ return torch.sqrt(gx * gx + gy * gy + 1e-12)
92
+
93
+ def _ssim(x: torch.Tensor, y: torch.Tensor, window_size: int = 11, sigma: float = 1.5) -> torch.Tensor:
94
+ """
95
+ SSIM per batch item. Returns shape [B], roughly in [0,1] for normal images.
96
+ x,y: [B,C,H,W]
97
+ """
98
+ device = x.device
99
+ dtype = x.dtype
100
+ c = x.shape[1]
101
+ radius = window_size // 2
102
+
103
+ coords = torch.arange(window_size, device=device, dtype=dtype) - radius
104
+ g = torch.exp(-(coords * coords) / (2.0 * sigma * sigma))
105
+ g = g / (g.sum() + 1e-12)
106
+ w2d = (g[:, None] * g[None, :]).view(1, 1, window_size, window_size)
107
+ w2d = w2d.repeat(c, 1, 1, 1)
108
+
109
+ mu_x = F.conv2d(x, w2d, padding=radius, groups=c)
110
+ mu_y = F.conv2d(y, w2d, padding=radius, groups=c)
111
+
112
+ mu_x2 = mu_x * mu_x
113
+ mu_y2 = mu_y * mu_y
114
+ mu_xy = mu_x * mu_y
115
+
116
+ sigma_x2 = F.conv2d(x * x, w2d, padding=radius, groups=c) - mu_x2
117
+ sigma_y2 = F.conv2d(y * y, w2d, padding=radius, groups=c) - mu_y2
118
+ sigma_xy = F.conv2d(x * y, w2d, padding=radius, groups=c) - mu_xy
119
+
120
+ C1 = (0.01) ** 2
121
+ C2 = (0.03) ** 2
122
+
123
+ num = (2.0 * mu_xy + C1) * (2.0 * sigma_xy + C2)
124
+ den = (mu_x2 + mu_y2 + C1) * (sigma_x2 + sigma_y2 + C2)
125
+
126
+ ssim_map = num / (den + 1e-12)
127
+ return ssim_map.mean(dim=[1, 2, 3]) # [B]
128
+
129
+ def _hist_chi2(x: torch.Tensor, y: torch.Tensor, bins: int = 32) -> torch.Tensor:
130
+ """
131
+ Color histogram chi-square distance. Returns [B].
132
+ Done on CPU for compatibility (hist ops can be awkward on some GPUs).
133
+ x,y: [B,C,H,W] in [0,1]
134
+ """
135
+ x_cpu = x.detach().float().cpu()
136
+ y_cpu = y.detach().float().cpu()
137
+ b, c, _, _ = x_cpu.shape
138
+ out = []
139
+
140
+ eps = 1e-12
141
+ for i in range(b):
142
+ dist = 0.0
143
+ for ch in range(c):
144
+ hx = torch.histc(x_cpu[i, ch], bins=bins, min=0.0, max=1.0)
145
+ hy = torch.histc(y_cpu[i, ch], bins=bins, min=0.0, max=1.0)
146
+ hx = hx / (hx.sum() + eps)
147
+ hy = hy / (hy.sum() + eps)
148
+
149
+ # chi-square distance
150
+ dist += 0.5 * torch.sum((hx - hy) ** 2 / (hx + hy + eps)).item()
151
+ out.append(dist / float(c))
152
+
153
+ return torch.tensor(out, dtype=torch.float32, device=x.device)
154
+
155
+
156
+ # -----------------------------
157
+ # ComfyUI Node
158
+ # -----------------------------
159
+
160
class ImageCompareFloat:
    """
    Compares two ComfyUI IMAGE inputs and returns a single float score:
      - smaller score => more similar (likely frozen)
      - larger score  => more different (moving)

    Fix vs. the original: `a`/`b` were re-passed through `_resize_max` in the
    histogram branches even though they were already capped at `downscale_max`
    (a guaranteed no-op); the redundant calls are removed. The channel
    coercions, previously repeated inline, are factored into helpers.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image_a": ("IMAGE",),
                "image_b": ("IMAGE",),
                "mode": (["pixel_mae", "ssim", "hist_chi2", "hybrid"],),
                "color_space": (["RGB", "LUMA"],),
                "downscale_max": ("INT", {"default": 256, "min": 32, "max": 2048, "step": 16}),
                "blur_sigma": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.1}),
                "hist_bins": ("INT", {"default": 32, "min": 8, "max": 256, "step": 8}),
                "scale": ("FLOAT", {"default": 1000.0, "min": 0.001, "max": 1000000.0, "step": 1.0}),

                # Hybrid weights (used only when mode="hybrid")
                "w_pixel": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.05}),
                "w_ssim": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.05}),
                "w_edge": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 10.0, "step": 0.05}),
                "w_hist": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.05}),
            }
        }

    RETURN_TYPES = ("FLOAT",)
    RETURN_NAMES = ("difference",)
    FUNCTION = "compare"
    CATEGORY = "image/analysis"

    @staticmethod
    def _as_rgb(x):
        # Guarantee 3 channels for histogram metrics (replicate luma if needed).
        return x if x.shape[1] == 3 else x.repeat(1, 3, 1, 1)

    @staticmethod
    def _as_luma(x):
        # Guarantee a single channel for structure metrics (SSIM / edges).
        return x if x.shape[1] == 1 else _to_luma(x)

    def compare(
        self,
        image_a,
        image_b,
        mode,
        color_space,
        downscale_max,
        blur_sigma,
        hist_bins,
        scale,
        w_pixel,
        w_ssim,
        w_edge,
        w_hist,
    ):
        """Compute the difference score for a pair of IMAGE batches.

        Returns a 1-tuple containing a single float (batch-averaged,
        multiplied by `scale`). Raises ValueError for an unknown `mode`.
        """
        a = _drop_alpha_if_any(_bhwc_to_nchw(image_a))
        b = _drop_alpha_if_any(_bhwc_to_nchw(image_b))

        # Match batch sizes: broadcast a singleton batch; otherwise truncate
        # both to the shorter batch so items pair up.
        if a.shape[0] != b.shape[0]:
            if a.shape[0] == 1:
                a = a.repeat(b.shape[0], 1, 1, 1)
            elif b.shape[0] == 1:
                b = b.repeat(a.shape[0], 1, 1, 1)
            else:
                m = min(a.shape[0], b.shape[0])
                a = a[:m]
                b = b[:m]

        # Match spatial size (avoid errors if upstream produced different sizes)
        if a.shape[2:] != b.shape[2:]:
            b = F.interpolate(b, size=a.shape[2:], mode="bilinear", align_corners=False)

        # Clamp to safe range, then downscale ONCE for speed + robustness.
        # After this, max(H, W) <= downscale_max holds for both tensors.
        a = _resize_max(a.clamp(0.0, 1.0), downscale_max)
        b = _resize_max(b.clamp(0.0, 1.0), downscale_max)

        # Select comparison space
        if color_space == "LUMA":
            a_cs = _to_luma(a)
            b_cs = _to_luma(b)
        else:
            a_cs = a
            b_cs = b

        # Blur to ignore tiny diffusion flicker / grain
        a_blur = _gaussian_blur(a_cs, blur_sigma)
        b_blur = _gaussian_blur(b_cs, blur_sigma)

        if mode == "pixel_mae":
            per_item = torch.mean(torch.abs(a_blur - b_blur), dim=[1, 2, 3])

        elif mode == "ssim":
            # SSIM is more stable on luma/structure, so force luma for this metric
            s = _ssim(self._as_luma(a_blur), self._as_luma(b_blur))
            per_item = (1.0 - s).clamp(min=0.0)

        elif mode == "hist_chi2":
            # Histograms use RGB if available (color distribution); a/b are
            # already capped at downscale_max, so no further resize is needed.
            per_item = _hist_chi2(self._as_rgb(a), self._as_rgb(b), bins=hist_bins)

        elif mode == "hybrid":
            # Pixel MAE (blurred)
            pix = torch.mean(torch.abs(a_blur - b_blur), dim=[1, 2, 3])

            # SSIM diff on luma
            a_y = self._as_luma(a_blur)
            b_y = self._as_luma(b_blur)
            ssim_diff = (1.0 - _ssim(a_y, b_y)).clamp(min=0.0)

            # Edge MAE on luma (good against tiny color shifts)
            edge = torch.mean(torch.abs(_sobel_edges(a_y) - _sobel_edges(b_y)), dim=[1, 2, 3])

            # Histogram chi2 on RGB (global color changes)
            hist = _hist_chi2(self._as_rgb(a), self._as_rgb(b), bins=hist_bins)

            per_item = (w_pixel * pix) + (w_ssim * ssim_diff) + (w_edge * edge) + (w_hist * hist)

        else:
            raise ValueError(f"Unknown mode: {mode}")

        # Reduce to single float (average across batch)
        score = float(per_item.mean().item() * scale)
        return (score,)
296
+
297
+
298
# Registration tables read by ComfyUI when this module is imported:
# internal node ID -> implementing class.
NODE_CLASS_MAPPINGS = {
    "ImageCompareFloat": ImageCompareFloat
}

# Internal node ID -> human-readable title shown in the node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "ImageCompareFloat": "Image Compare → Float (Freeze Detect)"
}