saliacoel
/

x

Model card Files Files and versions

xet

Community

saliacoel committed on Apr 22

Commit

58a1f6b

verified ·

1 Parent(s): 8de5a16

Upload salia_sprite_batch_stabilizer.py

Browse files

Files changed (1) hide show

salia_sprite_batch_stabilizer.py +306 -0

salia_sprite_batch_stabilizer.py ADDED Viewed

	@@ -0,0 +1,306 @@

+import math
+from collections import deque
+import numpy as np
+import torch
class SpriteBatchStabilizeToTarget:
    """
    ComfyUI IMAGE batch node that aligns a sprite to a fixed target point.

    Input image tensor: [B, H, W, C], C can be 3/RGB or 4/RGBA.
    Output image tensor: [B, H_out, W_out, 3].

    The node:
      1. Composites RGBA over white, if needed.
      2. Estimates the background color as the median of the image border.
      3. Looks along row ``coord_y_height`` for contiguous non-background runs
         and picks the run with the largest summed color distance (run length
         is only the tie-breaker).
      4. Shifts the whole frame so that the run's center moves by
         ``target_center - detected_center``.
      5. Expands the batch canvas enough that no shifted frame is clipped.
         Because of the left/top padding, the run center ends up at
         ``(pad_left + target_center_x, pad_top + target_center_y)`` in the
         output canvas; the padding is shared by every frame in the batch.
      6. Re-composites onto white and returns RGB.
    """

    # Internal tuning constants. Increase MIN_BACKGROUND_TOLERANCE if JPEG/grey
    # background noise is being detected as sprite. Decrease it if very pale
    # sprites are being ignored.
    MIN_BACKGROUND_TOLERANCE = 0.055
    MAX_BACKGROUND_TOLERANCE = 0.22
    NOISE_SIGMA_MULTIPLIER = 6.0
    SMALL_GAP_FRACTION_OF_WIDTH = 0.01
    SMALL_GAP_MIN_PIXELS = 2
    SMALL_GAP_MAX_PIXELS = 12

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: the image batch and three integer widgets."""
        return {
            "required": {
                "images": ("IMAGE",),
                "coord_y_height": ("INT", {
                    "default": 0,
                    "min": 0,
                    "max": 65535,
                    "step": 1,
                    "display": "number",
                }),
                "target_center_x": ("INT", {
                    "default": 0,
                    "min": -65535,
                    "max": 65535,
                    "step": 1,
                    "display": "number",
                }),
                "target_center_y": ("INT", {
                    "default": 0,
                    "min": -65535,
                    "max": 65535,
                    "step": 1,
                    "display": "number",
                }),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("images",)
    FUNCTION = "stabilize"
    CATEGORY = "image/sprite"

    def stabilize(self, images, coord_y_height, target_center_x, target_center_y):
        """
        Shift every frame so its detected sprite run lines up with the target.

        Args:
            images: torch.Tensor of shape [B, H, W, C] with C in (3, 4).
                Floating-point data is clamped to 0..1; uint8 data is scaled
                by 1/255 first.
            coord_y_height: row index to scan; clamped to [0, H - 1].
            target_center_x: desired x of the run center (input coordinates).
            target_center_y: desired y of the scan row (input coordinates).

        Returns:
            A 1-tuple holding an RGB tensor [B, H_out, W_out, 3] on the input
            device. The dtype is the input dtype when that is floating point,
            otherwise float32.

        Raises:
            TypeError: if ``images`` is not a torch.Tensor.
            ValueError: if the tensor is not 4-D, has an unsupported channel
                count, or has an empty dimension.
        """
        if not torch.is_tensor(images):
            raise TypeError("images must be a torch.Tensor in ComfyUI IMAGE format [B,H,W,C].")
        if images.ndim != 4:
            raise ValueError(f"Expected IMAGE tensor shape [B,H,W,C], got {tuple(images.shape)}.")
        batch, height, width, channels = images.shape
        if channels not in (3, 4):
            raise ValueError(f"Expected RGB or RGBA images with C=3 or C=4, got C={channels}.")
        if batch < 1 or height < 1 or width < 1:
            raise ValueError("images must contain at least one non-empty image.")

        input_device = images.device
        input_dtype = images.dtype if images.dtype.is_floating_point else torch.float32

        rgb = self._rgba_or_rgb_to_rgb_float(images)
        rgb_np = rgb.detach().cpu().numpy().astype(np.float32, copy=False)

        scan_y = int(np.clip(coord_y_height, 0, height - 1))
        target_x = int(target_center_x)
        target_y = int(target_center_y)

        composited = []
        shifts_x = []
        shifts_y = []
        for arr in rgb_np:
            bg_color = self._estimate_background_color(arr)
            dist = self._color_distance(arr, bg_color)
            threshold = self._adaptive_background_threshold(dist)
            center_x, found = self._find_sprite_center_x_on_row(
                row_distance=dist[scan_y],
                threshold=threshold,
                width=width,
            )
            if found:
                # center_x may end in .5 (even-length runs). Round half up so
                # that neighbouring frames never differ by a spurious pixel,
                # which round-half-to-even would cause (-2.5 -> -2, -3.5 -> -4).
                dx = int(math.floor(target_x - center_x + 0.5))
                dy = target_y - scan_y
            else:
                # Conservative fallback: if the requested scanline does not hit
                # any sprite pixels, do not introduce a potentially wild shift.
                dx = 0
                dy = 0

            # Replace the border-connected background with pure white now, so
            # the frame can simply be pasted onto a white canvas afterwards.
            alpha = self._external_background_alpha(dist, threshold)[..., None]
            composited.append(arr * alpha + (1.0 - alpha))
            shifts_x.append(dx)
            shifts_y.append(dy)

        # Padding is the largest shift in each direction over the whole batch.
        pad_left = max(0, -min(shifts_x))
        pad_right = max(0, max(shifts_x))
        pad_top = max(0, -min(shifts_y))
        pad_bottom = max(0, max(shifts_y))
        out_height = height + pad_top + pad_bottom
        out_width = width + pad_left + pad_right

        # White canvas for the whole batch; each frame is pasted at its offset.
        out_np = np.ones((batch, out_height, out_width, 3), dtype=np.float32)
        for index, (frame, dx, dy) in enumerate(zip(composited, shifts_x, shifts_y)):
            x0 = pad_left + dx
            y0 = pad_top + dy
            out_np[index, y0:y0 + height, x0:x0 + width, :] = np.clip(frame, 0.0, 1.0)

        out = torch.from_numpy(out_np).to(device=input_device, dtype=input_dtype)
        return (out,)

    @staticmethod
    def _rgba_or_rgb_to_rgb_float(images):
        """
        Return float32 RGB in 0..1, compositing RGBA over white.

        uint8 input is scaled by 1/255; without that, the clamp below would
        turn every non-zero byte into 1.0. Other dtypes are cast and clamped.
        """
        img = images.to(dtype=torch.float32)
        if images.dtype == torch.uint8:
            img = img / 255.0
        img = img.clamp(0.0, 1.0)
        if img.shape[-1] == 4:
            rgb = img[..., 0:3]
            alpha = img[..., 3:4]
            return rgb * alpha + (1.0 - alpha)  # composite over white
        return img[..., 0:3]

    @staticmethod
    def _border_strip_width(h, w):
        """Return the border sampling thickness: 1 px below 64 px, else side // 64 capped at 8."""
        short_side = min(h, w)
        if short_side < 64:
            return 1
        return max(1, min(8, short_side // 64))

    @staticmethod
    def _estimate_background_color(arr):
        """Return the per-channel median color of the border strips of ``arr`` ([H, W, 3])."""
        h, w, _ = arr.shape
        strip = SpriteBatchStabilizeToTarget._border_strip_width(h, w)
        samples = [
            arr[:strip, :, :].reshape(-1, 3),
            arr[h - strip:, :, :].reshape(-1, 3),
            arr[:, :strip, :].reshape(-1, 3),
            arr[:, w - strip:, :].reshape(-1, 3),
        ]
        border = np.concatenate(samples, axis=0)
        # Median is robust if a small part of the sprite touches an edge.
        return np.median(border, axis=0).astype(np.float32)

    @staticmethod
    def _color_distance(arr, bg_color):
        """Return the per-pixel RMS RGB distance ([H, W]) between ``arr`` and ``bg_color``."""
        # RMS RGB distance in 0..1. RMS is easier to tune than full Euclidean.
        delta = arr - bg_color.reshape(1, 1, 3)
        return np.sqrt(np.mean(delta * delta, axis=2)).astype(np.float32)

    def _adaptive_background_threshold(self, dist):
        """
        Derive the background/sprite distance threshold from border noise.

        The threshold is ``median + NOISE_SIGMA_MULTIPLIER * sigma +
        MIN_BACKGROUND_TOLERANCE`` where sigma is estimated from the median
        absolute deviation of the border distances, clipped to
        [MIN_BACKGROUND_TOLERANCE, MAX_BACKGROUND_TOLERANCE].
        """
        h, w = dist.shape
        strip = self._border_strip_width(h, w)
        border = np.concatenate([
            dist[:strip, :].reshape(-1),
            dist[h - strip:, :].reshape(-1),
            dist[:, :strip].reshape(-1),
            dist[:, w - strip:].reshape(-1),
        ])
        med = float(np.median(border))
        mad = float(np.median(np.abs(border - med)))
        robust_sigma = 1.4826 * mad  # MAD -> standard deviation for Gaussian noise
        threshold = med + self.NOISE_SIGMA_MULTIPLIER * robust_sigma + self.MIN_BACKGROUND_TOLERANCE
        return float(np.clip(threshold, self.MIN_BACKGROUND_TOLERANCE, self.MAX_BACKGROUND_TOLERANCE))

    def _find_sprite_center_x_on_row(self, row_distance, threshold, width):
        """
        Locate the dominant non-background run on one row.

        Args:
            row_distance: 1-D array of color distances for the scan row.
            threshold: distances strictly above this count as sprite.
            width: image width, used to size the gap-closing tolerance.

        Returns:
            ``(center_x, found)``. ``center_x`` is the midpoint of the first
            and last pixel of the chosen run (may end in .5); it is 0.0 when
            ``found`` is False.
        """
        different = row_distance > threshold
        gap = int(round(width * self.SMALL_GAP_FRACTION_OF_WIDTH))
        gap = int(np.clip(gap, self.SMALL_GAP_MIN_PIXELS, self.SMALL_GAP_MAX_PIXELS))
        different = self._close_small_false_gaps(different, gap)

        # Run with the strongest total color difference wins.
        # score=sum distance; tie-breaker=length.
        best = None
        best_score = -1.0
        best_len = -1
        for start, end in self._true_runs(different):
            length = end - start
            score = float(np.sum(row_distance[start:end]))
            if score > best_score or (math.isclose(score, best_score) and length > best_len):
                best = (start, end)
                best_score = score
                best_len = length
        if best is None:
            return 0.0, False

        start, end = best  # end is exclusive
        center_x = (start + end - 1) / 2.0
        return center_x, True

    @staticmethod
    def _close_small_false_gaps(mask, max_gap):
        """Return a copy of ``mask`` where interior False gaps of at most ``max_gap`` are set True."""
        closed = mask.astype(bool).copy()
        n = closed.size
        i = 0
        while i < n:
            # Skip the current True run, then measure the False gap after it.
            while i < n and closed[i]:
                i += 1
            gap_start = i
            while i < n and not closed[i]:
                i += 1
            gap_end = i
            # Gaps touching either end of the row are not between two runs.
            if gap_start == 0 or gap_end == n:
                continue
            if (gap_end - gap_start) <= max_gap and closed[gap_start - 1] and closed[gap_end]:
                closed[gap_start:gap_end] = True
        return closed

    @staticmethod
    def _true_runs(mask):
        """Return the ``(start, end)`` index pairs (end exclusive) of consecutive True runs."""
        runs = []
        n = mask.size
        i = 0
        while i < n:
            while i < n and not mask[i]:
                i += 1
            start = i
            while i < n and mask[i]:
                i += 1
            if i > start:
                runs.append((start, i))
        return runs

    @staticmethod
    def _external_background_alpha(dist, threshold):
        """
        Build an alpha mask that is 0 on border-connected background, 1 elsewhere.

        A pixel is background-like when ``dist <= threshold``. Background-like
        pixels reachable from the image border through 4-connected
        background-like neighbours become transparent; sprite pixels and
        enclosed light pixels stay opaque.

        The flood fill advances one whole frontier per iteration on flat
        NumPy index arrays instead of visiting pixels one by one in Python.
        """
        h, w = dist.shape
        background_like = (dist <= threshold).ravel()

        border = np.zeros((h, w), dtype=bool)
        border[0, :] = True
        border[h - 1, :] = True
        border[:, 0] = True
        border[:, w - 1] = True

        # Seed: every background-like pixel that lies on the border.
        external = border.ravel() & background_like
        frontier = np.flatnonzero(external)
        while frontier.size:
            rows, cols = np.divmod(frontier, w)
            # Flat indices of the up/down/left/right neighbours that exist.
            neighbours = np.concatenate((
                frontier[rows > 0] - w,
                frontier[rows < h - 1] + w,
                frontier[cols > 0] - 1,
                frontier[cols < w - 1] + 1,
            ))
            fresh = neighbours[background_like[neighbours] & ~external[neighbours]]
            # De-duplicate so the frontier cannot grow with repeated indices.
            frontier = np.unique(fresh)
            external[frontier] = True

        # Keep sprite and enclosed light pixels opaque; make external background transparent.
        return (~external).reshape(h, w).astype(np.float32)
# Registration tables exported by this module: node identifier -> implementing
# class, and node identifier -> human-readable display name.
NODE_CLASS_MAPPINGS = {"SpriteBatchStabilizeToTarget": SpriteBatchStabilizeToTarget}
NODE_DISPLAY_NAME_MAPPINGS = {"SpriteBatchStabilizeToTarget": "Sprite Batch Stabilize To Target"}

# Only the two registration tables are part of the public star-import surface.
__all__ = [
    "NODE_CLASS_MAPPINGS",
    "NODE_DISPLAY_NAME_MAPPINGS",
]