saliacoel commited on
Commit
7049a6e
·
verified ·
1 Parent(s): 663cd18

Upload salia_turn_to_pixelart.py

Browse files
Files changed (1) hide show
  1. salia_turn_to_pixelart.py +476 -0
salia_turn_to_pixelart.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from typing import Dict, Tuple
5
+
6
+ import numpy as np
7
+ import torch
8
+
9
+ # --- OpenCV is required for the perspective warp used by the GIMP-like tilt.
10
+ try:
11
+ import cv2 # type: ignore
12
+ except Exception:
13
+ cv2 = None
14
+
15
+ # ComfyUI internals (available inside ComfyUI runtime)
16
+ from comfy.utils import common_upscale
17
+ from comfy import model_management
18
+
19
+
20
+ # -----------------------------
21
+ # GIMP-like 3D Transform (Tilt)
22
+ # -----------------------------
23
+
24
def _interp_flag(name: str) -> int:
    """Translate a user-facing interpolation name into the matching cv2 flag.

    Unknown, empty, or ``None`` names fall back to ``cv2.INTER_LINEAR``.
    """
    lookup = {
        "nearest": cv2.INTER_NEAREST,
        "linear": cv2.INTER_LINEAR,
        "cubic": cv2.INTER_CUBIC,
        "lanczos4": cv2.INTER_LANCZOS4,
    }
    return lookup.get((name or "").lower(), cv2.INTER_LINEAR)
36
+
37
+
38
+ _H_CACHE: Dict[Tuple[int, int, float, float, float, float, float, float, float, str], np.ndarray] = {}
39
+
40
+
41
def _compute_homography(
    w: int,
    h: int,
    *,
    angle_x_deg: float,
    fov_deg: float,
    offset_x: float,
    offset_y: float,
    offset_z: float,
    vp_x_frac: float,
    vp_y_frac: float,
    fov_basis: str,
) -> np.ndarray:
    """Build (and cache) the 3x3 homography for a GIMP-like 3D tilt.

    The image is treated as a plane centered at the origin (+Y up), rotated
    around the X axis by ``angle_x_deg``, translated by the pixel offsets, and
    perspective-projected with a focal length derived from ``fov_deg`` over the
    dimension selected by ``fov_basis`` ("width" default, or "height"/"max"/
    "min"/"diagonal"). The projected corners define the destination quad for
    ``cv2.getPerspectiveTransform``.

    Returns the homography mapping source pixel coords to output pixel coords;
    results are memoized in the module-level ``_H_CACHE``.
    """
    # Cache key: every parameter that influences the resulting matrix.
    key = (
        w,
        h,
        float(angle_x_deg),
        float(fov_deg),
        float(offset_x),
        float(offset_y),
        float(offset_z),
        float(vp_x_frac),
        float(vp_y_frac),
        str(fov_basis),
    )
    cached = _H_CACHE.get(key)
    if cached is not None:
        return cached

    fov_rad = math.radians(float(fov_deg))
    # avoid singularities (tan(0) and tan(90°) blow up the focal length)
    fov_rad = min(max(fov_rad, math.radians(1e-3)), math.radians(179.999))

    # Derive focal length in pixels (approx. like "Angle of view" in GIMP's tool).
    if fov_basis == "height":
        ref = (h - 1)
    elif fov_basis == "max":
        ref = max(w - 1, h - 1)
    elif fov_basis == "min":
        ref = min(w - 1, h - 1)
    elif fov_basis == "diagonal":
        ref = math.hypot(w - 1, h - 1)
    else:
        # default: width
        ref = (w - 1)

    f = 0.5 * float(ref) / math.tan(fov_rad / 2.0)
    cam_dist = f  # chosen so that with 0 rotation, the result is identity (no scaling)

    # Half-extents of the plane in pixels.
    hw = (w - 1) / 2.0
    hh = (h - 1) / 2.0

    # Plane corners in 3D, centered at origin (local object space).
    # y is "up" here, so top edge has +hh.
    corners = np.array(
        [
            [-hw, +hh, 0.0],  # top-left
            [+hw, +hh, 0.0],  # top-right
            [+hw, -hh, 0.0],  # bottom-right
            [-hw, -hh, 0.0],  # bottom-left
        ],
        dtype=np.float32,
    )

    ax = math.radians(float(angle_x_deg))
    c, s = math.cos(ax), math.sin(ax)

    x = corners[:, 0]
    y = corners[:, 1]
    z = corners[:, 2]

    # Rotation around X axis
    y_r = y * c - z * s
    z_r = y * s + z * c
    x_r = x

    # Translation in object space (pixels)
    x_r = x_r + float(offset_x)
    y_r = y_r + float(offset_y)
    z_r = z_r + float(offset_z)

    # Vanishing point in output pixel coords
    vp_x = float(vp_x_frac) * (w - 1)
    vp_y = float(vp_y_frac) * (h - 1)

    # Perspective projection to 2D; clamp depth so corners behind the camera
    # cannot produce a division by ~0.
    depth = cam_dist + z_r
    depth = np.maximum(depth, 1e-4).astype(np.float32)

    # Note the minus on Y: object space is +Y-up, image space is +Y-down.
    dst = np.stack(
        [
            vp_x + (x_r * f) / depth,
            vp_y - (y_r * f) / depth,
        ],
        axis=1,
    ).astype(np.float32)

    # Source quad: the full image rectangle, same corner order as `corners`.
    src = np.array(
        [
            [0.0, 0.0],
            [w - 1.0, 0.0],
            [w - 1.0, h - 1.0],
            [0.0, h - 1.0],
        ],
        dtype=np.float32,
    )

    Hm = cv2.getPerspectiveTransform(src, dst)
    _H_CACHE[key] = Hm
    return Hm
151
+
152
+
153
def _warp_rgba(
    img_rgba: np.ndarray,
    Hm: np.ndarray,
    *,
    interpolation: int,
) -> np.ndarray:
    """Apply homography ``Hm`` to an HxWx4 float32 RGBA image in [0..1].

    Resampling happens in premultiplied alpha to avoid dark fringes at
    transparent edges; the result is un-premultiplied before returning.
    Out-of-frame areas become fully transparent black.
    """
    height, width = img_rgba.shape[:2]

    source = np.clip(img_rgba.astype(np.float32), 0.0, 1.0)

    # Premultiply alpha before resampling.
    alpha_in = source[..., 3:4]
    premultiplied = source.copy()
    premultiplied[..., :3] *= alpha_in

    warped = cv2.warpPerspective(
        premultiplied,
        Hm,
        (width, height),
        flags=interpolation,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0, 0),
    )
    warped = np.clip(warped, 0.0, 1.0)

    # Un-premultiply; pixels with (near-)zero alpha are forced to black.
    alpha_out = warped[..., 3:4]
    visible = alpha_out > 1e-6
    safe_alpha = np.where(visible, alpha_out, 1.0)
    rgb = np.where(visible, warped[..., :3] / safe_alpha, np.zeros_like(warped[..., :3]))

    return np.concatenate([np.clip(rgb, 0.0, 1.0), alpha_out], axis=2)
190
+
191
+
192
+ # -----------------------------
193
+ # Resize (KJ Resize Image v2-like, simplified for fixed use)
194
+ # -----------------------------
195
+
196
+ def _round_down_divisible(x: int, divisible_by: int) -> int:
197
+ if divisible_by and divisible_by > 1:
198
+ return int(x - (x % divisible_by))
199
+ return int(x)
200
+
201
+
202
def _resize_like_kj_v2_stretch_rgba(
    image_rgba: torch.Tensor,
    *,
    width: int,
    height: int,
    upscale_method: str,
    divisible_by: int = 2,
    device: str = "gpu",
) -> torch.Tensor:
    """
    Mimics KJ ImageResizeKJv2 for the specific case we need:
      - keep_proportion == "stretch"
      - crop_position == "center" (irrelevant in stretch)
      - pad_color == "0,0,0" (irrelevant in stretch)
      - device == "gpu"
    Supports RGBA by treating it as 4 channels in IMAGE tensor.

    Args:
        image_rgba: [B,H,W,C] tensor with C >= 4 (RGBA).
        width/height: target size; 0 keeps the source dimension (KJ convention).
        upscale_method: method string understood by comfy's common_upscale.
        divisible_by: target dims are rounded down to a multiple of this.
        device: "gpu" routes through model_management's torch device, else CPU.

    Returns:
        [B,height,width,C] float32 tensor on the selected device.
    """
    if not isinstance(image_rgba, torch.Tensor):
        raise TypeError("image must be a torch.Tensor")
    if image_rgba.ndim != 4:
        raise ValueError(f"Expected image shape [B,H,W,C], got {tuple(image_rgba.shape)}")

    _, src_h, src_w, channels = image_rgba.shape
    if channels < 4:
        raise ValueError(f"Expected RGBA (4 channels). Got C={channels}.")

    # KJ behavior: a 0 dimension means "keep the original size".
    target_w = int(src_w) if width == 0 else int(width)
    target_h = int(src_h) if height == 0 else int(height)

    target_w = _round_down_divisible(target_w, int(divisible_by))
    target_h = _round_down_divisible(target_h, int(divisible_by))

    # Select device (KJ: GPU uses model_management.get_torch_device()).
    target_dev = (
        model_management.get_torch_device()
        if device.lower() == "gpu"
        else torch.device("cpu")
    )

    # common_upscale expects [B,C,H,W], so swap channel position around the call.
    chw = image_rgba.to(target_dev, dtype=torch.float32).movedim(-1, 1)
    resized = common_upscale(chw, target_w, target_h, upscale_method, crop="disabled")
    return resized.movedim(1, -1)
251
+
252
+
253
+ # -----------------------------
254
+ # Alpha compositing utilities
255
+ # -----------------------------
256
+
257
+ def _alpha_composite_over(
258
+ base: torch.Tensor,
259
+ over: torch.Tensor,
260
+ *,
261
+ opacity: float = 1.0,
262
+ eps: float = 1e-6,
263
+ ) -> torch.Tensor:
264
+ """
265
+ Porter-Duff 'over' with layer opacity.
266
+ base, over: [B,H,W,4] in un-premultiplied RGBA, float in [0..1]
267
+ opacity multiplies the over layer's alpha (like a layer opacity slider).
268
+ """
269
+ if base.shape != over.shape:
270
+ raise ValueError(f"Composite requires same shape. base={tuple(base.shape)} over={tuple(over.shape)}")
271
+
272
+ base = base.to(dtype=torch.float32)
273
+ over = over.to(dtype=torch.float32)
274
+
275
+ b_rgb = base[..., :3]
276
+ b_a = base[..., 3:4].clamp(0.0, 1.0)
277
+
278
+ o_rgb = over[..., :3]
279
+ o_a = (over[..., 3:4].clamp(0.0, 1.0) * float(opacity)).clamp(0.0, 1.0)
280
+
281
+ # Premultiply
282
+ b_rgb_p = b_rgb * b_a
283
+ o_rgb_p = o_rgb * o_a
284
+
285
+ out_a = o_a + b_a * (1.0 - o_a)
286
+ out_rgb_p = o_rgb_p + b_rgb_p * (1.0 - o_a)
287
+
288
+ # Un-premultiply (vectorized)
289
+ out_a_safe = out_a.clamp(min=eps)
290
+ out_rgb = out_rgb_p / out_a_safe
291
+ out_rgb = torch.where(out_a > eps, out_rgb, torch.zeros_like(out_rgb_p))
292
+
293
+ out = torch.cat([out_rgb.clamp(0.0, 1.0), out_a.clamp(0.0, 1.0)], dim=-1)
294
+ return out
295
+
296
+
297
+ # -----------------------------
298
+ # The ComfyUI node
299
+ # -----------------------------
300
+
301
class SaliaTurnToPixelart:
    """
    ComfyUI node: salia_turn_to_pixelart

    Input : IMAGE (RGB or RGBA; RGB is auto-upgraded to RGBA with alpha=1)
    Output: IMAGE (RGBA)

    Single image behavior:
    - Matches the original pipeline.

    Batch behavior:
    - Each image in the batch is processed fully one-by-one through the same pipeline.
    - If input batch size > 1, outputs are concatenated horizontally into ONE single image:
      [1, H, W*B, 4]
      with index 0 on the left, then 1 attached to the right, etc.

    Pipeline per image (see apply/_process_one):
      1. GIMP-like 3D tilt via a cached homography + cv2 perspective warp.
      2. Four stretch resizes of the tilted image to a fixed target size
         (area / bicubic / nearest-exact / lanczos).
      3. The 'area' layer is composited onto itself 5x.
      4. bicubic @ 55%, nearest @ 50%, lanczos @ 50% are layered on top.
    """

    @classmethod
    def INPUT_TYPES(cls):
        # Single required IMAGE input; all pipeline parameters are fixed class constants.
        return {"required": {"image": ("IMAGE",)}}

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "apply"
    CATEGORY = "salia"

    # Fixed pipeline constants
    _TILT_ANGLE_X_DEG = -21.0
    _TILT_FOV_DEG = 80.0
    _TILT_OFFSET_X_PX = 0.0
    _TILT_OFFSET_Y_PX = 100.0  # positive is "down" in typical image coordinates
    _TILT_OFFSET_Z_PX = 0.0
    _TILT_VP_X = 0.5  # vanishing point as a fraction of width
    _TILT_VP_Y = 0.5  # vanishing point as a fraction of height
    _TILT_FOV_BASIS = "max"
    _TILT_INTERP = "lanczos4"

    _RESIZE_W = 134
    _RESIZE_H = 165
    _DIVISIBLE_BY = 2
    _DEVICE = "gpu"

    def apply(self, image: torch.Tensor):
        """Run the fixed pixelart pipeline on a [B,H,W,3|4] IMAGE tensor.

        Returns a 1-tuple with a [1,H2,W2,4] tensor for B == 1, or a
        horizontally concatenated [1,H2,W2*B,4] strip for B > 1.

        Raises:
            RuntimeError: if OpenCV (cv2) failed to import at module load.
            TypeError / ValueError: on non-tensor input or unexpected shape.
        """
        if cv2 is None:
            raise RuntimeError(
                "salia_turn_to_pixelart requires OpenCV (cv2). "
                "Install it in your ComfyUI environment (e.g. pip install opencv-python)."
            )

        if not isinstance(image, torch.Tensor):
            raise TypeError("image must be a torch.Tensor")

        if image.ndim != 4:
            raise ValueError(f"Expected image shape [B,H,W,C], got {tuple(image.shape)}")

        b, h, w, c = image.shape
        if c not in (3, 4):
            raise ValueError(f"Expected 3 or 4 channels (RGB/RGBA), got {c}")

        # Ensure float32 in [0..1]
        # NOTE(review): values are converted but not clamped here; downstream
        # steps clip, so out-of-range inputs are tolerated.
        img = image.detach().to(dtype=torch.float32)

        # Force RGBA
        if c == 3:
            alpha = torch.ones((b, h, w, 1), dtype=img.dtype, device=img.device)
            rgba = torch.cat([img, alpha], dim=-1)
        else:
            rgba = img[..., :4]

        # Our 3D math uses +Y as "up", while image UIs usually treat +Y as "down".
        # Convert a "downward" pixel offset into the +Y-up coordinate system.
        offset_y_internal = -float(self._TILT_OFFSET_Y_PX)

        # Precompute homography once (same for all images in the batch)
        Hm = _compute_homography(
            int(w),
            int(h),
            angle_x_deg=float(self._TILT_ANGLE_X_DEG),
            fov_deg=float(self._TILT_FOV_DEG),
            offset_x=float(self._TILT_OFFSET_X_PX),
            offset_y=float(offset_y_internal),
            offset_z=float(self._TILT_OFFSET_Z_PX),
            vp_x_frac=float(self._TILT_VP_X),
            vp_y_frac=float(self._TILT_VP_Y),
            fov_basis=str(self._TILT_FOV_BASIS),
        )
        interp = _interp_flag(self._TILT_INTERP)

        def _process_one(rgba_one: torch.Tensor) -> torch.Tensor:
            """
            rgba_one: [1,H,W,4] float32, any device.
            returns: [1,H2,W2,4] float32 on CPU (clamped).
            """
            # ----- Step 1: GIMP 3D Tilt (cv2 on CPU numpy) -----
            rgba_cpu = rgba_one.cpu().numpy()  # [1,H,W,4]
            out_np = _warp_rgba(rgba_cpu[0], Hm, interpolation=interp)  # [H,W,4]
            tilted = torch.from_numpy(out_np).unsqueeze(0).to(dtype=torch.float32)  # [1,H,W,4], CPU

            # ----- Step 2: Resize (4 ways) -----
            # Same tilted source resized with four different filters; they are
            # blended below to get the final pixelart look.
            area = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="area",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )
            bicubic = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="bicubic",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )
            nearest = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="nearest-exact",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )
            lanczos = _resize_like_kj_v2_stretch_rgba(
                tilted,
                width=self._RESIZE_W,
                height=self._RESIZE_H,
                upscale_method="lanczos",
                divisible_by=self._DIVISIBLE_BY,
                device=self._DEVICE,
            )

            # Ensure everything is on the same device for compositing
            dev = area.device
            bicubic = bicubic.to(dev)
            nearest = nearest.to(dev)
            lanczos = lanczos.to(dev)

            # ----- Step 3: 5x area (stack 5 identical layers) -----
            # Compositing a layer over itself repeatedly densifies its alpha.
            area_5x = area
            for _ in range(4):  # base + 4 overlays = 5 total
                area_5x = _alpha_composite_over(area_5x, area, opacity=1.0)

            # ----- Step 4: Composite final layers (bottom -> top) -----
            # 1) bottom: 5x_area (100%)
            final = area_5x
            # 2) bicubic @ 55%
            final = _alpha_composite_over(final, bicubic, opacity=0.55)
            # 3) nearest @ 50%
            final = _alpha_composite_over(final, nearest, opacity=0.50)
            # 4) lanczos @ 50% (topmost)
            final = _alpha_composite_over(final, lanczos, opacity=0.50)

            return final.clamp(0.0, 1.0).cpu()

        # Process each image fully one-by-one
        outputs_hw4 = []
        for i in range(b):
            out_i = _process_one(rgba[i : i + 1])  # [1,H2,W2,4]
            outputs_hw4.append(out_i[0])  # [H2,W2,4]

        # If single image, preserve original output shape [1,H2,W2,4]
        if b == 1:
            return (outputs_hw4[0].unsqueeze(0).contiguous(),)

        # If batch, concatenate horizontally into ONE long image [1,H2,W2*B,4]
        strip = torch.cat(outputs_hw4, dim=1).unsqueeze(0).contiguous()
        return (strip,)
468
+
469
+
470
# ComfyUI registration: maps the node's unique key to its implementing class.
NODE_CLASS_MAPPINGS = {
    "salia_turn_to_pixelart": SaliaTurnToPixelart,
}

# Display name shown in the ComfyUI node picker (kept identical to the key).
NODE_DISPLAY_NAME_MAPPINGS = {
    "salia_turn_to_pixelart": "salia_turn_to_pixelart",
}