Spaces:

LogicGoInfotechSpaces
/

object_remover

Running

App Files Files Community

LogicGoInfotechSpaces committed on Oct 31, 2025

Commit

0964a65

1 Parent(s): dac0915

fix(mask): improve mask detection - handle RGB paint directly, detect magenta and white areas; match reference model behavior

Browse files

Files changed (2) hide show

api/main.py +9 -3
src/core.py +35 -8

api/main.py CHANGED Viewed

@@ -285,10 +285,16 @@ def inpaint_multipart(
             nonzero = int((binmask > 0).sum())
             log.info("fallback detection: %d pixels", nonzero)
-        # Build RGBA mask where painted area has alpha=0 (to be removed)
         mask_rgba = np.zeros((binmask.shape[0], binmask.shape[1], 4), dtype=np.uint8)
-        mask_rgba[:, :, 3] = np.where(binmask > 0, 0, 255).astype(np.uint8)
-        log.info("Final mask: %d pixels marked for removal (alpha=0)", int((mask_rgba[:,:,3] == 0).sum()))
     else:
         mask_rgba = _load_rgba_mask_from_image(m)

             nonzero = int((binmask > 0).sum())
             log.info("fallback detection: %d pixels", nonzero)
+        # Build RGBA mask: painted areas should be white in RGB for direct detection
+        # Use RGB channels with white=remove, black=keep, then set alpha appropriately
         mask_rgba = np.zeros((binmask.shape[0], binmask.shape[1], 4), dtype=np.uint8)
+        # Paint detected areas as white in RGB (will be detected in process_inpaint)
+        mask_rgba[:, :, 0] = binmask  # R
+        mask_rgba[:, :, 1] = binmask  # G
+        mask_rgba[:, :, 2] = binmask  # B
+        # Set alpha to opaque so RGB channels are used
+        mask_rgba[:, :, 3] = 255
+        log.info("Final mask: %d pixels marked for removal (white in RGB)", int((binmask > 0).sum()))
     else:
         mask_rgba = _load_rgba_mask_from_image(m)

src/core.py CHANGED Viewed

@@ -460,17 +460,40 @@ def process_inpaint(image, mask, invert_mask=True):
     # Convert RGBA mask to single-channel mask.
     # Standard LaMa convention: 1 = remove, 0 = keep
-    # User draws with alpha=0 (transparent), we want those to become 1 (remove)
     alpha_channel = mask[:,:,3]
-    # When invert_mask=True: alpha=0 (painted/transparent) → 255 → 1 (remove)
-    # When invert_mask=False: alpha=255 (opaque) → 255 → 1 (remove)
-    if invert_mask:
-        # Inverted: transparent (0) means remove, opaque (255) means keep
-        mask = 255 - alpha_channel
     else:
-        # Normal: opaque (255) means remove, transparent (0) means keep
-        mask = alpha_channel
     mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
@@ -482,6 +505,10 @@ def process_inpaint(image, mask, invert_mask=True):
     # Normalize: values > 0 become 1.0, 0 stays 0
     # After this, 1.0 = remove, 0.0 = keep (LaMa expects this)
     mask = norm_img(mask)
     res_np_img = run(image, mask)

     # Convert RGBA mask to single-channel mask.
     # Standard LaMa convention: 1 = remove, 0 = keep
+    # The mask can come in different formats:
+    # - RGBA with alpha channel encoding (alpha=0 means remove when invert_mask=True)
+    # - RGBA with RGB encoding (white/colored areas mean remove)
     alpha_channel = mask[:,:,3]
+    rgb_channels = mask[:,:,:3]
+    # Check if alpha channel is meaningful (not all 255)
+    alpha_mean = alpha_channel.mean()
+    if alpha_mean < 50:
+        # Alpha channel is mostly transparent - use alpha directly
+        # Transparent (0) = remove, Opaque (255) = keep
+        if invert_mask:
+            mask = 255 - alpha_channel  # transparent → white (remove)
+        else:
+            mask = alpha_channel  # opaque → white (remove)
+    elif alpha_mean > 200:
+        # Alpha channel is mostly opaque - check RGB channels for paint colors
+        # Detect magenta (255, 0, 255) or any bright colored paint
+        gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
+        # White or bright colors (>200) in RGB = remove
+        mask_rgb = (gray > 200).astype(np.uint8) * 255
+        # Also detect magenta specifically
+        magenta = np.all(rgb_channels == [255, 0, 255], axis=2).astype(np.uint8) * 255
+        mask = np.maximum(mask_rgb, magenta)
+        if not invert_mask:
+            mask = 255 - mask  # invert if needed
     else:
+        # Mixed alpha - use alpha channel with inversion logic
+        if invert_mask:
+            mask = 255 - alpha_channel
+        else:
+            mask = alpha_channel
     mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
     # Normalize: values > 0 become 1.0, 0 stays 0
     # After this, 1.0 = remove, 0.0 = keep (LaMa expects this)
     mask = norm_img(mask)
+    # Final check: ensure we have some pixels to remove
+    mask_final_pixels = int((mask > 0.5).sum())
+    print(f"After normalization: {mask_final_pixels} pixels marked for removal (value > 0.5)")
     res_np_img = run(image, mask)