Spaces:

LogicGoInfotechSpaces
/

object_remover

Running

App Files Community

LogicGoInfotechSpaces committed on Oct 31, 2025

Commit

f854294

1 Parent(s): 0964a65

feat(api): auto-convert painted images to black/white masks (white=remove, black=keep) for better compatibility

Browse files

Files changed (1) hide show

api/main.py +48 -21

api/main.py CHANGED Viewed

@@ -126,40 +126,56 @@ def _load_rgba_image(path: str) -> Image.Image:
 def _load_rgba_mask_from_image(img: Image.Image) -> np.ndarray:
     """
-    Convert mask image to RGBA format.
     Standard convention: white (255) = area to remove, black (0) = area to keep
-    Returns RGBA where alpha=0 means "to remove", alpha=255 means "keep"
-    (This will be inverted in process_inpaint if invert_mask=True)
     """
     if img.mode != "RGBA":
         # For RGB/Grayscale masks: white (value>128) = remove, black (value<=128) = keep
         gray = img.convert("L")
         arr = np.array(gray)
-        # White pixels (>128) should have alpha=0 (to remove after inversion)
-        # Black pixels (<=128) should have alpha=255 (to keep after inversion)
-        alpha = np.where(arr > 128, 0, 255).astype(np.uint8)
         rgba = np.zeros((img.height, img.width, 4), dtype=np.uint8)
-        rgba[:, :, 3] = alpha
-        log.info(f"Loaded {img.mode} mask: {int((alpha == 0).sum())} pixels marked for removal (alpha=0)")
         return rgba
     # For RGBA: check if alpha channel is meaningful
     arr = np.array(img)
     alpha = arr[:, :, 3]
     # If alpha is mostly opaque everywhere (mean > 200), treat RGB channels as mask values
     if alpha.mean() > 200:
-        # Use RGB to determine mask: white in RGB = remove
-        gray = cv2.cvtColor(arr[:, :, :3], cv2.COLOR_RGB2GRAY)
-        alpha = np.where(gray > 128, 0, 255).astype(np.uint8)
         rgba = arr.copy()
-        rgba[:, :, 3] = alpha
-        log.info(f"Loaded RGBA mask (RGB-based): {int((alpha == 0).sum())} pixels marked for removal (alpha=0)")
         return rgba
-    # Alpha channel already encodes the mask
-    log.info(f"Loaded RGBA mask (alpha-based): {int((alpha < 128).sum())} pixels marked for removal (alpha<128)")
-    return arr
 @app.post("/inpaint")
@@ -285,16 +301,27 @@ def inpaint_multipart(
             nonzero = int((binmask > 0).sum())
             log.info("fallback detection: %d pixels", nonzero)
-        # Build RGBA mask: painted areas should be white in RGB for direct detection
-        # Use RGB channels with white=remove, black=keep, then set alpha appropriately
         mask_rgba = np.zeros((binmask.shape[0], binmask.shape[1], 4), dtype=np.uint8)
-        # Paint detected areas as white in RGB (will be detected in process_inpaint)
         mask_rgba[:, :, 0] = binmask  # R
         mask_rgba[:, :, 1] = binmask  # G
         mask_rgba[:, :, 2] = binmask  # B
-        # Set alpha to opaque so RGB channels are used
         mask_rgba[:, :, 3] = 255
-        log.info("Final mask: %d pixels marked for removal (white in RGB)", int((binmask > 0).sum()))
     else:
         mask_rgba = _load_rgba_mask_from_image(m)

 def _load_rgba_mask_from_image(img: Image.Image) -> np.ndarray:
     """
+    Convert mask image to RGBA format (black/white mask).
     Standard convention: white (255) = area to remove, black (0) = area to keep
+    Returns RGBA with white in RGB channels where removal is needed, alpha=255
     """
     if img.mode != "RGBA":
         # For RGB/Grayscale masks: white (value>128) = remove, black (value<=128) = keep
         gray = img.convert("L")
         arr = np.array(gray)
+        # Create proper black/white mask: white pixels (>128) = remove, black (<=128) = keep
+        mask_bw = np.where(arr > 128, 255, 0).astype(np.uint8)
         rgba = np.zeros((img.height, img.width, 4), dtype=np.uint8)
+        rgba[:, :, 0] = mask_bw  # R
+        rgba[:, :, 1] = mask_bw  # G
+        rgba[:, :, 2] = mask_bw  # B
+        rgba[:, :, 3] = 255  # Fully opaque
+        log.info(f"Loaded {img.mode} mask: {int((mask_bw > 0).sum())} white pixels (to remove)")
         return rgba
     # For RGBA: check if alpha channel is meaningful
     arr = np.array(img)
     alpha = arr[:, :, 3]
+    rgb = arr[:, :, :3]
     # If alpha is mostly opaque everywhere (mean > 200), treat RGB channels as mask values
     if alpha.mean() > 200:
+        # Use RGB to determine mask: white/bright in RGB = remove
+        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
+        # Also detect magenta specifically
+        magenta = np.all(rgb == [255, 0, 255], axis=2).astype(np.uint8) * 255
+        mask_bw = np.maximum(np.where(gray > 128, 255, 0).astype(np.uint8), magenta)
         rgba = arr.copy()
+        rgba[:, :, 0] = mask_bw  # R
+        rgba[:, :, 1] = mask_bw  # G
+        rgba[:, :, 2] = mask_bw  # B
+        rgba[:, :, 3] = 255  # Fully opaque
+        log.info(f"Loaded RGBA mask (RGB-based): {int((mask_bw > 0).sum())} white pixels (to remove)")
         return rgba
+    # Alpha channel encodes the mask - convert to RGB-based
+    # Transparent areas (alpha < 128) = remove, Opaque areas = keep
+    mask_bw = np.where(alpha < 128, 255, 0).astype(np.uint8)
+    rgba = arr.copy()
+    rgba[:, :, 0] = mask_bw
+    rgba[:, :, 1] = mask_bw
+    rgba[:, :, 2] = mask_bw
+    rgba[:, :, 3] = 255
+    log.info(f"Loaded RGBA mask (alpha-based): {int((mask_bw > 0).sum())} white pixels (to remove)")
+    return rgba
 @app.post("/inpaint")
             nonzero = int((binmask > 0).sum())
             log.info("fallback detection: %d pixels", nonzero)
+        # Build RGBA mask: convert to proper black/white mask
+        # White (255) = remove, Black (0) = keep (standard convention)
         mask_rgba = np.zeros((binmask.shape[0], binmask.shape[1], 4), dtype=np.uint8)
+        # Set RGB channels: white where paint detected, black elsewhere
         mask_rgba[:, :, 0] = binmask  # R
         mask_rgba[:, :, 1] = binmask  # G
         mask_rgba[:, :, 2] = binmask  # B
+        # Set alpha to opaque so it's treated as a standard RGB mask
         mask_rgba[:, :, 3] = 255
+        # Also create a cleaner version: apply morphological operations to smooth edges
+        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+        binmask_clean = cv2.morphologyEx(binmask, cv2.MORPH_CLOSE, kernel)
+        binmask_clean = cv2.morphologyEx(binmask_clean, cv2.MORPH_OPEN, kernel)
+        mask_rgba[:, :, 0] = binmask_clean
+        mask_rgba[:, :, 1] = binmask_clean
+        mask_rgba[:, :, 2] = binmask_clean
+        log.info("Auto-converted painted image to black/white mask: %d white pixels (to remove)",
+                 int((binmask_clean > 0).sum()))
     else:
         mask_rgba = _load_rgba_mask_from_image(m)