Spaces:

LogicGoInfotechSpaces
/

object_remover

Running

App Files Files Community

LogicGoInfotechSpaces commited on Nov 3, 2025

Commit

ed7d157

1 Parent(s): c96c733

fix: match reference model exactly - use simple 255-alpha inversion like aryadytm/remove-photo-object

Browse files

Files changed (1) hide show

src/core.py +32 -44

src/core.py CHANGED Viewed

@@ -443,81 +443,69 @@ def get_args_parser():
 def process_inpaint(image, mask, invert_mask=True):
     image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
     original_shape = image.shape
     interpolation = cv2.INTER_CUBIC
-    #size_limit: Union[int, str] = request.form.get("sizeLimit", "1080")
-    #if size_limit == "Original":
     size_limit = max(image.shape)
-    #else:
-    #    size_limit = int(size_limit)
     print(f"Origin image shape: {original_shape}")
     image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
     print(f"Resized image shape: {image.shape}")
     image = norm_img(image)
-    # Convert RGBA mask to single-channel mask.
-    # Standard LaMa convention: 1 = remove, 0 = keep
-    # Simple approach: white pixels in RGB = remove, black = keep
-    # This matches the reference model behavior
     alpha_channel = mask[:,:,3]
     rgb_channels = mask[:,:,:3]
-    # Convert RGB to grayscale to detect white/black
-    gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
-    # Standard: white (255) = remove, black (0) = keep
-    # Detect white pixels (>128) as removal areas
-    mask = (gray > 128).astype(np.uint8) * 255
-    # Also explicitly detect magenta (255, 0, 255) which is commonly used for painting
-    magenta = np.all(rgb_channels == [255, 0, 255], axis=2).astype(np.uint8) * 255
-    mask = np.maximum(mask, magenta)
-    # If alpha channel is mostly transparent (<50 mean), use it as mask source
     alpha_mean = alpha_channel.mean()
-    if alpha_mean < 50:
-        # Transparent areas (alpha=0) should be removed
-        if invert_mask:
-            mask = np.maximum(mask, (255 - alpha_channel))  # transparent → white
-        else:
-            mask = np.maximum(mask, alpha_channel)  # opaque → white
-    # Apply invert_mask if needed
-    # When invert_mask=False: black pixels (0) should become white (255) to remove
-    # When invert_mask=True (default): white pixels (255) stay white to remove (standard)
-    if not invert_mask:
-        mask = 255 - mask
-        print(f"Applied invert_mask=False: inverted mask - {int((mask > 128).sum())} pixels now marked for removal")
     else:
-        print(f"Using invert_mask=True: {int((mask > 128).sum())} white pixels will be removed (standard)")
     mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
-    # Debug: log mask statistics BEFORE normalization
     mask_nonzero = int((mask > 128).sum())
     mask_total = mask.shape[0] * mask.shape[1]
     print(f"Mask shape: {mask.shape}, pixels to remove (>128): {mask_nonzero}/{mask_total} ({100*mask_nonzero/mask_total:.1f}%)")
-    # Normalize: values > 0 become 1.0, 0 stays 0
-    # After this, 1.0 = remove, 0.0 = keep (LaMa expects this)
     mask = norm_img(mask)
-    # Final check: ensure we have some pixels to remove
     mask_final_pixels = int((mask > 0.5).sum())
     print(f"After normalization: {mask_final_pixels} pixels marked for removal (value > 0.5)")
     if mask_final_pixels < 10:
-        print("WARNING: Very few pixels marked for removal! The mask might be empty or inverted.")
-        print("Check your mask format: white pixels (255) should indicate areas to remove when invert_mask=True")
     res_np_img = run(image, mask)
-    # Debug: verify output changed
-    diff_pixels = int(np.sum(np.abs(res_np_img.astype(np.float32) - cv2.cvtColor(image, cv2.COLOR_RGBA2RGB).astype(np.float32)) > 5))
-    print(f"Output check: {diff_pixels} pixels differ from input (should be > 0 if removal worked)")
     return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)

 def process_inpaint(image, mask, invert_mask=True):
+    """
+    Process inpainting - matches reference model implementation exactly.
+    Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
+    """
     image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
     original_shape = image.shape
     interpolation = cv2.INTER_CUBIC
     size_limit = max(image.shape)
     print(f"Origin image shape: {original_shape}")
     image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
     print(f"Resized image shape: {image.shape}")
     image = norm_img(image)
+    # Match reference model exactly: invert alpha channel
+    # Reference line 460: mask = 255-mask[:,:,3]
+    # This means: alpha=0 (transparent/drawn) → 255 (white/remove)
+    #             alpha=255 (opaque) → 0 (black/keep)
+    # Check if we should use RGB channels (for uploaded black/white masks)
     alpha_channel = mask[:,:,3]
     rgb_channels = mask[:,:,:3]
     alpha_mean = alpha_channel.mean()
+    if alpha_mean > 200:
+        # Alpha is mostly opaque - use RGB channels (white=remove, black=keep)
+        gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
+        # White pixels (>128) = remove
+        mask = (gray > 128).astype(np.uint8) * 255
+        # Also detect magenta specifically
+        magenta = np.all(rgb_channels == [255, 0, 255], axis=2).astype(np.uint8) * 255
+        mask = np.maximum(mask, magenta)
+        # Apply invert_mask if needed
+        if not invert_mask:
+            mask = 255 - mask
     else:
+        # Alpha channel encodes mask - use reference model's exact logic
+        # Invert alpha: transparent (0) → white (255), opaque (255) → black (0)
+        mask = 255 - alpha_channel
+        # Apply invert_mask if user wants opposite
+        if not invert_mask:
+            mask = 255 - mask  # double invert back to original
     mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
+    # Debug: log mask statistics
     mask_nonzero = int((mask > 128).sum())
     mask_total = mask.shape[0] * mask.shape[1]
     print(f"Mask shape: {mask.shape}, pixels to remove (>128): {mask_nonzero}/{mask_total} ({100*mask_nonzero/mask_total:.1f}%)")
+    # Normalize: values > 0 become 1.0, 0 stays 0 (LaMa expects this)
     mask = norm_img(mask)
+    # Final check
     mask_final_pixels = int((mask > 0.5).sum())
     print(f"After normalization: {mask_final_pixels} pixels marked for removal (value > 0.5)")
     if mask_final_pixels < 10:
+        print("WARNING: Very few pixels marked for removal! Check mask format.")
     res_np_img = run(image, mask)
     return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)