Spaces:

LogicGoInfotechSpaces
/

object_remover

Running

App Files Community

LogicGoInfotechSpaces committed on Nov 3, 2025

Commit

d670035

1 Parent(s): 89c8105

refactor: use exact reference model implementation from aryadytm/remove-photo-object - simple 255-alpha inversion

Browse files

Files changed (1) hide show

src/core.py +15 -71

src/core.py CHANGED Viewed

@@ -444,94 +444,38 @@ def get_args_parser():
 def process_inpaint(image, mask, invert_mask=True):
     """
-    Process inpainting - matches reference model implementation exactly.
     Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
     """
     image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
     original_shape = image.shape
-    # Use INTER_LANCZOS4 for better quality (higher quality interpolation)
-    interpolation = cv2.INTER_LANCZOS4
-    # Increase size limit to preserve quality (up to 2048px max dimension)
-    # Reference model uses max(image.shape) but we can optimize for quality
-    max_dimension = max(image.shape)
-    size_limit = min(max_dimension, 2048)  # Cap at 2048 for quality/speed balance
     print(f"Origin image shape: {original_shape}")
-    print(f"Size limit: {size_limit} (max dimension was {max_dimension})")
     image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
     print(f"Resized image shape: {image.shape}")
     image = norm_img(image)
-    # Match reference model exactly: invert alpha channel
-    # Reference line 460: mask = 255-mask[:,:,3]
     # This means: alpha=0 (transparent/drawn) → 255 (white/remove)
     #             alpha=255 (opaque) → 0 (black/keep)
-    # Check if we should use RGB channels (for uploaded black/white masks)
-    alpha_channel = mask[:,:,3]
-    rgb_channels = mask[:,:,:3]
-    alpha_mean = alpha_channel.mean()
-    if alpha_mean > 200:
-        # Alpha is mostly opaque - use RGB channels (white=remove, black=keep)
-        gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
-        # White pixels (>128) = remove
-        mask = (gray > 128).astype(np.uint8) * 255
-        # Also detect magenta specifically
-        magenta = np.all(rgb_channels == [255, 0, 255], axis=2).astype(np.uint8) * 255
-        mask = np.maximum(mask, magenta)
-        # Apply invert_mask if needed
-        if not invert_mask:
-            mask = 255 - mask
-    else:
-        # Alpha channel encodes mask - use reference model's exact logic
-        # Invert alpha: transparent (0) → white (255), opaque (255) → black (0)
-        mask = 255 - alpha_channel
-        # Apply invert_mask if user wants opposite
-        if not invert_mask:
-            mask = 255 - mask  # double invert back to original
-    # Resize mask to match image dimensions (use INTER_NEAREST for binary mask)
-    mask = resize_max_size(mask, size_limit=size_limit, interpolation=cv2.INTER_NEAREST)
-    # Debug: log mask statistics BEFORE normalization
-    mask_nonzero = int((mask > 128).sum())
-    mask_total = mask.shape[0] * mask.shape[1]
-    print(f"Mask shape: {mask.shape}, pixels to remove (>128): {mask_nonzero}/{mask_total} ({100*mask_nonzero/mask_total:.1f}%)")
-    if mask_nonzero < 10:
-        print("ERROR: Mask is empty or almost empty! Cannot proceed with inpainting.")
-        print("DEBUG INFO:")
-        print(f"  - Alpha channel mean: {alpha_mean}")
-        print(f"  - RGB channels min/max: {rgb_channels.min()}/{rgb_channels.max()}")
-        print(f"  - Alpha channel min/max: {alpha_channel.min()}/{alpha_channel.max()}")
-        # Return original image if mask is invalid
-        return cv2.cvtColor(cv2.resize(cv2.cvtColor(np.array(image*255, dtype=np.uint8), cv2.COLOR_RGB2BGR),
-                                       (original_shape[1], original_shape[0]),
-                                       interpolation=cv2.INTER_LANCZOS4), cv2.COLOR_BGR2RGB)
-    # Normalize: values > 0 become 1.0, 0 stays 0 (LaMa expects this)
     mask = norm_img(mask)
-    # Final check
-    mask_final_pixels = int((mask > 0.5).sum())
-    print(f"After normalization: {mask_final_pixels} pixels marked for removal (value > 0.5)")
-    if mask_final_pixels < 10:
-        print("ERROR: After normalization, mask is still empty! Returning original image.")
-        return cv2.cvtColor(cv2.resize(cv2.cvtColor(np.array(image*255, dtype=np.uint8), cv2.COLOR_RGB2BGR),
-                                       (original_shape[1], original_shape[0]),
-                                       interpolation=cv2.INTER_LANCZOS4), cv2.COLOR_BGR2RGB)
     res_np_img = run(image, mask)
-    # Resize back to original dimensions if needed (for quality preservation)
-    if res_np_img.shape[:2] != original_shape[:2]:
-        res_np_img = cv2.resize(res_np_img, (original_shape[1], original_shape[0]),
-                                interpolation=cv2.INTER_LANCZOS4)
-        print(f"Resized output back to original: {res_np_img.shape}")
     return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)

 def process_inpaint(image, mask, invert_mask=True):
     """
+    Process inpainting - EXACT copy from reference model.
     Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
+    Line 444-466 in their src/core.py
     """
     image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
     original_shape = image.shape
+    interpolation = cv2.INTER_CUBIC
+    #size_limit: Union[int, str] = request.form.get("sizeLimit", "1080")
+    #if size_limit == "Original":
+    size_limit = max(image.shape)
+    #else:
+    #    size_limit = int(size_limit)
     print(f"Origin image shape: {original_shape}")
     image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
     print(f"Resized image shape: {image.shape}")
     image = norm_img(image)
+    # Reference model's EXACT logic: simple alpha inversion
+    # Line 460: mask = 255-mask[:,:,3]
     # This means: alpha=0 (transparent/drawn) → 255 (white/remove)
     #             alpha=255 (opaque) → 0 (black/keep)
+    mask = 255 - mask[:,:,3]
+    # Apply invert_mask if user wants opposite behavior
+    if not invert_mask:
+        mask = 255 - mask  # double invert back
+    mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
     mask = norm_img(mask)
     res_np_img = run(image, mask)
     return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)