Commit
·
d670035
1
Parent(s):
89c8105
refactor: use exact reference model implementation from aryadytm/remove-photo-object - simple 255-alpha inversion
Browse files
- src/core.py +15 -71
src/core.py
CHANGED
|
@@ -444,94 +444,38 @@ def get_args_parser():
|
|
| 444 |
|
| 445 |
def process_inpaint(image, mask, invert_mask=True):
|
| 446 |
"""
|
| 447 |
-
Process inpainting -
|
| 448 |
Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
|
|
|
|
| 449 |
"""
|
| 450 |
image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
|
| 451 |
original_shape = image.shape
|
| 452 |
-
|
| 453 |
-
interpolation = cv2.INTER_LANCZOS4
|
| 454 |
|
| 455 |
-
#
|
| 456 |
-
#
|
| 457 |
-
|
| 458 |
-
|
|
|
|
| 459 |
|
| 460 |
print(f"Origin image shape: {original_shape}")
|
| 461 |
-
print(f"Size limit: {size_limit} (max dimension was {max_dimension})")
|
| 462 |
image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
|
| 463 |
print(f"Resized image shape: {image.shape}")
|
| 464 |
image = norm_img(image)
|
| 465 |
|
| 466 |
-
#
|
| 467 |
-
#
|
| 468 |
# This means: alpha=0 (transparent/drawn) → 255 (white/remove)
|
| 469 |
# alpha=255 (opaque) → 0 (black/keep)
|
|
|
|
| 470 |
|
| 471 |
-
#
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
alpha_mean = alpha_channel.mean()
|
| 475 |
|
| 476 |
-
|
| 477 |
-
# Alpha is mostly opaque - use RGB channels (white=remove, black=keep)
|
| 478 |
-
gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
|
| 479 |
-
# White pixels (>128) = remove
|
| 480 |
-
mask = (gray > 128).astype(np.uint8) * 255
|
| 481 |
-
# Also detect magenta specifically
|
| 482 |
-
magenta = np.all(rgb_channels == [255, 0, 255], axis=2).astype(np.uint8) * 255
|
| 483 |
-
mask = np.maximum(mask, magenta)
|
| 484 |
-
|
| 485 |
-
# Apply invert_mask if needed
|
| 486 |
-
if not invert_mask:
|
| 487 |
-
mask = 255 - mask
|
| 488 |
-
else:
|
| 489 |
-
# Alpha channel encodes mask - use reference model's exact logic
|
| 490 |
-
# Invert alpha: transparent (0) → white (255), opaque (255) → black (0)
|
| 491 |
-
mask = 255 - alpha_channel
|
| 492 |
-
|
| 493 |
-
# Apply invert_mask if user wants opposite
|
| 494 |
-
if not invert_mask:
|
| 495 |
-
mask = 255 - mask # double invert back to original
|
| 496 |
-
|
| 497 |
-
# Resize mask to match image dimensions (use INTER_NEAREST for binary mask)
|
| 498 |
-
mask = resize_max_size(mask, size_limit=size_limit, interpolation=cv2.INTER_NEAREST)
|
| 499 |
-
|
| 500 |
-
# Debug: log mask statistics BEFORE normalization
|
| 501 |
-
mask_nonzero = int((mask > 128).sum())
|
| 502 |
-
mask_total = mask.shape[0] * mask.shape[1]
|
| 503 |
-
print(f"Mask shape: {mask.shape}, pixels to remove (>128): {mask_nonzero}/{mask_total} ({100*mask_nonzero/mask_total:.1f}%)")
|
| 504 |
-
|
| 505 |
-
if mask_nonzero < 10:
|
| 506 |
-
print("ERROR: Mask is empty or almost empty! Cannot proceed with inpainting.")
|
| 507 |
-
print("DEBUG INFO:")
|
| 508 |
-
print(f" - Alpha channel mean: {alpha_mean}")
|
| 509 |
-
print(f" - RGB channels min/max: {rgb_channels.min()}/{rgb_channels.max()}")
|
| 510 |
-
print(f" - Alpha channel min/max: {alpha_channel.min()}/{alpha_channel.max()}")
|
| 511 |
-
# Return original image if mask is invalid
|
| 512 |
-
return cv2.cvtColor(cv2.resize(cv2.cvtColor(np.array(image*255, dtype=np.uint8), cv2.COLOR_RGB2BGR),
|
| 513 |
-
(original_shape[1], original_shape[0]),
|
| 514 |
-
interpolation=cv2.INTER_LANCZOS4), cv2.COLOR_BGR2RGB)
|
| 515 |
-
|
| 516 |
-
# Normalize: values > 0 become 1.0, 0 stays 0 (LaMa expects this)
|
| 517 |
mask = norm_img(mask)
|
| 518 |
-
|
| 519 |
-
# Final check
|
| 520 |
-
mask_final_pixels = int((mask > 0.5).sum())
|
| 521 |
-
print(f"After normalization: {mask_final_pixels} pixels marked for removal (value > 0.5)")
|
| 522 |
-
|
| 523 |
-
if mask_final_pixels < 10:
|
| 524 |
-
print("ERROR: After normalization, mask is still empty! Returning original image.")
|
| 525 |
-
return cv2.cvtColor(cv2.resize(cv2.cvtColor(np.array(image*255, dtype=np.uint8), cv2.COLOR_RGB2BGR),
|
| 526 |
-
(original_shape[1], original_shape[0]),
|
| 527 |
-
interpolation=cv2.INTER_LANCZOS4), cv2.COLOR_BGR2RGB)
|
| 528 |
|
| 529 |
res_np_img = run(image, mask)
|
| 530 |
|
| 531 |
-
# Resize back to original dimensions if needed (for quality preservation)
|
| 532 |
-
if res_np_img.shape[:2] != original_shape[:2]:
|
| 533 |
-
res_np_img = cv2.resize(res_np_img, (original_shape[1], original_shape[0]),
|
| 534 |
-
interpolation=cv2.INTER_LANCZOS4)
|
| 535 |
-
print(f"Resized output back to original: {res_np_img.shape}")
|
| 536 |
-
|
| 537 |
return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)
|
|
|
|
| 444 |
|
| 445 |
def process_inpaint(image, mask, invert_mask=True):
    """
    Inpaint the regions of ``image`` selected by the alpha channel of ``mask``.

    Mirrors the reference implementation at
    https://huggingface.co/spaces/aryadytm/remove-photo-object
    (lines 444-466 of its src/core.py).

    Args:
        image: RGBA image array; it is converted to RGB before processing.
        mask: Array whose last axis carries an alpha channel at index 3.
            With the default ``invert_mask=True`` the alpha is inverted, so
            alpha=0 (transparent/drawn) becomes 255 (white/remove) and
            alpha=255 (opaque) becomes 0 (black/keep).
        invert_mask: When False, the inverted alpha is inverted a second
            time, giving the opposite selection.

    Returns:
        The result of ``run(image, mask)`` passed through
        ``cv2.COLOR_BGR2RGB``.
    """
    image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    original_shape = image.shape
    resample = cv2.INTER_CUBIC

    # The reference model reads a user-selected "sizeLimit" and treats
    # "Original" as the largest image dimension; only that branch is kept
    # here. NOTE(review): presumably this makes the image resize below a
    # no-op - confirm against resize_max_size.
    size_limit = max(original_shape)

    print(f"Origin image shape: {original_shape}")
    image = resize_max_size(image, size_limit=size_limit, interpolation=resample)
    print(f"Resized image shape: {image.shape}")
    image = norm_img(image)

    # Simple alpha inversion, exactly as in the reference model:
    #   alpha=0   (transparent/drawn) -> 255 (white/remove)
    #   alpha=255 (opaque)            -> 0   (black/keep)
    removal_mask = 255 - mask[:, :, 3]

    # Callers wanting the opposite behaviour get a second inversion.
    if not invert_mask:
        removal_mask = 255 - removal_mask

    removal_mask = resize_max_size(
        removal_mask, size_limit=size_limit, interpolation=resample
    )
    removal_mask = norm_img(removal_mask)

    inpainted = run(image, removal_mask)

    return cv2.cvtColor(inpainted, cv2.COLOR_BGR2RGB)
|