Commit
·
8c6db4d
1 Parent(s):
2b0f7aa
fix: preserve image quality by limiting resize to 3000px, add output verification, improve debug logging
Browse files
- src/core.py: +64 −8
src/core.py
CHANGED
|
@@ -445,18 +445,35 @@ def get_args_parser():
|
|
| 445 |
def process_inpaint(image, mask, invert_mask=True):
|
| 446 |
"""
|
| 447 |
Process inpainting - handles both alpha-based masks and RGB-based masks.
|
|
|
|
| 448 |
Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
|
| 449 |
"""
|
| 450 |
image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
|
| 451 |
-
original_shape = image.shape
|
| 452 |
interpolation = cv2.INTER_CUBIC
|
| 453 |
|
| 454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
print(f"Origin image shape: {original_shape}")
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
# Handle mask: check if we should use alpha channel or RGB channels
|
| 462 |
alpha_channel = mask[:,:,3]
|
|
@@ -471,28 +488,67 @@ def process_inpaint(image, mask, invert_mask=True):
|
|
| 471 |
# alpha=0 (transparent) → 255 (white/remove)
|
| 472 |
# alpha=255 (opaque) → 0 (black/keep)
|
| 473 |
mask = 255 - alpha_channel
|
| 474 |
-
|
|
|
|
| 475 |
else:
|
| 476 |
# Alpha is mostly opaque (255), use RGB channels instead
|
| 477 |
# RGB masks: white (255) = remove, black (0) = keep
|
| 478 |
gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
|
| 479 |
mask = (gray > 128).astype(np.uint8) * 255
|
| 480 |
-
|
|
|
|
| 481 |
|
| 482 |
# Apply invert_mask if user wants opposite behavior
|
| 483 |
if not invert_mask:
|
| 484 |
mask = 255 - mask # invert: white becomes black, black becomes white
|
| 485 |
print(f"Applied invert_mask=False: inverted mask")
|
| 486 |
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
# Debug: log final mask statistics
|
| 490 |
mask_nonzero = int((mask > 128).sum())
|
| 491 |
mask_total = mask.shape[0] * mask.shape[1]
|
| 492 |
print(f"Final mask before normalization: {mask_nonzero}/{mask_total} pixels marked for removal ({100*mask_nonzero/mask_total:.2f}%)")
|
| 493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
mask = norm_img(mask)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
|
|
|
|
|
|
|
| 496 |
res_np_img = run(image, mask)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
|
| 498 |
return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)
|
|
|
|
| 445 |
def process_inpaint(image, mask, invert_mask=True):
|
| 446 |
"""
|
| 447 |
Process inpainting - handles both alpha-based masks and RGB-based masks.
|
| 448 |
+
Preserves original image quality and dimensions.
|
| 449 |
Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
|
| 450 |
"""
|
| 451 |
image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
|
| 452 |
+
original_shape = image.shape # (H, W, C)
|
| 453 |
interpolation = cv2.INTER_CUBIC
|
| 454 |
|
| 455 |
+
# Preserve original size - only resize if absolutely necessary for memory/performance
|
| 456 |
+
# Keep original quality by preserving dimensions
|
| 457 |
+
max_dimension = max(image.shape[:2])
|
| 458 |
+
# Don't resize unless image is extremely large (over 3000px) to preserve quality
|
| 459 |
+
if max_dimension > 3000:
|
| 460 |
+
size_limit = 3000
|
| 461 |
+
print(f"Very large image detected ({max_dimension}px), resizing to {size_limit}px for processing")
|
| 462 |
+
else:
|
| 463 |
+
size_limit = max_dimension # Keep original size to preserve quality
|
| 464 |
+
print(f"Preserving original image size: {max_dimension}px (no resize)")
|
| 465 |
|
| 466 |
print(f"Origin image shape: {original_shape}")
|
| 467 |
+
|
| 468 |
+
# Resize image only if needed
|
| 469 |
+
if size_limit < max_dimension:
|
| 470 |
+
image_resized = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
|
| 471 |
+
print(f"Resized image shape: {image_resized.shape}")
|
| 472 |
+
else:
|
| 473 |
+
image_resized = image
|
| 474 |
+
print(f"Image not resized: {image_resized.shape}")
|
| 475 |
+
|
| 476 |
+
image = norm_img(image_resized)
|
| 477 |
|
| 478 |
# Handle mask: check if we should use alpha channel or RGB channels
|
| 479 |
alpha_channel = mask[:,:,3]
|
|
|
|
| 488 |
# alpha=0 (transparent) → 255 (white/remove)
|
| 489 |
# alpha=255 (opaque) → 0 (black/keep)
|
| 490 |
mask = 255 - alpha_channel
|
| 491 |
+
transparent_count = int((alpha_channel < 128).sum())
|
| 492 |
+
print(f"Using alpha channel: {transparent_count} transparent pixels → white (to remove)")
|
| 493 |
else:
|
| 494 |
# Alpha is mostly opaque (255), use RGB channels instead
|
| 495 |
# RGB masks: white (255) = remove, black (0) = keep
|
| 496 |
gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
|
| 497 |
mask = (gray > 128).astype(np.uint8) * 255
|
| 498 |
+
white_count = int((mask > 128).sum())
|
| 499 |
+
print(f"Using RGB channels: {white_count} white pixels (to remove)")
|
| 500 |
|
| 501 |
# Apply invert_mask if user wants opposite behavior
|
| 502 |
if not invert_mask:
|
| 503 |
mask = 255 - mask # invert: white becomes black, black becomes white
|
| 504 |
print(f"Applied invert_mask=False: inverted mask")
|
| 505 |
|
| 506 |
+
# Resize mask to match image dimensions
|
| 507 |
+
if size_limit < max_dimension:
|
| 508 |
+
mask = resize_max_size(mask, size_limit=size_limit, interpolation=cv2.INTER_NEAREST)
|
| 509 |
+
else:
|
| 510 |
+
# Ensure mask matches image dimensions
|
| 511 |
+
if mask.shape[:2] != image_resized.shape[:2]:
|
| 512 |
+
mask = cv2.resize(mask, (image_resized.shape[1], image_resized.shape[0]), interpolation=cv2.INTER_NEAREST)
|
| 513 |
|
| 514 |
# Debug: log final mask statistics
|
| 515 |
mask_nonzero = int((mask > 128).sum())
|
| 516 |
mask_total = mask.shape[0] * mask.shape[1]
|
| 517 |
print(f"Final mask before normalization: {mask_nonzero}/{mask_total} pixels marked for removal ({100*mask_nonzero/mask_total:.2f}%)")
|
| 518 |
|
| 519 |
+
if mask_nonzero < 10:
|
| 520 |
+
print("ERROR: Mask is empty or almost empty! Returning original image.")
|
| 521 |
+
# Return original image at original size
|
| 522 |
+
original_rgb = (image_resized * 255).astype(np.uint8)
|
| 523 |
+
return cv2.resize(cv2.cvtColor(original_rgb, cv2.COLOR_RGB2BGR),
|
| 524 |
+
(original_shape[1], original_shape[0]),
|
| 525 |
+
interpolation=cv2.INTER_CUBIC)
|
| 526 |
+
|
| 527 |
+
# Verify mask is correct before normalization
|
| 528 |
+
print(f"Mask verification: {mask_nonzero} pixels will be removed, shape: {mask.shape}")
|
| 529 |
+
|
| 530 |
mask = norm_img(mask)
|
| 531 |
+
|
| 532 |
+
# Verify normalized mask
|
| 533 |
+
mask_normalized_ones = int((mask > 0.5).sum())
|
| 534 |
+
print(f"After normalization: {mask_normalized_ones} pixels marked for removal (value > 0.5)")
|
| 535 |
|
| 536 |
+
# Run inpainting
|
| 537 |
+
print("Running LaMa model for inpainting...")
|
| 538 |
res_np_img = run(image, mask)
|
| 539 |
+
print(f"Inpainting complete. Output shape: {res_np_img.shape}")
|
| 540 |
+
|
| 541 |
+
# Verify output changed
|
| 542 |
+
original_for_compare = (image_resized * 255).astype(np.uint8)
|
| 543 |
+
original_bgr = cv2.cvtColor(original_for_compare, cv2.COLOR_RGB2BGR)
|
| 544 |
+
diff = np.abs(res_np_img.astype(np.float32) - original_bgr.astype(np.float32))
|
| 545 |
+
diff_pixels = int((diff.sum(axis=2) > 10).sum()) # Pixels that changed by more than 10 in any channel
|
| 546 |
+
print(f"Output verification: {diff_pixels} pixels differ from input (should be > 0 if inpainting worked)")
|
| 547 |
+
|
| 548 |
+
# Resize back to original dimensions if we resized (use LANCZOS4 for better quality)
|
| 549 |
+
if size_limit < max_dimension:
|
| 550 |
+
res_np_img = cv2.resize(res_np_img, (original_shape[1], original_shape[0]),
|
| 551 |
+
interpolation=cv2.INTER_LANCZOS4)
|
| 552 |
+
print(f"Resized output back to original size: {res_np_img.shape}")
|
| 553 |
|
| 554 |
return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)
|