LogicGoInfotechSpaces committed on
Commit
d670035
·
1 Parent(s): 89c8105

refactor: use exact reference model implementation from aryadytm/remove-photo-object - simple 255-alpha inversion

Browse files
Files changed (1) hide show
  1. src/core.py +15 -71
src/core.py CHANGED
@@ -444,94 +444,38 @@ def get_args_parser():
444
 
445
  def process_inpaint(image, mask, invert_mask=True):
446
  """
447
- Process inpainting - matches reference model implementation exactly.
448
  Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
 
449
  """
450
  image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
451
  original_shape = image.shape
452
- # Use INTER_LANCZOS4 for better quality (higher quality interpolation)
453
- interpolation = cv2.INTER_LANCZOS4
454
 
455
- # Increase size limit to preserve quality (up to 2048px max dimension)
456
- # Reference model uses max(image.shape) but we can optimize for quality
457
- max_dimension = max(image.shape)
458
- size_limit = min(max_dimension, 2048) # Cap at 2048 for quality/speed balance
 
459
 
460
  print(f"Origin image shape: {original_shape}")
461
- print(f"Size limit: {size_limit} (max dimension was {max_dimension})")
462
  image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
463
  print(f"Resized image shape: {image.shape}")
464
  image = norm_img(image)
465
 
466
- # Match reference model exactly: invert alpha channel
467
- # Reference line 460: mask = 255-mask[:,:,3]
468
  # This means: alpha=0 (transparent/drawn) → 255 (white/remove)
469
  # alpha=255 (opaque) → 0 (black/keep)
 
470
 
471
- # Check if we should use RGB channels (for uploaded black/white masks)
472
- alpha_channel = mask[:,:,3]
473
- rgb_channels = mask[:,:,:3]
474
- alpha_mean = alpha_channel.mean()
475
 
476
- if alpha_mean > 200:
477
- # Alpha is mostly opaque - use RGB channels (white=remove, black=keep)
478
- gray = cv2.cvtColor(rgb_channels, cv2.COLOR_RGB2GRAY)
479
- # White pixels (>128) = remove
480
- mask = (gray > 128).astype(np.uint8) * 255
481
- # Also detect magenta specifically
482
- magenta = np.all(rgb_channels == [255, 0, 255], axis=2).astype(np.uint8) * 255
483
- mask = np.maximum(mask, magenta)
484
-
485
- # Apply invert_mask if needed
486
- if not invert_mask:
487
- mask = 255 - mask
488
- else:
489
- # Alpha channel encodes mask - use reference model's exact logic
490
- # Invert alpha: transparent (0) → white (255), opaque (255) → black (0)
491
- mask = 255 - alpha_channel
492
-
493
- # Apply invert_mask if user wants opposite
494
- if not invert_mask:
495
- mask = 255 - mask # double invert back to original
496
-
497
- # Resize mask to match image dimensions (use INTER_NEAREST for binary mask)
498
- mask = resize_max_size(mask, size_limit=size_limit, interpolation=cv2.INTER_NEAREST)
499
-
500
- # Debug: log mask statistics BEFORE normalization
501
- mask_nonzero = int((mask > 128).sum())
502
- mask_total = mask.shape[0] * mask.shape[1]
503
- print(f"Mask shape: {mask.shape}, pixels to remove (>128): {mask_nonzero}/{mask_total} ({100*mask_nonzero/mask_total:.1f}%)")
504
-
505
- if mask_nonzero < 10:
506
- print("ERROR: Mask is empty or almost empty! Cannot proceed with inpainting.")
507
- print("DEBUG INFO:")
508
- print(f" - Alpha channel mean: {alpha_mean}")
509
- print(f" - RGB channels min/max: {rgb_channels.min()}/{rgb_channels.max()}")
510
- print(f" - Alpha channel min/max: {alpha_channel.min()}/{alpha_channel.max()}")
511
- # Return original image if mask is invalid
512
- return cv2.cvtColor(cv2.resize(cv2.cvtColor(np.array(image*255, dtype=np.uint8), cv2.COLOR_RGB2BGR),
513
- (original_shape[1], original_shape[0]),
514
- interpolation=cv2.INTER_LANCZOS4), cv2.COLOR_BGR2RGB)
515
-
516
- # Normalize: values > 0 become 1.0, 0 stays 0 (LaMa expects this)
517
  mask = norm_img(mask)
518
-
519
- # Final check
520
- mask_final_pixels = int((mask > 0.5).sum())
521
- print(f"After normalization: {mask_final_pixels} pixels marked for removal (value > 0.5)")
522
-
523
- if mask_final_pixels < 10:
524
- print("ERROR: After normalization, mask is still empty! Returning original image.")
525
- return cv2.cvtColor(cv2.resize(cv2.cvtColor(np.array(image*255, dtype=np.uint8), cv2.COLOR_RGB2BGR),
526
- (original_shape[1], original_shape[0]),
527
- interpolation=cv2.INTER_LANCZOS4), cv2.COLOR_BGR2RGB)
528
 
529
  res_np_img = run(image, mask)
530
 
531
- # Resize back to original dimensions if needed (for quality preservation)
532
- if res_np_img.shape[:2] != original_shape[:2]:
533
- res_np_img = cv2.resize(res_np_img, (original_shape[1], original_shape[0]),
534
- interpolation=cv2.INTER_LANCZOS4)
535
- print(f"Resized output back to original: {res_np_img.shape}")
536
-
537
  return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)
 
444
 
445
  def process_inpaint(image, mask, invert_mask=True):
446
  """
447
+ Process inpainting - EXACT copy from reference model.
448
  Reference: https://huggingface.co/spaces/aryadytm/remove-photo-object
449
+ Line 444-466 in their src/core.py
450
  """
451
  image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
452
  original_shape = image.shape
453
+ interpolation = cv2.INTER_CUBIC
 
454
 
455
+ #size_limit: Union[int, str] = request.form.get("sizeLimit", "1080")
456
+ #if size_limit == "Original":
457
+ size_limit = max(image.shape)
458
+ #else:
459
+ # size_limit = int(size_limit)
460
 
461
  print(f"Origin image shape: {original_shape}")
 
462
  image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
463
  print(f"Resized image shape: {image.shape}")
464
  image = norm_img(image)
465
 
466
+ # Reference model's EXACT logic: simple alpha inversion
467
+ # Line 460: mask = 255-mask[:,:,3]
468
  # This means: alpha=0 (transparent/drawn) → 255 (white/remove)
469
  # alpha=255 (opaque) → 0 (black/keep)
470
+ mask = 255 - mask[:,:,3]
471
 
472
+ # Apply invert_mask if user wants opposite behavior
473
+ if not invert_mask:
474
+ mask = 255 - mask # double invert back
 
475
 
476
+ mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
  mask = norm_img(mask)
 
 
 
 
 
 
 
 
 
 
478
 
479
  res_np_img = run(image, mask)
480
 
 
 
 
 
 
 
481
  return cv2.cvtColor(res_np_img, cv2.COLOR_BGR2RGB)