# subapi / detect_crop_image.py
# habulaj's picture
# Update detect_crop_image.py
# e0f323e verified
import cv2
import numpy as np
import os
import argparse
def _find_content_runs(row_has_content):
    """Return inclusive ``[start, end]`` index pairs for each run of truthy rows."""
    runs = []
    run_start = None
    for i, has_content in enumerate(row_has_content):
        if has_content and run_start is None:
            run_start = i
        elif not has_content and run_start is not None:
            runs.append([run_start, i - 1])
            run_start = None
    if run_start is not None:
        # The last run extends to the final row.
        runs.append([run_start, len(row_has_content) - 1])
    return runs


def _merge_close_runs(runs, gap_tolerance):
    """Merge consecutive runs whose start/end rows are at most ``gap_tolerance`` apart."""
    merged = [list(runs[0])]
    for start, end in runs[1:]:
        if start - merged[-1][1] <= gap_tolerance:
            merged[-1][1] = end
        else:
            merged.append([start, end])
    return merged


def _save_image(path, image):
    """Write ``image`` to ``path``; print an error and return False if OpenCV reports failure."""
    # cv2.imwrite signals most failures (e.g. missing directory) by returning
    # False rather than raising, so the result has to be checked explicitly.
    if cv2.imwrite(path, image):
        return True
    print(f"Error: Could not write image to {path}")
    return False


def detect_and_crop_image(
    image_path,
    output_image_path=None,
    *,
    white_threshold=240,
    black_threshold=10,
    noise_tolerance=5,
    gap_tolerance=20,
    margin=2,
):
    """Crop near-white / near-black borders away from the main content of an image.

    The image is read with OpenCV, pixels whose three channels are all
    ``>= white_threshold`` or all ``<= black_threshold`` are treated as
    background, and the vertical block holding the most non-background pixels
    is selected. The crop is then inset while any of its corners is
    background (rounded corners), trimmed to even width/height and written out.

    Args:
        image_path: Path of the image to read.
        output_image_path: Where to write the result. When ``None`` and a crop
            is applied, ``<name>_cropped<ext>`` next to the input is used.
        white_threshold: Minimum per-channel value for a pixel to count as white.
        black_threshold: Maximum per-channel value for a pixel to count as black.
        noise_tolerance: A row/column needs more than this many content pixels
            to count as containing content.
        gap_tolerance: Row blocks separated by at most this many rows are merged.
        margin: Pixels of padding added around the content when no corner
            inset was applied.

    Returns:
        The path of the written image; ``image_path`` itself when no crop is
        applied and no output path was given; ``None`` when the input cannot
        be read, no content block is found, the crop would be empty, or the
        output cannot be written.
    """
    if not os.path.exists(image_path):
        print(f"Error: Image file not found at {image_path}")
        return None

    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        print("Error: Could not open image.")
        return None

    height, width = img.shape[:2]
    print(f"[detect_crop] Input image: {width}x{height}")

    # --- Step 1: Build a mask of non-background pixels ---
    # np.all() checks every channel, so channel order (BGR vs RGB) is irrelevant.
    is_white = np.all(img >= white_threshold, axis=2)
    is_black = np.all(img <= black_threshold, axis=2)
    is_bg = is_white | is_black
    is_content = ~is_bg  # True where there IS content (non-background)

    if not np.any(is_content):
        print("Error: Image appears to be entirely background. No crop applied.")
        if output_image_path:
            if not _save_image(output_image_path, img):
                return None
            return output_image_path
        return image_path

    # --- Step 2: Find the main block of vertical content (ignoring text/watermarks) ---
    row_content_pixels = np.sum(is_content, axis=1)
    row_has_content = row_content_pixels > noise_tolerance

    blocks = _find_content_runs(row_has_content)
    if not blocks:
        print("Error: No content blocks found.")
        return None

    # Merge blocks separated by small gaps to handle intra-image background lines
    merged_blocks = _merge_close_runs(blocks, gap_tolerance)

    # Select the block with the most content pixels; max() keeps the first
    # block on ties, matching a strict ">" scan.
    top, bottom = max(
        merged_blocks,
        key=lambda block: row_content_pixels[block[0]:block[1] + 1].sum(),
    )

    # Find extreme left and right columns restricted to the selected main block
    col_content_pixels = np.sum(is_content[top:bottom + 1, :], axis=0)
    cols_with_content = col_content_pixels > noise_tolerance
    if not np.any(cols_with_content):
        # A block shorter than the noise tolerance has no column above it;
        # fall back to any column with content instead of silently using the
        # full image width.
        cols_with_content = col_content_pixels > 0
    content_cols = np.flatnonzero(cols_with_content)
    left, right = int(content_cols[0]), int(content_cols[-1])
    print(f"[detect_crop] Detected content bounds: top={top}, bottom={bottom}, left={left}, right={right}")

    # --- Step 3: Smart Zoom for rounded corners ---
    zoom_limit = min(width, height) // 4  # max zoom 25%
    zoom_amount = 0
    while zoom_amount < zoom_limit and (right - left > 20) and (bottom - top > 20):
        any_corner_is_bg = (
            is_bg[top, left]
            or is_bg[top, right]
            or is_bg[bottom, left]
            or is_bg[bottom, right]
        )
        if not any_corner_is_bg:
            break
        top += 1
        bottom -= 1
        left += 1
        right -= 1
        zoom_amount += 1

    if zoom_amount > 0:
        print(f"Smart Zoom applied: {zoom_amount}px inset to clear rounded corners.")

    # --- Step 4: Validate and prepare crop area ---
    if zoom_amount == 0:
        # Only pad when no inset happened; padding after an inset would
        # re-introduce the background corners that were just removed.
        top = max(0, top - margin)
        bottom = min(height - 1, bottom + margin)
        left = max(0, left - margin)
        right = min(width - 1, right + margin)

    final_w = right - left + 1
    final_h = bottom - top + 1
    if final_w <= 0 or final_h <= 0:
        print("Error: Invalid crop dimensions after zoom.")
        return None

    # Ensure crop dimensions are even (needed for video encoding/Gemini pipelines)
    final_w -= final_w % 2
    final_h -= final_h % 2

    # Adjust right/bottom to match the even dimensions
    right = left + final_w - 1
    bottom = top + final_h - 1
    print(f"Proposed Crop: w={final_w}, h={final_h}, x={left}, y={top}")

    total_removed = top + (height - bottom - 1) + left + (width - right - 1)
    if total_removed < 10:
        print("[detect_crop] Very little border detected. No crop applied.")
        if output_image_path:
            if not _save_image(output_image_path, img):
                return None
            print(f"Successfully created cropped image at {output_image_path}")
            return output_image_path
        return image_path

    # Trimming a 1-pixel dimension to an even size leaves nothing to crop;
    # stop here instead of handing an empty array to cv2.imwrite.
    if final_w <= 0 or final_h <= 0:
        print("Error: Crop area is empty after enforcing even dimensions.")
        return None

    # Crop the original image
    cropped_img = img[top:bottom + 1, left:right + 1]
    if output_image_path is None:
        filename, ext = os.path.splitext(image_path)
        output_image_path = f"{filename}_cropped{ext}"

    if not _save_image(output_image_path, cropped_img):
        return None
    print(f"Successfully created cropped image at {output_image_path}")
    return output_image_path
if __name__ == "__main__":
    import sys

    # Usage: detect_crop_image.py [input_path [output_path]]
    # Missing positional arguments fall back to the defaults below.
    cli_args = sys.argv[1:]
    input_image = cli_args[0] if len(cli_args) >= 1 else "image.png"
    output_image = cli_args[1] if len(cli_args) >= 2 else "image_cropped.png"

    print(f"Processing: {input_image} -> {output_image}")
    result = detect_and_crop_image(input_image, output_image)

    # Success requires both a returned path and that path existing on disk.
    succeeded = bool(result) and os.path.exists(result)
    if not succeeded:
        print(f"\n❌ Failed to create cropped image.")
    else:
        print(f"\n✅ Done! Cropped image saved as: {result}")