Spaces:

habulaj
/

subapi

Running

App Files Files Community

subapi / detect_crop_image.py

habulaj

Update detect_crop_image.py

e0f323e verified 14 days ago

raw

history blame contribute delete

6.04 kB

	import cv2
	import numpy as np
	import os
	import argparse

	def detect_and_crop_image(image_path, output_image_path=None):
	    """Crop an image to its main content block, dropping white/black borders.

	    Pixels whose three channels are all >= 240 (white) or all <= 10 (black)
	    are treated as background. Rows holding more than a few content pixels
	    are grouped into vertical blocks, blocks separated by small gaps are
	    merged, and the block with the most content pixels is kept. Left/right
	    bounds come from the columns that hold content inside that block. If a
	    corner of the detected box is still background (e.g. rounded corners),
	    the box is shrunk one pixel per side at a time until all four corners
	    are content or the zoom limit is reached. The final size is rounded
	    down to even numbers.

	    Args:
	        image_path: Path of the image to read.
	        output_image_path: Where to write the result. When omitted, a
	            cropped result is written next to the input as
	            ``<name>_cropped<ext>``.

	    Returns:
	        The path of the written image; ``image_path`` itself when no crop
	        was applied and no output path was given; or ``None`` when the
	        input is missing or unreadable, no content block is found, the
	        crop box is degenerate, or the image could not be written.
	    """
	    if not os.path.exists(image_path):
	        print(f"Error: Image file not found at {image_path}")
	        return None

	    # Read the image
	    img = cv2.imread(image_path)
	    if img is None:
	        print("Error: Could not open image.")
	        return None

	    height, width, _ = img.shape
	    print(f"[detect_crop] Input image: {width}x{height}")

	    # --- Step 1: Build a mask of non-background pixels ---
	    # np.all() checks every channel, so the channel order (BGR vs RGB)
	    # does not matter for the white/black test.
	    white_threshold = 240
	    black_threshold = 10

	    is_white = np.all(img >= white_threshold, axis=2)
	    is_black = np.all(img <= black_threshold, axis=2)

	    is_bg = is_white | is_black
	    is_content = ~is_bg  # True where there IS content (non-background)

	    if not np.any(is_content):
	        print("Error: Image appears to be entirely background. No crop applied.")
	        if output_image_path:
	            return _write_image(output_image_path, img)
	        return image_path

	    # --- Step 2: Find the main block of vertical content (ignoring text/watermarks) ---
	    noise_tolerance = 5
	    row_content_pixels = np.sum(is_content, axis=1)
	    blocks = _content_runs(row_content_pixels > noise_tolerance)

	    if not blocks:
	        print("Error: No content blocks found.")
	        return None

	    # Merge blocks separated by small gaps to handle intra-image background lines
	    gap_tolerance = 20
	    merged_blocks = _merge_close_runs(blocks, gap_tolerance)

	    # Keep the block with the most content pixels; max() returns the first
	    # maximal element, so ties resolve to the topmost block.
	    top, bottom = max(
	        merged_blocks,
	        key=lambda block: row_content_pixels[block[0]:block[1] + 1].sum(),
	    )

	    # Find extreme left and right columns restricted to the selected main block.
	    # np.argmax returns 0 when no column passes the noise test, so in that
	    # case the bounds fall back to the full image width.
	    col_content_pixels = np.sum(is_content[top:bottom + 1, :], axis=0)
	    cols_with_content = col_content_pixels > noise_tolerance

	    left = int(np.argmax(cols_with_content))
	    right = int(width - np.argmax(cols_with_content[::-1]) - 1)

	    print(f"[detect_crop] Detected content bounds: top={top}, bottom={bottom}, left={left}, right={right}")

	    # --- Step 3: Smart Zoom for rounded corners ---
	    zoom_limit = min(width, height) // 4  # max zoom 25%
	    zoom_amount = 0

	    while zoom_amount < zoom_limit and (right - left > 20) and (bottom - top > 20):
	        corner_is_bg = (
	            is_bg[top, left]
	            or is_bg[top, right]
	            or is_bg[bottom, left]
	            or is_bg[bottom, right]
	        )
	        if not corner_is_bg:
	            break
	        top += 1
	        bottom -= 1
	        left += 1
	        right -= 1
	        zoom_amount += 1

	    if zoom_amount > 0:
	        print(f"Smart Zoom applied: {zoom_amount}px inset to clear rounded corners.")

	    # --- Step 4: Validate and prepare crop area ---
	    # The safety margin is only added when no inset was applied; after a
	    # zoom the box is kept tight.
	    margin = 2
	    if zoom_amount == 0:
	        top = max(0, top - margin)
	        bottom = min(height - 1, bottom + margin)
	        left = max(0, left - margin)
	        right = min(width - 1, right + margin)

	    final_w = right - left + 1
	    final_h = bottom - top + 1

	    if final_w <= 0 or final_h <= 0:
	        print("Error: Invalid crop dimensions after zoom.")
	        return None

	    # Ensure crop dimensions are even (needed for video encoding/Gemini pipelines)
	    final_w -= final_w % 2
	    final_h -= final_h % 2

	    # Adjust right/bottom to match the even dimensions
	    right = left + final_w - 1
	    bottom = top + final_h - 1

	    print(f"Proposed Crop: w={final_w}, h={final_h}, x={left}, y={top}")

	    total_removed = top + (height - bottom - 1) + left + (width - right - 1)
	    if total_removed < 10:
	        print("[detect_crop] Very little border detected. No crop applied.")
	        if output_image_path:
	            saved_path = _write_image(output_image_path, img)
	            if saved_path is not None:
	                print(f"Successfully created cropped image at {output_image_path}")
	            return saved_path
	        return image_path

	    # Rounding a 1-pixel dimension down to even yields 0; an empty array
	    # must not be handed to cv2.imwrite.
	    if final_w <= 0 or final_h <= 0:
	        print("Error: Crop area is empty after rounding to even dimensions.")
	        return None

	    # Crop the original image
	    cropped_img = img[top:bottom + 1, left:right + 1]

	    if output_image_path is None:
	        filename, ext = os.path.splitext(image_path)
	        output_image_path = f"{filename}_cropped{ext}"

	    if _write_image(output_image_path, cropped_img) is None:
	        return None
	    print(f"Successfully created cropped image at {output_image_path}")
	    return output_image_path


	def _content_runs(row_has_content):
	    """Return inclusive ``[start, end]`` index pairs for each run of True values."""
	    flags = np.concatenate(([False], np.asarray(row_has_content, dtype=bool), [False]))
	    # +1 marks a False->True edge (run start), -1 a True->False edge (one past run end).
	    edges = np.diff(flags.astype(np.int8))
	    starts = np.flatnonzero(edges == 1)
	    ends = np.flatnonzero(edges == -1) - 1
	    return [[int(start), int(end)] for start, end in zip(starts, ends)]


	def _merge_close_runs(runs, gap_tolerance):
	    """Merge consecutive runs whose start is within ``gap_tolerance`` of the previous end."""
	    merged = [list(runs[0])]
	    for start, end in runs[1:]:
	        if start - merged[-1][1] <= gap_tolerance:
	            merged[-1][1] = end
	        else:
	            merged.append([start, end])
	    return merged


	def _write_image(path, image):
	    """Write ``image`` to ``path``; return ``path`` on success, else print an error and return None."""
	    if not cv2.imwrite(path, image):
	        print(f"Error: Could not write image to {path}")
	        return None
	    return path


	if __name__ == "__main__":
	    import sys

	    # Optional positional arguments: [input path] [output path].
	    # Each falls back to a fixed default file name when not supplied.
	    input_image = sys.argv[1] if len(sys.argv) > 1 else "image.png"
	    output_image = sys.argv[2] if len(sys.argv) > 2 else "image_cropped.png"

	    print(f"Processing: {input_image} -> {output_image}")
	    result = detect_and_crop_image(input_image, output_image)

	    # Report failure when no path came back or the reported file is absent.
	    if not result or not os.path.exists(result):
	        print(f"\n❌ Failed to create cropped image.")
	    else:
	        print(f"\n✅ Done! Cropped image saved as: {result}")