habulaj committed on
Commit
e0f323e
·
verified ·
1 Parent(s): 3efef97

Update detect_crop_image.py

Browse files
Files changed (1) hide show
  1. detect_crop_image.py +125 -138
detect_crop_image.py CHANGED
@@ -14,158 +14,144 @@ def detect_and_crop_image(image_path, output_image_path=None):
14
  print("Error: Could not open image.")
15
  return None
16
 
17
- # Convert to grayscale
18
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
19
-
20
- # Identify "mid-tones" to separate the real photo from pure white or black backgrounds/text.
21
- # JPEG artifacts mean pure white/black might vary. We use 20 to 235 as the "mid-tone" photo range.
22
- mask = cv2.inRange(gray, 20, 235)
23
-
24
- # 1. MORPH_OPEN (Erode then Dilate)
25
- # This removes thin structures, such as text anti-aliasing, thin lines, or small icons.
26
- # A 15x15 kernel removes anything thinner than 15 pixels.
27
- kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
28
- mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)
29
-
30
- # 2. MORPH_CLOSE (Dilate then Erode)
31
- # This merges nearby blobs and fills holes (e.g., if the photo has pure white/black areas inside).
32
- # A large kernel ensures the entire main image forms one single solid block.
33
- kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (51, 51))
34
- mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
35
-
36
- # Find contours
37
- contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
38
-
39
- if not contours:
40
- print("Error: No significant non-background regions detected.")
41
- return None
42
-
43
- # Find the contour with the largest bounding box area
44
- max_area = 0
45
- best_bbox = None
46
-
47
- for c in contours:
48
- x, y, w, h = cv2.boundingRect(c)
49
- area = w * h
50
- if area > max_area:
51
- max_area = area
52
- best_bbox = (x, y, w, h)
53
-
54
- if best_bbox is None or max_area < 500:
55
- print("Error: No significant image content detected.")
56
- return None
57
-
58
- x, y, w, h = best_bbox
59
-
60
- # --- Smart Zoom for Rounded Corners ---
61
- # If the corners of our bounding box still touch the background (white/black),
62
- # it's likely a rounded corner. We "zoom in" (inset) until the corners are safe.
63
- img_h, img_w = img.shape[:2]
64
-
65
- def check_corners(cx, cy, cw, ch, m):
66
- # Check the 4 corner pixels in the mask
67
- # We use a small 3x3 average or just the point? Point is simpler.
68
- coords = [
69
- (cy, cx),
70
- (cy, cx + cw - 1),
71
- (cy + ch - 1, cx),
72
- (cy + ch - 1, cx + cw - 1)
73
- ]
74
- for py, px in coords:
75
- if m[py, px] == 0:
76
- return False
77
- return True
78
-
79
- zoom_inset = 0
80
- max_zoom = min(w, h) // 4 # Prevent zooming more than 25% of the image size
81
-
82
- while not check_corners(x, y, w, h, mask) and zoom_inset < max_zoom:
83
- x += 1
84
- y += 1
85
- w -= 2
86
- h -= 2
87
- zoom_inset += 1
88
- if w <= 20 or h <= 20:
89
- break
90
-
91
- if zoom_inset > 0:
92
- print(f"Smart Zoom applied: {zoom_inset}px inset to clear rounded corners.")
93
-
94
- # --- Validate Crops ---
95
- # Only crop if the excluded region is genuinely a white/black background
96
- prop_x_min = x
97
- prop_y_min = y
98
- prop_x_max = x + w
99
- prop_y_max = y + h
100
-
101
- def validate_crop(region, border_region, edge_thresh=0.80, region_thresh=0.60):
102
- if region.size == 0 or border_region.size == 0:
103
- return False
104
 
105
- dark_edge = np.count_nonzero(border_region < 20) / border_region.size
106
- light_edge = np.count_nonzero(border_region > 235) / border_region.size
107
-
108
- dark_region = np.count_nonzero(region < 20) / region.size
109
- light_region = np.count_nonzero(region > 235) / region.size
110
 
111
- is_dark_bg = (dark_edge >= edge_thresh) and (dark_region >= region_thresh)
112
- is_light_bg = (light_edge >= edge_thresh) and (light_region >= region_thresh)
 
113
 
114
- return is_dark_bg or is_light_bg
115
-
116
- # Validate Top Crop
117
- if prop_y_min > 0:
118
- top_region = gray[0:prop_y_min, :]
119
- top_border = gray[0:min(3, prop_y_min), :]
120
- if not validate_crop(top_region, top_border):
121
- prop_y_min = 0
122
-
123
- # Validate Bottom Crop
124
- if prop_y_max < img_h:
125
- bottom_region = gray[prop_y_max:img_h, :]
126
- bottom_border = gray[max(img_h-3, prop_y_max):img_h, :]
127
- if not validate_crop(bottom_region, bottom_border):
128
- prop_y_max = img_h
129
-
130
- # Validate Left Crop
131
- if prop_x_min > 0:
132
- left_region = gray[:, 0:prop_x_min]
133
- left_border = gray[:, 0:min(3, prop_x_min)]
134
- if not validate_crop(left_region, left_border):
135
- prop_x_min = 0
136
-
137
- # Validate Right Crop
138
- if prop_x_max < img_w:
139
- right_region = gray[:, prop_x_max:img_w]
140
- right_border = gray[:, max(img_w-3, prop_x_max):img_w]
141
- if not validate_crop(right_region, right_border):
142
- prop_x_max = img_w
143
-
144
- # Inset Logic (2px) - additional fixed safety margin ONLY for valid crops
145
- inset = 2
146
- x_min = prop_x_min + inset if prop_x_min > 0 else 0
147
- y_min = prop_y_min + inset if prop_y_min > 0 else 0
148
- x_max = prop_x_max - inset if prop_x_max < img_w else img_w
149
- y_max = prop_y_max - inset if prop_y_max < img_h else img_h
150
-
151
- final_w = x_max - x_min
152
- final_h = y_max - y_min
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  if final_w <= 0 or final_h <= 0:
155
  print("Error: Invalid crop dimensions after zoom.")
156
  return None
157
 
158
- # Ensure crop dimensions are even
159
  if final_w % 2 != 0: final_w -= 1
160
  if final_h % 2 != 0: final_h -= 1
161
 
162
- x_max = x_min + final_w
163
- y_max = y_min + final_h
 
164
 
165
- print(f"Proposed Crop: w={final_w}, h={final_h}, x={x_min}, y={y_min}")
 
 
 
 
 
 
 
 
 
166
 
167
  # Crop the original image
168
- cropped_img = img[y_min:y_max, x_min:x_max]
169
 
170
  if output_image_path is None:
171
  filename, ext = os.path.splitext(image_path)
@@ -175,6 +161,7 @@ def detect_and_crop_image(image_path, output_image_path=None):
175
  print(f"Successfully created cropped image at {output_image_path}")
176
  return output_image_path
177
 
 
178
  if __name__ == "__main__":
179
  import sys
180
 
 
14
  print("Error: Could not open image.")
15
  return None
16
 
17
+ height, width, _ = img.shape
18
+ print(f"[detect_crop] Input image: {width}x{height}")
19
+
20
+ # --- Step 1: Build a mask of non-background pixels ---
21
+ # OpenCV loads images as BGR. np.all() over axis=2 tests all 3 channels together, so the result is the same for RGB or BGR channel order.
22
+ white_threshold = 240
23
+ black_threshold = 10
24
+
25
+ is_white = np.all(img >= white_threshold, axis=2)
26
+ is_black = np.all(img <= black_threshold, axis=2)
27
+
28
+ is_bg = is_white | is_black
29
+ is_content = ~is_bg # True where there IS content (non-background)
30
+
31
+ if not np.any(is_content):
32
+ print("Error: Image appears to be entirely background. No crop applied.")
33
+ if output_image_path:
34
+ cv2.imwrite(output_image_path, img)
35
+ return output_image_path
36
+ return image_path
37
+
38
+ # --- Step 2: Find the main block of vertical content (ignoring text/watermarks) ---
39
+ noise_tolerance = 5
40
+ row_content_pixels = np.sum(is_content, axis=1)
41
+ row_has_content = row_content_pixels > noise_tolerance
42
+
43
+ blocks = []
44
+ in_block = False
45
+ start_row = 0
46
+
47
+ for i, has_content in enumerate(row_has_content):
48
+ if has_content and not in_block:
49
+ in_block = True
50
+ start_row = i
51
+ elif not has_content and in_block:
52
+ in_block = False
53
+ blocks.append([start_row, i - 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ if in_block:
56
+ blocks.append([start_row, len(row_has_content) - 1])
 
 
 
57
 
58
+ if not blocks:
59
+ print("Error: No content blocks found.")
60
+ return None
61
 
62
+ # Merge blocks separated by small gaps to handle intra-image background lines
63
+ gap_tolerance = 20
64
+ merged_blocks = []
65
+ curr_block = blocks[0]
66
+
67
+ for next_block in blocks[1:]:
68
+ if next_block[0] - curr_block[1] <= gap_tolerance:
69
+ curr_block = [curr_block[0], next_block[1]]
70
+ else:
71
+ merged_blocks.append(curr_block)
72
+ curr_block = next_block
73
+ merged_blocks.append(curr_block)
74
+
75
+ # Select the block with the largest number of non-white/black pixels
76
+ best_top, best_bottom = -1, -1
77
+ max_pixels = -1
78
+
79
+ for start, end in merged_blocks:
80
+ total_p = np.sum(row_content_pixels[start:end+1])
81
+ if total_p > max_pixels:
82
+ max_pixels = total_p
83
+ best_top, best_bottom = start, end
84
+
85
+ top, bottom = best_top, best_bottom
86
+
87
+ # Find extreme left and right columns restricted to the selected main block
88
+ valid_rows = is_content[top:bottom+1, :]
89
+ col_content_pixels = np.sum(valid_rows, axis=0)
90
+ cols_with_content = col_content_pixels > noise_tolerance
 
 
 
 
 
 
 
 
 
 
91
 
92
+ left = int(np.argmax(cols_with_content))
93
+ right = int(width - np.argmax(cols_with_content[::-1]) - 1)
94
+
95
+ print(f"[detect_crop] Detected content bounds: top={top}, bottom={bottom}, left={left}, right={right}")
96
+
97
+ # --- Step 3: Smart Zoom for rounded corners ---
98
+ zoom_limit = min(width, height) // 4 # max zoom 25%
99
+ zoom_amount = 0
100
+
101
+ while zoom_amount < zoom_limit and (right - left > 20) and (bottom - top > 20):
102
+ c_tl = is_bg[top, left]
103
+ c_tr = is_bg[top, right]
104
+ c_bl = is_bg[bottom, left]
105
+ c_br = is_bg[bottom, right]
106
+
107
+ if c_tl or c_tr or c_bl or c_br:
108
+ top += 1
109
+ bottom -= 1
110
+ left += 1
111
+ right -= 1
112
+ zoom_amount += 1
113
+ else:
114
+ break
115
+
116
+ if zoom_amount > 0:
117
+ print(f"Smart Zoom applied: {zoom_amount}px inset to clear rounded corners.")
118
+
119
+ # --- Step 4: Validate and prepare crop area ---
120
+ margin = 2
121
+ if zoom_amount == 0:
122
+ top = max(0, top - margin)
123
+ bottom = min(height - 1, bottom + margin)
124
+ left = max(0, left - margin)
125
+ right = min(width - 1, right + margin)
126
+
127
+ final_w = right - left + 1
128
+ final_h = bottom - top + 1
129
+
130
  if final_w <= 0 or final_h <= 0:
131
  print("Error: Invalid crop dimensions after zoom.")
132
  return None
133
 
134
+ # Ensure crop dimensions are even (needed for video encoding/Gemini pipelines)
135
  if final_w % 2 != 0: final_w -= 1
136
  if final_h % 2 != 0: final_h -= 1
137
 
138
+ # Adjust right/bottom to match the even dimensions
139
+ right = left + final_w - 1
140
+ bottom = top + final_h - 1
141
 
142
+ print(f"Proposed Crop: w={final_w}, h={final_h}, x={left}, y={top}")
143
+
144
+ total_removed = top + (height - bottom - 1) + left + (width - right - 1)
145
+ if total_removed < 10:
146
+ print("[detect_crop] Very little border detected. No crop applied.")
147
+ if output_image_path:
148
+ cv2.imwrite(output_image_path, img)
149
+ print(f"Successfully created cropped image at {output_image_path}")
150
+ return output_image_path
151
+ return image_path
152
 
153
  # Crop the original image
154
+ cropped_img = img[top:bottom+1, left:right+1]
155
 
156
  if output_image_path is None:
157
  filename, ext = os.path.splitext(image_path)
 
161
  print(f"Successfully created cropped image at {output_image_path}")
162
  return output_image_path
163
 
164
+
165
  if __name__ == "__main__":
166
  import sys
167