Spaces:

habulaj
/

subapi

Running

App Files Files Community

habulaj committed 13 days ago

Commit

e0f323e

verified ·

1 Parent(s): 3efef97

Update detect_crop_image.py

Browse files

Files changed (1) hide show

detect_crop_image.py +125 -138

detect_crop_image.py CHANGED Viewed

@@ -14,158 +14,144 @@ def detect_and_crop_image(image_path, output_image_path=None):
         print("Error: Could not open image.")
         return None
-    # Convert to grayscale
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    # Identify "mid-tones" to separate the real photo from pure white or black backgrounds/text.
-    # JPEG artifacts mean pure white/black might vary. We use 20 to 235 as the "mid-tone" photo range.
-    mask = cv2.inRange(gray, 20, 235)
-    # 1. MORPH_OPEN (Erode then Dilate)
-    # This removes thin structures, such as text anti-aliasing, thin lines, or small icons.
-    # A 15x15 kernel removes anything thinner than 15 pixels.
-    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
-    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)
-    # 2. MORPH_CLOSE (Dilate then Erode)
-    # This merges nearby blobs and fills holes (e.g., if the photo has pure white/black areas inside).
-    # A large kernel ensures the entire main image forms one single solid block.
-    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (51, 51))
-    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
-    # Find contours
-    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    if not contours:
-        print("Error: No significant non-background regions detected.")
-        return None
-    # Find the contour with the largest bounding box area
-    max_area = 0
-    best_bbox = None
-    for c in contours:
-        x, y, w, h = cv2.boundingRect(c)
-        area = w * h
-        if area > max_area:
-            max_area = area
-            best_bbox = (x, y, w, h)
-    if best_bbox is None or max_area < 500:
-         print("Error: No significant image content detected.")
-         return None
-    x, y, w, h = best_bbox
-    # --- Smart Zoom for Rounded Corners ---
-    # If the corners of our bounding box still touch the background (white/black),
-    # it's likely a rounded corner. We "zoom in" (inset) until the corners are safe.
-    img_h, img_w = img.shape[:2]
-    def check_corners(cx, cy, cw, ch, m):
-        # Check the 4 corner pixels in the mask
-        # We use a small 3x3 average or just the point? Point is simpler.
-        coords = [
-            (cy, cx),
-            (cy, cx + cw - 1),
-            (cy + ch - 1, cx),
-            (cy + ch - 1, cx + cw - 1)
-        ]
-        for py, px in coords:
-            if m[py, px] == 0:
-                return False
-        return True
-    zoom_inset = 0
-    max_zoom = min(w, h) // 4  # Prevent zooming more than 25% of the image size
-    while not check_corners(x, y, w, h, mask) and zoom_inset < max_zoom:
-        x += 1
-        y += 1
-        w -= 2
-        h -= 2
-        zoom_inset += 1
-        if w <= 20 or h <= 20:
-            break
-    if zoom_inset > 0:
-        print(f"Smart Zoom applied: {zoom_inset}px inset to clear rounded corners.")
-    # --- Validate Crops ---
-    # Only crop if the excluded region is genuinely a white/black background
-    prop_x_min = x
-    prop_y_min = y
-    prop_x_max = x + w
-    prop_y_max = y + h
-    def validate_crop(region, border_region, edge_thresh=0.80, region_thresh=0.60):
-        if region.size == 0 or border_region.size == 0:
-            return False
-        dark_edge = np.count_nonzero(border_region < 20) / border_region.size
-        light_edge = np.count_nonzero(border_region > 235) / border_region.size
-        dark_region = np.count_nonzero(region < 20) / region.size
-        light_region = np.count_nonzero(region > 235) / region.size
-        is_dark_bg = (dark_edge >= edge_thresh) and (dark_region >= region_thresh)
-        is_light_bg = (light_edge >= edge_thresh) and (light_region >= region_thresh)
-        return is_dark_bg or is_light_bg
-    # Validate Top Crop
-    if prop_y_min > 0:
-        top_region = gray[0:prop_y_min, :]
-        top_border = gray[0:min(3, prop_y_min), :]
-        if not validate_crop(top_region, top_border):
-            prop_y_min = 0
-    # Validate Bottom Crop
-    if prop_y_max < img_h:
-        bottom_region = gray[prop_y_max:img_h, :]
-        bottom_border = gray[max(img_h-3, prop_y_max):img_h, :]
-        if not validate_crop(bottom_region, bottom_border):
-            prop_y_max = img_h
-    # Validate Left Crop
-    if prop_x_min > 0:
-        left_region = gray[:, 0:prop_x_min]
-        left_border = gray[:, 0:min(3, prop_x_min)]
-        if not validate_crop(left_region, left_border):
-            prop_x_min = 0
-    # Validate Right Crop
-    if prop_x_max < img_w:
-        right_region = gray[:, prop_x_max:img_w]
-        right_border = gray[:, max(img_w-3, prop_x_max):img_w]
-        if not validate_crop(right_region, right_border):
-            prop_x_max = img_w
-    # Inset Logic (2px) - additional fixed safety margin ONLY for valid crops
-    inset = 2
-    x_min = prop_x_min + inset if prop_x_min > 0 else 0
-    y_min = prop_y_min + inset if prop_y_min > 0 else 0
-    x_max = prop_x_max - inset if prop_x_max < img_w else img_w
-    y_max = prop_y_max - inset if prop_y_max < img_h else img_h
-    final_w = x_max - x_min
-    final_h = y_max - y_min
     if final_w <= 0 or final_h <= 0:
         print("Error: Invalid crop dimensions after zoom.")
         return None
-    # Ensure crop dimensions are even
     if final_w % 2 != 0: final_w -= 1
     if final_h % 2 != 0: final_h -= 1
-    x_max = x_min + final_w
-    y_max = y_min + final_h
-    print(f"Proposed Crop: w={final_w}, h={final_h}, x={x_min}, y={y_min}")
     # Crop the original image
-    cropped_img = img[y_min:y_max, x_min:x_max]
     if output_image_path is None:
         filename, ext = os.path.splitext(image_path)
@@ -175,6 +161,7 @@ def detect_and_crop_image(image_path, output_image_path=None):
     print(f"Successfully created cropped image at {output_image_path}")
     return output_image_path
 if __name__ == "__main__":
     import sys

         print("Error: Could not open image.")
         return None
+    height, width, _ = img.shape
+    print(f"[detect_crop] Input image: {width}x{height}")
+    # --- Step 1: Build a mask of non-background pixels ---
+    # OpenCV loads images as BGR. np.all() over axis=2 applies the same threshold to all 3 channels, so the check is independent of channel order (RGB vs. BGR).
+    white_threshold = 240
+    black_threshold = 10
+    is_white = np.all(img >= white_threshold, axis=2)
+    is_black = np.all(img <= black_threshold, axis=2)
+    is_bg = is_white | is_black
+    is_content = ~is_bg  # True where there IS content (non-background)
+    if not np.any(is_content):
+        print("Error: Image appears to be entirely background. No crop applied.")
+        if output_image_path:
+            cv2.imwrite(output_image_path, img)
+            return output_image_path
+        return image_path
+    # --- Step 2: Find the main block of vertical content (ignoring text/watermarks) ---
+    noise_tolerance = 5
+    row_content_pixels = np.sum(is_content, axis=1)
+    row_has_content = row_content_pixels > noise_tolerance
+    blocks = []
+    in_block = False
+    start_row = 0
+    for i, has_content in enumerate(row_has_content):
+        if has_content and not in_block:
+            in_block = True
+            start_row = i
+        elif not has_content and in_block:
+            in_block = False
+            blocks.append([start_row, i - 1])
+    if in_block:
+        blocks.append([start_row, len(row_has_content) - 1])
+    if not blocks:
+        print("Error: No content blocks found.")
+        return None
+    # Merge blocks separated by small gaps to handle intra-image background lines
+    gap_tolerance = 20
+    merged_blocks = []
+    curr_block = blocks[0]
+    for next_block in blocks[1:]:
+        if next_block[0] - curr_block[1] <= gap_tolerance:
+            curr_block = [curr_block[0], next_block[1]]
+        else:
+            merged_blocks.append(curr_block)
+            curr_block = next_block
+    merged_blocks.append(curr_block)
+    # Select the block with the largest number of non-white/black pixels
+    best_top, best_bottom = -1, -1
+    max_pixels = -1
+    for start, end in merged_blocks:
+        total_p = np.sum(row_content_pixels[start:end+1])
+        if total_p > max_pixels:
+            max_pixels = total_p
+            best_top, best_bottom = start, end
+    top, bottom = best_top, best_bottom
+    # Find extreme left and right columns restricted to the selected main block
+    valid_rows = is_content[top:bottom+1, :]
+    col_content_pixels = np.sum(valid_rows, axis=0)
+    cols_with_content = col_content_pixels > noise_tolerance
+    left = int(np.argmax(cols_with_content))
+    right = int(width - np.argmax(cols_with_content[::-1]) - 1)
+    print(f"[detect_crop] Detected content bounds: top={top}, bottom={bottom}, left={left}, right={right}")
+    # --- Step 3: Smart Zoom for rounded corners ---
+    zoom_limit = min(width, height) // 4  # max zoom 25%
+    zoom_amount = 0
+    while zoom_amount < zoom_limit and (right - left > 20) and (bottom - top > 20):
+        c_tl = is_bg[top, left]
+        c_tr = is_bg[top, right]
+        c_bl = is_bg[bottom, left]
+        c_br = is_bg[bottom, right]
+        if c_tl or c_tr or c_bl or c_br:
+            top += 1
+            bottom -= 1
+            left += 1
+            right -= 1
+            zoom_amount += 1
+        else:
+            break
+    if zoom_amount > 0:
+        print(f"Smart Zoom applied: {zoom_amount}px inset to clear rounded corners.")
+    # --- Step 4: Validate and prepare crop area ---
+    margin = 2
+    if zoom_amount == 0:
+        top = max(0, top - margin)
+        bottom = min(height - 1, bottom + margin)
+        left = max(0, left - margin)
+        right = min(width - 1, right + margin)
+    final_w = right - left + 1
+    final_h = bottom - top + 1
     if final_w <= 0 or final_h <= 0:
         print("Error: Invalid crop dimensions after zoom.")
         return None
+    # Ensure crop dimensions are even (needed for video encoding/Gemini pipelines)
     if final_w % 2 != 0: final_w -= 1
     if final_h % 2 != 0: final_h -= 1
+    # Adjust right/bottom to match the even dimensions
+    right = left + final_w - 1
+    bottom = top + final_h - 1
+    print(f"Proposed Crop: w={final_w}, h={final_h}, x={left}, y={top}")
+    total_removed = top + (height - bottom - 1) + left + (width - right - 1)
+    if total_removed < 10:
+        print("[detect_crop] Very little border detected. No crop applied.")
+        if output_image_path:
+            cv2.imwrite(output_image_path, img)
+            print(f"Successfully created cropped image at {output_image_path}")
+            return output_image_path
+        return image_path
     # Crop the original image
+    cropped_img = img[top:bottom+1, left:right+1]
     if output_image_path is None:
         filename, ext = os.path.splitext(image_path)
     print(f"Successfully created cropped image at {output_image_path}")
     return output_image_path
 if __name__ == "__main__":
     import sys