import cv2
import numpy as np
import os


def find_contiguous_regions(image, area_threshold):
    """Return bounding rectangles of the large near-black regions in *image*.

    The image is converted from BGR to grayscale and inverse-thresholded at
    1, so pixels whose gray value is 0 or 1 become foreground.  Only the
    outermost contours of that foreground are considered.

    Args:
        image: Image array in BGR channel order (as returned by
            ``cv2.imread`` with default flags).
        area_threshold: A contour is kept only when ``cv2.contourArea`` is
            strictly greater than this value.

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per retained contour.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # THRESH_BINARY_INV maps gray values <= 1 to 255 and everything else to 0,
    # so the "contours" found below outline the dark areas.
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY_INV)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list in
    # both layouts.
    contours = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    return [
        cv2.boundingRect(contour)
        for contour in contours
        if cv2.contourArea(contour) > area_threshold
    ]


def is_black_ratio_satisfied(patch, threshold):
    """Tell whether enough of the pure black/white content of *patch* is black.

    Only array elements equal to exactly 0 (black) or 255 (white) are
    counted; every other value is ignored.  For a multi-channel patch the
    count is per element, i.e. per channel value, not per pixel.

    Args:
        patch: NumPy array holding the cropped image region.
        threshold: Minimum required value of ``black / (black + white)``.

    Returns:
        ``True`` when the black fraction is at least *threshold*.  ``False``
        when it is lower, or when the patch contains no 0 or 255 elements at
        all (including an empty patch), where the fraction is undefined.
    """
    black_pixels = np.count_nonzero(patch == 0)
    white_pixels = np.count_nonzero(patch == 255)
    total_pixels = black_pixels + white_pixels

    # Guard against 0 / 0: without any pure black or white element there is
    # no ratio to compare, so the patch cannot qualify.
    if total_pixels == 0:
        return False

    return black_pixels / total_pixels >= threshold


def save_patches(image, patches, output_dir, ratio_threshold):
    """Crop each rectangle out of *image* and save the sufficiently black ones.

    Files are written as ``patch_<idx>.png`` inside *output_dir*, where
    ``idx`` is the rectangle's position in *patches*; rejected rectangles
    therefore leave gaps in the numbering.

    Args:
        image: Source image array, indexed as ``image[row, column]``.
        patches: Iterable of ``(x, y, w, h)`` rectangles.
        output_dir: Directory the PNG files are written to.
        ratio_threshold: Minimum black fraction, forwarded to
            ``is_black_ratio_satisfied``.
    """
    for idx, (x, y, w, h) in enumerate(patches):
        patch_image = image[y:y + h, x:x + w]

        if is_black_ratio_satisfied(patch_image, ratio_threshold):
            cv2.imwrite(
                os.path.join(output_dir, f"patch_{idx}.png"), patch_image
            )


def main(image_path, output_dir, area_threshold, ratio_threshold):
    """Extract the large black regions of an image file into separate PNGs.

    Args:
        image_path: Path of the image to process.
        output_dir: Directory that receives the patch images.
        area_threshold: Minimum contour area for a region to be considered.
        ratio_threshold: Minimum black fraction a cropped region must have
            to be saved.

    Raises:
        OSError: If the image cannot be read from *image_path*.
    """
    image = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside cvtColor.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    black_patches = find_contiguous_regions(image, area_threshold)
    save_patches(image, black_patches, output_dir, ratio_threshold)


if __name__ == "__main__":
    image_path = "fin.png"  # Path to your document image
    output_dir = "black_patches/"  # Directory to save the patches
    area_threshold = 1000  # Minimum area threshold for a patch to be considered
    ratio_threshold = 0.80  # Minimum fraction of black among black + white pixels

    # Create the output directory if it doesn't exist; exist_ok avoids the
    # race between checking for the directory and creating it.
    os.makedirs(output_dir, exist_ok=True)

    main(image_path, output_dir, area_threshold, ratio_threshold)