import cv2
import numpy as np
import os

def find_contiguous_regions(image, area_threshold):
    """Return bounding rectangles of near-black connected regions in an image.

    The image is reduced to grayscale, inverse-thresholded so that values of
    0 or 1 become foreground, and the outer contours of the foreground are
    extracted. Contours whose area (per ``cv2.contourArea``) does not exceed
    ``area_threshold`` are discarded.

    Args:
        image: Image array, either 3-channel BGR or already single-channel
            (2-D) grayscale.
        area_threshold: Contours must have an area strictly greater than
            this value to be kept.

    Returns:
        A list of ``cv2.boundingRect`` results, one per retained contour.
    """
    # A 2-D array is already grayscale; BGR2GRAY conversion would reject it.
    # getattr keeps non-array input flowing into cv2 so it fails as before.
    if getattr(image, "ndim", None) == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Inverse binary threshold: values > 1 become 0, everything else 255,
    # so the near-black areas end up as the foreground for contour detection.
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY_INV)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always at [-2].
    contours = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Keep only sufficiently large contours and report their bounding boxes.
    return [
        cv2.boundingRect(contour)
        for contour in contours
        if cv2.contourArea(contour) > area_threshold
    ]

def is_black_ratio_satisfied(patch, threshold):
    """Check whether black values make up at least ``threshold`` of a patch.

    Only array elements equal to 0 (black) or 255 (white) are counted; all
    other values are ignored. The comparison is element-wise, so for a
    multi-channel patch every channel value is counted individually.

    Args:
        patch: NumPy array of pixel values.
        threshold: Minimum required fraction black / (black + white).

    Returns:
        True if the black fraction is greater than or equal to ``threshold``.
        A patch with no 0 or 255 values at all (including an empty patch) is
        treated as having a black fraction of 0.0.
    """
    black_pixels = np.count_nonzero(patch == 0)
    white_pixels = np.count_nonzero(patch == 255)
    total_pixels = black_pixels + white_pixels

    # Without any black or white values the ratio is undefined; treat it as
    # 0.0 instead of raising ZeroDivisionError.
    if total_pixels == 0:
        return 0.0 >= threshold

    return black_pixels / total_pixels >= threshold

def save_patches(image, patches, output_dir, ratio_threshold):
    """Crop each rectangle out of ``image`` and write qualifying crops to disk.

    Args:
        image: Source image array, indexed as ``image[row, column]``.
        patches: Iterable of ``(x, y, w, h)`` rectangles.
        output_dir: Directory that receives the ``patch_<index>.png`` files.
        ratio_threshold: Forwarded to ``is_black_ratio_satisfied`` to decide
            whether a crop is saved.
    """
    for index, (left, top, width, height) in enumerate(patches):
        crop = image[top:top + height, left:left + width]

        # Skip crops that are not dark enough.
        if not is_black_ratio_satisfied(crop, ratio_threshold):
            continue

        # The file name keeps the rectangle's position in ``patches``, so
        # skipped crops leave gaps in the numbering.
        destination = os.path.join(output_dir, f"patch_{index}.png")
        cv2.imwrite(destination, crop)

def main(image_path, output_dir, area_threshold, ratio_threshold):
    """Extract black patches from an image file and save them as PNG files.

    Args:
        image_path: Path of the image to process.
        output_dir: Directory into which the patch images are written.
        area_threshold: Passed to ``find_contiguous_regions`` as the minimum
            contour area.
        ratio_threshold: Passed to ``save_patches`` as the black-ratio
            threshold a patch must satisfy to be saved.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If the file exists but could not be read as an image.
    """
    # Read the image
    image = cv2.imread(image_path)

    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of deep inside later cv2 calls.
    if image is None:
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"Could not read image file: {image_path}")

    # Find contiguous black patches
    black_patches = find_contiguous_regions(image, area_threshold)

    # Save patches as separate images
    save_patches(image, black_patches, output_dir, ratio_threshold)

if __name__ == "__main__":
    image_path = "fin.png"  # Path to your document image
    output_dir = "black_patches/"  # Directory to save the patches
    area_threshold = 1000  # Contour area a patch must exceed to be considered
    # Minimum fraction of black among the black + white values of a patch
    # for that patch to be saved
    ratio_threshold = 0.80

    # Create the output directory if needed; exist_ok avoids the race between
    # a separate existence check and the creation.
    os.makedirs(output_dir, exist_ok=True)

    main(image_path, output_dir, area_threshold, ratio_threshold)