jkushwaha
/

code

Model card Files Files and versions

xet

Community

jkushwaha committed on Feb 8, 2024

Commit

7f7bf76

verified ·

1 Parent(s): 0de21f6

Update cv.py

Browse files

Files changed (1) hide show

cv.py +46 -36

cv.py CHANGED Viewed

@@ -1,50 +1,60 @@
 import cv2
 import numpy as np
-def detect_background(gray, threshold=0.70):
     # Convert image to grayscale
-    # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    # Thresholding to binarize the image
-    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
     # Find contours
     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    # Initialize list to store identified background regions
-    background_regions = []
-    # Iterate through contours
-    for contour in contours:
-        # Calculate area of contour
-        area = cv2.contourArea(contour)
-        # Calculate bounding rectangle
-        x, y, w, h = cv2.boundingRect(contour)
-        # Calculate aspect ratio of bounding rectangle
-        aspect_ratio = w / h if h != 0 else 0
-        # Calculate ratio of area of contour to area of bounding rectangle
-        ratio = area / (w * h) if (w * h) != 0 else 0
-        # If aspect ratio is close to 1 (nearly square) and ratio is greater than threshold, it's likely background
-        if aspect_ratio > 0.9 and ratio > threshold:
-            background_regions.append((x, y, w, h))
-    return background_regions
-def save_background_regions(image, regions, output_prefix="background_region"):
-    for i, region in enumerate(regions):
-        x, y, w, h = region
-        region_image = image[y:y+h, x:x+w]
-        cv2.imwrite(f"black_patches/{output_prefix}_{i}.png", region_image)
-# Load document image
-gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
-# Detect continuous black or white background regions
-background_regions = detect_background(gray)
-# Save identified background regions as images
-save_background_regions(gray, background_regions)

 import cv2
 import numpy as np
+import os
+def find_contiguous_regions(image, area_threshold):
+    """Return bounding rectangles of the large near-black regions in ``image``.
+
+    Args:
+        image: Image handed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``
+            (presumably a 3-channel BGR array as produced by
+            ``cv2.imread`` -- confirm against callers).
+        area_threshold: Contours whose ``cv2.contourArea`` is not strictly
+            greater than this value are discarded.
+
+    Returns:
+        A list with one ``cv2.boundingRect`` result per kept contour.
+    """
     # Convert image to grayscale
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    # Threshold the image to get binary image
+    # With THRESH_BINARY_INV and a threshold of 1, gray values <= 1 map to 255
+    # and everything brighter maps to 0, so near-black areas become foreground.
+    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY_INV)
     # Find contours
     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # Filter out small contours and get their bounding rectangles
+    bounding_rects = [cv2.boundingRect(contour) for contour in contours if cv2.contourArea(contour) > area_threshold]
+    return bounding_rects
+def is_black_ratio_satisfied(patch, threshold):
+    """Report whether enough of ``patch`` is pure black.
+
+    Only elements exactly equal to 0 (black) or 255 (white) are counted;
+    every other value is left out of both numerator and denominator.
+
+    Args:
+        patch: NumPy array of pixel values.
+        threshold: Minimum black fraction, computed as
+            black / (black + white).
+
+    Returns:
+        True when the black fraction is at least ``threshold``. False when
+        the patch holds no pure-black or pure-white element at all, because
+        the fraction is undefined in that case.
+    """
+    black_pixels = np.count_nonzero(patch == 0)
+    white_pixels = np.count_nonzero(patch == 255)
+    total_pixels = black_pixels + white_pixels
+    # Guard the division: an all-gray or empty patch would otherwise raise
+    # ZeroDivisionError and abort the whole run.
+    if total_pixels == 0:
+        return False
+    return black_pixels / total_pixels >= threshold
+def save_patches(image, patches, output_dir, ratio_threshold):
+    """Write every sufficiently black patch of ``image`` to ``output_dir``.
+
+    Args:
+        image: Array the patches are cropped from.
+        patches: Iterable of ``(x, y, w, h)`` rectangles.
+        output_dir: Directory that receives the ``patch_<index>.png`` files.
+        ratio_threshold: Threshold forwarded to ``is_black_ratio_satisfied``.
+    """
+    for index, (left, top, width, height) in enumerate(patches):
+        crop = image[top:top + height, left:left + width]
+        # Skip crops that do not meet the black-ratio requirement
+        if not is_black_ratio_satisfied(crop, ratio_threshold):
+            continue
+        # The file name keeps the patch's original index, so numbering has gaps
+        target = os.path.join(output_dir, f"patch_{index}.png")
+        cv2.imwrite(target, crop)
+def main(image_path, output_dir, area_threshold, ratio_threshold):
+    """Extract the black patches of one image and save them as PNG files.
+
+    Args:
+        image_path: Path of the image to read with ``cv2.imread``.
+        output_dir: Directory the patch images are written to.
+        area_threshold: Minimum contour area passed to
+            ``find_contiguous_regions``.
+        ratio_threshold: Black-ratio threshold passed to ``save_patches``.
+
+    Raises:
+        OSError: If the image cannot be read.
+    """
+    # Read the image
+    image = cv2.imread(image_path)
+    # cv2.imread reports a missing or undecodable file by returning None
+    # instead of raising, so fail here with a message that names the path.
+    if image is None:
+        raise OSError(f"Could not read image: {image_path}")
+    # Find contiguous black patches
+    black_patches = find_contiguous_regions(image, area_threshold)
+    # Save patches as separate images
+    save_patches(image, black_patches, output_dir, ratio_threshold)
+if __name__ == "__main__":
+    image_path = "fin.png"  # Path to your document image
+    output_dir = "black_patches/"  # Directory to save the patches
+    area_threshold = 1000  # Minimum contour area for a patch to be considered
+    ratio_threshold = 0.80  # Minimum fraction of black among the pure black/white pixels of a patch
+    # Create the output directory if needed; exist_ok avoids a separate
+    # existence check that could race with another process.
+    os.makedirs(output_dir, exist_ok=True)
+    main(image_path, output_dir, area_threshold, ratio_threshold)