Spaces:

jebin2
/

comic-panel-extractor

Running

App Files Files Community

jebin2 committed on Jul 29

Commit

bd1f76d

1 Parent(s): b269113

new changes

Browse files

Files changed (6) hide show

comic_panel_extractor/config.py +1 -1
comic_panel_extractor/image_processor.py +529 -87
comic_panel_extractor/main.py +20 -24
comic_panel_extractor/panel_extractor.py +67 -24
comic_panel_extractor/panel_segmentation.py +175 -25
requirements.txt +2 -1

comic_panel_extractor/config.py CHANGED Viewed

@@ -12,7 +12,7 @@ class Config:
     min_text_length: int = 2
     min_area_ratio: float = 0.05
     min_width_ratio: float = 0.05
-    min_height_ratio: float = 0.05
 def get_text_cood_file_path(config: Config):
     return f'{config.output_folder}/{config.text_cood_file_name}'

     min_text_length: int = 2
     min_area_ratio: float = 0.05
     min_width_ratio: float = 0.05
+    min_height_ratio: float = 0.1
 def get_text_cood_file_path(config: Config):
     return f'{config.output_folder}/{config.text_cood_file_name}'

comic_panel_extractor/image_processor.py CHANGED Viewed

@@ -4,13 +4,22 @@ from .config import Config
 import numpy as np
 import cv2
 class ImageProcessor:
     """Handles image preprocessing operations."""
-    def __init__(self, config: Config):
-        self.config = config
     def mask_text_regions(self, input_path, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
         """Mask text regions in the image to reduce panel extraction noise."""
         image = cv2.imread(input_path)
@@ -23,7 +32,6 @@ class ImageProcessor:
         output_path = f'{self.config.output_folder}/{output_filename}'
         cv2.imwrite(output_path, image)
-        print(f"✅ Text-masked image saved to: {output_path}")
         return str(output_path)
     def preprocess_image(self, processed_image_path) -> Tuple[str, str, str]:
@@ -34,33 +42,25 @@ class ImageProcessor:
         # Convert to grayscale and binary
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        # _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
         # Apply Gaussian blur to reduce noise
         blurred = cv2.GaussianBlur(gray, (3, 3), 0)
         # Canny edge detection
         edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-        is_inverted = False
-        # binary, is_inverted = self.invert_if_black_dominates(binary)
-        if not is_inverted:
-            # Dilate to strengthen borders
-            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-            dilated = cv2.dilate(edges, kernel, iterations=2)
-        else: dilated = edges
         # Save intermediate results
-        gray_path = f'{self.config.output_folder}/2_gray.jpg'
-        binary_path = f'{self.config.output_folder}/3_binary.jpg'
-        dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
         cv2.imwrite(str(gray_path), gray)
         cv2.imwrite(str(binary_path), edges)
         cv2.imwrite(str(dilated_path), dilated)
-        return str(gray_path), str(binary_path), str(dilated_path), is_inverted
     def invert_if_black_dominates(self, binary):
         # Threshold to binary image
@@ -81,46 +81,11 @@ class ImageProcessor:
         # Save result
         return inverted, black_pixels > white_pixels
-    def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg"):
-        img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
-        height, width = img.shape
-        # Threshold image to binary
-        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
-        # Find all contours
-        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        # Create mask for large contours (likely panel borders)
-        mask = np.zeros_like(binary)
-        for cnt in contours:
-            area = cv2.contourArea(cnt)
-            if area >= (height * width * self.config.min_area_ratio):
-                cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
-        # Apply mask to original image (keeps only large borders)
-        cleaned = cv2.bitwise_and(binary, binary, mask=mask)
-        # Optional: Apply morphological opening to clean tiny sketch lines
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel)
-        # Invert back if needed
-        cleaned = cv2.bitwise_not(cleaned)
-        # Save
-        output_path = f'{self.config.output_folder}/{output_filename}'
-        cv2.imwrite(output_path, cleaned)
-        print(f"✅ Remove Inner Sketch image saved to: {output_path}")
-        return str(output_path)
-    def thin_image_borders(self, processed_image_path: str, output_filename: str = "6_thin_border.jpg") -> str:
         """
         Clean dilated image by thinning thick borders and removing hanging clusters.
         """
-        from skimage.morphology import skeletonize, remove_small_objects
-        from skimage.measure import label
         # Load image
         img = cv2.imread(processed_image_path)
         # Convert to grayscale and binary
@@ -147,40 +112,517 @@ class ImageProcessor:
         result = 255 - final
         # Save
-        output_path = f'{self.config.output_folder}/{output_filename}'
         cv2.imwrite(output_path, result)
-        print(f"✅ Cleaned and thinned image saved to: {output_path}")
         return str(output_path)
-    def clean_dilated_image(self, dilated_path: str,
-                           output_filename: str = "6_dilated_cleaned.jpg",
-                           max_neighbors: int = 2) -> str:
-        """Clean dilated image by thinning thick borders."""
-        dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
-        if dilated is None:
-            raise FileNotFoundError(f"Could not load dilated image: {dilated_path}")
-        binary = (dilated == 0).astype(np.uint8)
-        padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0)
-        cleaned = binary.copy()
         height, width = binary.shape
-        row_black_counts = np.sum(binary, axis=1)
-        for y in range(1, height + 1):
-            for x in range(1, width + 1):
-                if padded[y, x] == 1:
-                    neighbors = np.sum(padded[y-1:y+2, x-1:x+2]) - 1
-                    if neighbors > max_neighbors:
-                        neighbor_rows = [r for r in [y-1, y, y+1] if 1 <= r <= height]
-                        if neighbor_rows:
-                            row_to_clear = min(neighbor_rows, key=lambda r: row_black_counts[r-1])
-                            if y == row_to_clear:
-                                cleaned[y-1, x-1] = 0
-        cleaned_img = (1 - cleaned) * 255
-        output_path = f'{self.config.output_folder}/{output_filename}'
-        cv2.imwrite(str(output_path), cleaned_img)
-        print(f"✅ Cleaned dilated image saved to: {output_path}")
-        return str(output_path)

 import numpy as np
 import cv2
+from skimage.morphology import skeletonize, remove_small_objects
+from skimage.measure import label
+from skimage import measure
+from tqdm import tqdm
 class ImageProcessor:
     """Handles image preprocessing operations."""
+    def __init__(self, config: Config = None):
+        """Create the processor, building a default ``Config()`` when no (or a falsy) config is passed."""
+        self.config = config or Config()
+        # Running counter; get_output_path() increments it and uses it as a
+        # two-digit filename prefix for each saved image.
+        self.index = 0
+    def get_output_path(self, output_folder, file_name):
+        self.index += 1
+        return f'{output_folder}/{self.index:02d}_{file_name}'
     def mask_text_regions(self, input_path, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
         """Mask text regions in the image to reduce panel extraction noise."""
         image = cv2.imread(input_path)
         output_path = f'{self.config.output_folder}/{output_filename}'
         cv2.imwrite(output_path, image)
         return str(output_path)
     def preprocess_image(self, processed_image_path) -> Tuple[str, str, str]:
         # Convert to grayscale and binary
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
         # Apply Gaussian blur to reduce noise
         blurred = cv2.GaussianBlur(gray, (3, 3), 0)
         # Canny edge detection
         edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+        dilated = cv2.dilate(edges, kernel, iterations=2)
         # Save intermediate results
+        gray_path = self.get_output_path(self.config.output_folder, "gray.jpg")
+        binary_path = self.get_output_path(self.config.output_folder, "binary.jpg")
+        dilated_path = self.get_output_path(self.config.output_folder, "dilated.jpg")
         cv2.imwrite(str(gray_path), gray)
         cv2.imwrite(str(binary_path), edges)
         cv2.imwrite(str(dilated_path), dilated)
+        return str(gray_path), str(binary_path), str(dilated_path)
     def invert_if_black_dominates(self, binary):
         # Threshold to binary image
         # Save result
         return inverted, black_pixels > white_pixels
+    def thin_image_borders(self, processed_image_path: str, file_name="thin_border.jpg", output_folder=None) -> str:
         """
         Clean dilated image by thinning thick borders and removing hanging clusters.
         """
+        output_folder = output_folder or self.config.output_folder
         # Load image
         img = cv2.imread(processed_image_path)
         # Convert to grayscale and binary
         result = 255 - final
         # Save
+        output_path = self.get_output_path(output_folder, file_name)
         cv2.imwrite(output_path, result)
         return str(output_path)
+    def remove_dangling_lines(self, image_path, file_name="dangling_lines_removed.jpg", output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        # Threshold to binary mask (black lines = True, white = False)
+        binary = gray < 128  # black parts (lines/dangling strokes)
+        binary = binary.astype(bool)
+        # Label connected components
+        labeled = label(binary, connectivity=2)
+        # Remove small connected components (dangling lines, fragments)
+        cleaned = remove_small_objects(labeled, min_size=500)  # Adjust min_size as needed
+        # Convert back to mask (255 = black lines kept, 255 background = white)
+        final_mask = (cleaned > 0).astype(np.uint8) * 255
+        # Invert mask to match original layout: black lines on white background
+        final_image = 255 - final_mask
+        # Save result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, final_image)
+        return output_path
+    def remove_diagonal_lines(self, image_path, file_name="remove_diagonal_lines.jpg", output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        # Read the image
+        img = cv2.imread(image_path)
+        # Convert to grayscale
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Create binary image (black lines on white background)
+        _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
+        # Create kernels for detecting horizontal and vertical lines
+        # Adjust kernel size based on your image - larger for thicker lines
+        kernel_length = max(gray.shape[0], gray.shape[1]) // 30
+        # Horizontal kernel
+        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
+        # Vertical kernel
+        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
+        # Detect horizontal lines
+        horizontal_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
+        # Detect vertical lines
+        vertical_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
+        # Combine horizontal and vertical lines
+        rect_lines = cv2.addWeighted(horizontal_lines, 1, vertical_lines, 1, 0)
+        # Create final result - white background with black rectangular lines only
+        result = np.ones_like(gray) * 255  # White background
+        result[rect_lines > 0] = 0  # Black lines where rectangular lines were detected
+        # Save result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, result)
+        return output_path
+    def thick_black(self, image_path, thickness=20, file_name="thick_black.jpg", output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        # Load image
+        img = cv2.imread(image_path)
+        # Convert to grayscale
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Create a binary mask where black pixels are 1 (foreground)
+        _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY_INV)
+        # Define kernel size based on desired thickness
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (thickness, thickness))
+        # Dilate the black areas
+        dilated = cv2.dilate(binary, kernel, iterations=1)
+        # Invert back so black is 0 again
+        # result_mask = cv2.bitwise_not(dilated)
+        # Apply mask on original image
+        result = img.copy()
+        result[np.where(dilated == 255)] = (0, 0, 0)
+        # Save result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, result)
+        return output_path
+    def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        # Load image in grayscale
+        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        visual = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # For debugging with colored rectangles
+        if img is None:
+            raise FileNotFoundError(f"Could not load image: {image_path}")
+        height_, width_ = img.shape
+        min_area = height_ * width_ * self.config.min_area_ratio
+        # Threshold: make black = foreground
+        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
+        # Label connected regions
+        labeled = measure.label(binary)
+        regions = measure.regionprops(labeled)
+        # Create clean mask (copy of original binary)
+        clean_mask = np.copy(binary)
+        for region in regions:
+            area = region.area
+            minr, minc, maxr, maxc = region.bbox
+            width = maxc - minc
+            height = maxr - minr
+            # Bounding box filter
+            if (width < width_ * self.config.min_width_ratio or height < height_ * self.config.min_height_ratio):
+                clean_mask[labeled == region.label] = 0  # Remove small region
+                cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
+                continue
+            # Crop and analyze region for line orientation
+            region_crop = binary[minr:maxr, minc:maxc]
+            edges = cv2.Canny(region_crop, 50, 150, apertureSize=3)
+            lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=10, maxLineGap=5)
+            if lines is not None:
+                for line in lines:
+                    x1, y1, x2, y2 = line[0]
+                    angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)
+                    length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
+                    if 80 < angle < 100:
+                        if length / height_ > self.config.min_height_ratio:
+                            break  # keep region
+                    elif angle < 10 or angle > 170:
+                        if length / width_ > self.config.min_width_ratio:
+                            break  # keep region
+                else:
+                    # If no qualifying line found, remove region
+                    clean_mask[labeled == region.label] = 0
+                    cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)
+            else:
+                # No lines, remove region
+                clean_mask[labeled == region.label] = 0
+                cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)
+        # Save debug visualization
+        cv2.imwrite(f"{output_folder}/debug_{file_name}", visual)
+        # Invert back to original format: black lines on white
+        cleaned = cv2.bitwise_not(clean_mask)
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, cleaned)
+        return output_path
+    def thin_black(self, image_path, file_name="thin_black.jpg", output_folder=None):
+        """Thin the dark lines of an image and save the result.
+
+        Uses ``cv2.ximgproc.thinning`` when that attribute exists and falls
+        back to an erosion-based morphological skeleton otherwise.
+
+        Args:
+            image_path: Path of the image to thin; read as grayscale.
+            file_name: Base name of the saved result.
+            output_folder: Destination folder; defaults to ``self.config.output_folder``.
+
+        Returns:
+            Path of the saved image (black lines on a white background).
+
+        Raises:
+            ValueError: If the image cannot be read.
+        """
+        output_folder = output_folder or self.config.output_folder
+        # Load the image as a single-channel grayscale array
+        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        # Check if the image loaded correctly
+        if img is None:
+            raise ValueError("Image not loaded. Check the file path.")
+        # Inverted threshold: dark line pixels become 255 (foreground)
+        _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
+        # Perform thinning to reduce to 1-pixel lines
+        try:
+            # Use Zhang-Suen thinning if opencv-contrib is installed
+            thinned = cv2.ximgproc.thinning(binary)
+        except AttributeError:
+            # Fallback: Morphological skeletonization
+            skel = np.zeros(binary.shape, np.uint8)
+            element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
+            while True:
+                eroded = cv2.erode(binary, element)
+                # temp = pixels of the current shape that its opening loses
+                temp = cv2.dilate(eroded, element)
+                temp = cv2.subtract(binary, temp)
+                skel = cv2.bitwise_or(skel, temp)
+                # Continue with the eroded shape until nothing is left
+                binary = eroded.copy()
+                if cv2.countNonZero(binary) == 0:
+                    break
+            thinned = skel
+        # Invert back so the thinned lines are black on a white background
+        thinned = 255 - thinned
+        # Save result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, thinned)
+        return output_path
+    def thin_lines_direct(self, image_path, file_name="thin_lines_direct.jpg", output_folder=None):
+        """Thin thick black lines by keeping one edge of each thick run.
+
+        Two independent passes read the same thresholded image: a row scan
+        that redraws each horizontal run of black pixels at the bottom of the
+        black stretch below it, and a column scan that redraws each vertical
+        run at the right end of the black stretch beside it. The two
+        white-background results are AND-ed, so a pixel is black in the
+        output when either pass drew it.
+
+        Args:
+            image_path: Path of the image to thin; read as grayscale.
+            file_name: Base name of the saved result.
+            output_folder: Destination folder; defaults to ``self.config.output_folder``.
+
+        Returns:
+            Path of the saved image (black lines on a white background).
+
+        Raises:
+            ValueError: If the image cannot be read.
+        """
+        output_folder = output_folder or self.config.output_folder
+        # Read image
+        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        if img is None:
+            raise ValueError("Could not load image")
+        # Convert to binary (0 = black lines, 255 = white background)
+        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
+        # Create result image (start with white background)
+        result = np.full_like(binary, 255)  # All white
         height, width = binary.shape
+        print("Processing thick lines...")
+        # Method 1: Scan rows - for each thick horizontal segment, keep only bottom pixel
+        print("Step 1: Thinning horizontal segments...")
+        for row in range(height):
+            col = 0
+            while col < width:
+                # If we hit a black pixel
+                if binary[row, col] == 0:  # Black pixel
+                    # Find the end of this horizontal segment
+                    start_col = col
+                    while col < width and binary[row, col] == 0:
+                        col += 1
+                    end_col = col - 1
+                    # For this horizontal segment, check if it's part of a thick vertical region
+                    segment_width = end_col - start_col + 1
+                    if segment_width >= 1:  # Any horizontal segment
+                        # Check how thick this region is vertically at the middle
+                        mid_col = (start_col + end_col) // 2
+                        # Count of consecutive black pixels from this row downward
+                        thickness = self.get_vertical_thickness(binary, row, mid_col)
+                        if thickness > 1:
+                            # This is part of a thick region - keep only the bottom pixel
+                            bottom_row = row + thickness - 1
+                            if bottom_row < height:
+                                result[bottom_row, start_col:end_col+1] = 0  # Draw black line
+                        else:
+                            # Already thin - keep as is
+                            result[row, start_col:end_col+1] = 0
+                else:
+                    col += 1
+        # Save step 1
+        # cv2.imwrite(f'{self.config.output_folder}/step1_horizontal_thinned.png', result)
+        # Method 2: Scan columns - for each thick vertical segment, keep only right pixel
+        print("Step 2: Thinning vertical segments...")
+        # Start fresh for vertical processing
+        result_v = np.full_like(binary, 255)  # All white
+        for col in range(width):
+            row = 0
+            while row < height:
+                # If we hit a black pixel
+                if binary[row, col] == 0:  # Black pixel
+                    # Find the end of this vertical segment
+                    start_row = row
+                    while row < height and binary[row, col] == 0:
+                        row += 1
+                    end_row = row - 1
+                    segment_height = end_row - start_row + 1
+                    if segment_height >= 1:  # Any vertical segment
+                        # Check how thick this region is horizontally at the middle
+                        mid_row = (start_row + end_row) // 2
+                        # Count of consecutive black pixels from this column rightward
+                        thickness = self.get_horizontal_thickness(binary, mid_row, col)
+                        if thickness > 1:
+                            # This is part of a thick region - keep only the right pixel
+                            right_col = col + thickness - 1
+                            if right_col < width:
+                                result_v[start_row:end_row+1, right_col] = 0  # Draw black line
+                        else:
+                            # Already thin - keep as is
+                            result_v[start_row:end_row+1, col] = 0
+                else:
+                    row += 1
+        # Save step 2
+        # cv2.imwrite(f'{self.config.output_folder}/step2_vertical_thinned.png', result_v)
+        # Method 3: Combine both results
+        print("Step 3: Combining results...")
+        # AND of two white-background images: black wherever either pass drew
+        final_result = cv2.bitwise_and(result, result_v)  # Keep both thin lines
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, final_result)
+        return output_path
+    def get_vertical_thickness(self, binary, start_row, col):
+        """Get the vertical thickness of a black region starting from start_row, col"""
+        height = binary.shape[0]
+        thickness = 0
+        row = start_row
+        while row < height and binary[row, col] == 0:  # Black pixel
+            thickness += 1
+            row += 1
+        return thickness
+    def get_horizontal_thickness(self, binary, row, start_col):
+        """Get the horizontal thickness of a black region starting from row, start_col"""
+        width = binary.shape[1]
+        thickness = 0
+        col = start_col
+        while col < width and binary[row, col] == 0:  # Black pixel
+            thickness += 1
+            col += 1
+        return thickness
+    def remove_diagonal_only_cells(self, image_path, file_name="remove_diagonal_only_cells.jpg", output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        # Load the image in grayscale
+        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        if img is None:
+            raise ValueError("Unable to load the image. Check the file path.")
+        # Threshold to binary (invert if lines are black on white background)
+        _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
+        # Pad image to handle border cells easily
+        padded = np.pad(binary, pad_width=1, mode='constant', constant_values=0)
+        rows, cols = binary.shape
+        output = padded.copy()
+        # Scan each cell (excluding padding)
+        for r in range(1, rows + 1):
+            for c in range(1, cols + 1):
+                if padded[r, c] == 255:  # Assuming white (255) represents active cells/lines
+                    # Get 8 neighbors
+                    neighbors = {
+                        'top_left': padded[r-1, c-1],
+                        'top': padded[r-1, c],
+                        'top_right': padded[r-1, c+1],
+                        'left': padded[r, c-1],
+                        'right': padded[r, c+1],
+                        'bottom_left': padded[r+1, c-1],
+                        'bottom': padded[r+1, c],
+                        'bottom_right': padded[r+1, c+1]
+                    }
+                    # Helper: Count active neighbors (255)
+                    active_count = sum(1 for v in neighbors.values() if v == 255)
+                    # Conditions as specified:
+                    # 1) Only top-left and bottom-right
+                    cond1 = (neighbors['top_left'] == 255 and neighbors['bottom_right'] == 255 and
+                            active_count == 2)
+                    # 2) Only top-left
+                    cond2 = (neighbors['top_left'] == 255 and active_count == 1)
+                    # 3) Only bottom-right
+                    cond3 = (neighbors['bottom_right'] == 255 and active_count == 1)
+                    # 4) Only top-right and bottom-left
+                    cond4 = (neighbors['top_right'] == 255 and neighbors['bottom_left'] == 255 and
+                            active_count == 2)
+                    # 5) Only top-right
+                    cond5 = (neighbors['top_right'] == 255 and active_count == 1)
+                    # 6) Only bottom-left
+                    cond6 = (neighbors['bottom_left'] == 255 and active_count == 1)
+                    # Remove cell if any condition matches (set to 0)
+                    if cond1 or cond2 or cond3 or cond4 or cond5 or cond6:
+                        output[r, c] = 0
+        # Remove padding and invert back to original style (black lines on white)
+        cleaned = output[1:-1, 1:-1]
+        result = cv2.bitwise_not(cleaned)
+        # Save the result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, result)
+        return output_path
+    def remove_small_continuity_components(self, image_path, file_name="remove_small_continuity_components.jpg", output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        # Load the image in grayscale
+        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        if img is None:
+            raise ValueError("Unable to load the image. Check the file path.")
+        height, width = img.shape
+        continuity_threshold = height * self.config.min_height_ratio
+        # Threshold to binary (invert if lines are black on white background)
+        _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
+        # Perform connected component labeling (8-connectivity)
+        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)
+        # Create a copy for output
+        output = binary.copy()
+        # Iterate over components (skip label 0, which is background)
+        for label in tqdm(range(1, num_labels), desc="Processing labels"):
+            # Get the size (area) of the component
+            size = stats[label, cv2.CC_STAT_AREA]
+            # If size is below threshold, remove the component (set to 0)
+            if size < continuity_threshold:
+                output[labels == label] = 0
+        # Invert back to original style (black lines on white)
+        result = cv2.bitwise_not(output)
+        # Save the result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, result)
+        return output_path
+    def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
+        output_folder = output_folder or self.config.output_folder
+        # Load the image in grayscale
+        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+        if img is None:
+            raise ValueError("Unable to load the image. Check the file path.")
+        height, width = img.shape
+        # Threshold to binary (invert if lines are black on white background)
+        _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
+        rows, cols = binary.shape
+        canvas = binary.copy()  # Work on a copy (lines=255 on black)
+        gap_threshold = width * self.config.min_width_ratio
+        # Scan row by row to connect small horizontal gaps
+        for r in range(rows):
+            col = 0
+            while col < cols:
+                if canvas[r, col] == 255:
+                    # Find start and end of current segment
+                    start = col
+                    while col < cols and canvas[r, col] == 255:
+                        col += 1
+                    end = col - 1
+                    # Look for next segment in the same row
+                    next_start = col
+                    while next_start < cols and canvas[r, next_start] == 0:
+                        next_start += 1
+                    if next_start < cols:
+                        gap = next_start - end - 1
+                        if gap >= 0 and gap <= gap_threshold:
+                            # Fill the gap
+                            for fill_col in range(end + 1, next_start):
+                                canvas[r, fill_col] = 255
+                            col = next_start  # Jump to next segment
+                        else:
+                            col = next_start
+                    else:
+                        col = next_start
+                else:
+                    col += 1
+        gap_threshold = height * self.config.min_height_ratio
+        # Scan column by column to connect small vertical gaps
+        for c in range(cols):
+            row = 0
+            while row < rows:
+                if canvas[row, c] == 255:
+                    # Find start and end of current segment
+                    start = row
+                    while row < rows and canvas[row, c] == 255:
+                        row += 1
+                    end = row - 1
+                    # Look for next segment in the same column
+                    next_start = row
+                    while next_start < rows and canvas[next_start, c] == 0:
+                        next_start += 1
+                    if next_start < rows:
+                        gap = next_start - end - 1
+                        if gap >= 0 and gap <= gap_threshold:
+                            # Fill the gap
+                            for fill_row in range(end + 1, next_start):
+                                canvas[fill_row, c] = 255
+                            row = next_start  # Jump to next segment
+                        else:
+                            row = next_start
+                    else:
+                        row = next_start
+                else:
+                    row += 1
+        # Invert back to original style (black lines on white)
+        result = cv2.bitwise_not(canvas)
+        # Save the result
+        output_path = self.get_output_path(output_folder, file_name)
+        cv2.imwrite(output_path, result)
+        return output_path

comic_panel_extractor/main.py CHANGED Viewed

@@ -1,9 +1,9 @@
-from .text_detector import TextDetector
 from .config import Config
 from .image_processor import ImageProcessor
 from .panel_extractor import PanelData
 from .panel_extractor import PanelExtractor
-from .panel_segmentation import main as main_panel_segmentation
 from typing import List, Tuple
 from pathlib import Path
@@ -28,41 +28,37 @@ class ComicPanelExtractor:
         """Complete pipeline to extract panels from a comic image."""
         print(f"Starting panel extraction for: {self.config.input_path}")
-        processed_image_path = main_panel_segmentation(self.config.output_folder, self.config.input_path, self.config.input_path)
         self.config.black_overlay_input_path = processed_image_path
-        # Step 1: Detect and mask text regions
-        # text_bubbles = self._detect_text_bubbles()
-        # processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
-        # Step 2: Preprocess image
-        _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
-        if is_inverted:
-            # Step 3: Remove Inner Sketch
-            processed_image_path = self.image_processor.remove_inner_sketch(processed_image_path)
-        # Step 4: Thin border line
         processed_image_path = self.image_processor.thin_image_borders(processed_image_path)
-        # Step 3: Clean dilated image
-        # processed_image_path = self.image_processor.clean_dilated_image(processed_image_path)
-        # Step 5: Extract panels
         panel_images, panel_data, all_panel_path = self.panel_extractor.extract_panels(
             processed_image_path
         )
         return panel_images, panel_data, all_panel_path
-    def _detect_text_bubbles(self) -> List[dict]:
-        """Detect text bubbles in the comic image."""
-        with TextDetector(self.config) as text_detector:
-            bubbles_path = text_detector.detect_and_group_text()
-        with open(bubbles_path, "r", encoding="utf-8") as f:
-            return json.load(f)
     def cleanup(self):
         """Clean up temporary files if needed."""
         # Add cleanup logic here if needed

+# from .text_detector import TextDetector
 from .config import Config
 from .image_processor import ImageProcessor
 from .panel_extractor import PanelData
 from .panel_extractor import PanelExtractor
+from .panel_segmentation import main as basic_panel_segmentation
 from typing import List, Tuple
 from pathlib import Path
         """Complete pipeline to extract panels from a comic image."""
         print(f"Starting panel extraction for: {self.config.input_path}")
+        processed_image_path = basic_panel_segmentation(self.config.output_folder, self.config.input_path, self.config.input_path)
         self.config.black_overlay_input_path = processed_image_path
+        _, _, processed_image_path = self.image_processor.preprocess_image(processed_image_path)
+        processed_image_path = self.image_processor.thin_image_borders(processed_image_path)
+        processed_image_path = self.image_processor.remove_dangling_lines(processed_image_path)
+        processed_image_path = self.image_processor.remove_diagonal_only_cells(processed_image_path)
+        processed_image_path = self.image_processor.remove_small_continuity_components(processed_image_path)
+        processed_image_path = self.image_processor.thick_black(processed_image_path)
+        processed_image_path = self.image_processor.remove_small_regions(processed_image_path)
+        processed_image_path = self.image_processor.remove_diagonal_lines(processed_image_path)
+        processed_image_path = self.image_processor.remove_small_regions(processed_image_path)
+        processed_image_path = self.image_processor.connect_horizontal_vertical_gaps(processed_image_path)
         processed_image_path = self.image_processor.thin_image_borders(processed_image_path)
         panel_images, panel_data, all_panel_path = self.panel_extractor.extract_panels(
             processed_image_path
         )
         return panel_images, panel_data, all_panel_path
     def cleanup(self):
         """Clean up temporary files if needed."""
         # Add cleanup logic here if needed

comic_panel_extractor/panel_extractor.py CHANGED Viewed

@@ -5,6 +5,7 @@ import numpy as np
 import cv2
 from dataclasses import dataclass
 import os
 @dataclass
 class PanelData:
@@ -80,25 +81,23 @@ class PanelExtractor:
         # Forcefully include first and last row
         if 0 not in black_rows:
             black_rows.insert(0, 0)
-        if (height - 1) not in black_rows:
-            black_rows.append(height - 1)
         # Group consecutive rows into gutters
         row_gutters = []
         if black_rows:
             start_row = black_rows[0]
-            prev_row = black_rows[0]
-            for y in black_rows:
-                if y != start_row:
-                    # Only extend if combined height meets min_height_ratio
-                    combined_height = y - start_row + 1
-                    if combined_height / height >= self.config.min_height_ratio:
-                        prev_row = y
-                        row_gutters.append((start_row, prev_row))
-                        start_row = y
-            if start_row != prev_row:
-                row_gutters.append((start_row, prev_row))  # Add last gutter
         print(f"✅ Detected panel row gutters: {row_gutters}")
@@ -236,6 +235,35 @@ class PanelExtractor:
             if fname.startswith("panel_") and os.path.isfile(os.path.join(folder_path, fname))
         ])
     def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
         """Save panel images and return panel data."""
         visual_output = original.copy()
@@ -247,32 +275,46 @@ class PanelExtractor:
         black_overlay_input = cv2.imread(self.config.black_overlay_input_path)
         image_area = width * height
-        maybe_full_page_panel = None  # Store panel that is ≥90% of the page
         for idx, (x1, y1, x2, y2) in enumerate(panels, 1):
             # Extract panel image from black_overlay_input
             panel_img = black_overlay_input[y1:y2, x1:x2]
-            # Check for mostly black content
             gray = cv2.cvtColor(panel_img, cv2.COLOR_BGR2GRAY)
-            black_pixels = np.sum(gray < 30)
             total_pixels = gray.size
             black_ratio = black_pixels / total_pixels
             if black_ratio > 0.8:
                 print(f"⚠️ Skipping panel #{idx} — {round(black_ratio * 100, 2)}% black")
                 continue
             else:
-                print(f"✅ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
-            # Check if this panel is ≥90% of the full image
             panel_area = (x2 - x1) * (y2 - y1)
             if panel_area >= 0.9 * image_area:
                 print(f"⚠️ Panel #{idx} covers ≥90% of the image — marked for potential use only")
                 maybe_full_page_panel = (idx, (x1, y1, x2, y2))
-                continue  # Skip for now
-            # Save valid smaller panel
             panel_img = visual_output[y1:y2, x1:x2]
             panel_images.append(panel_img)
             panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
@@ -285,9 +327,9 @@ class PanelExtractor:
             cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(visual_output, f"#{idx}", (x1+5, y1+25),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
-        # If no valid panels were saved, and we had a full-page one, save it
         if not panel_images and maybe_full_page_panel and panel_idx == 0:
             idx, (x1, y1, x2, y2) = maybe_full_page_panel
             panel_img = visual_output[y1:y2, x1:x2]
@@ -302,7 +344,7 @@ class PanelExtractor:
             cv2.rectangle(visual_output, (x1, y1), (x2, y2), (255, 0, 0), 2)
             cv2.putText(visual_output, f"#full", (x1+5, y1+25),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
             print(f"✅ Saved full-page panel as fallback")
         # Save final visualization
@@ -311,3 +353,4 @@ class PanelExtractor:
         print(f"✅ Extracted {len(panel_images)} panels after filtering.")
         return panel_images, panel_data, all_panel_path

 import cv2
 from dataclasses import dataclass
 import os
+import re
 @dataclass
 class PanelData:
         # Forcefully include first and last row
         if 0 not in black_rows:
             black_rows.insert(0, 0)
+        if (height) not in black_rows:
+                black_rows.append(height)
+        print(f'📄 Row Points:: {black_rows}')
         # Group consecutive rows into gutters
         row_gutters = []
         if black_rows:
             start_row = black_rows[0]
+            for i, end_row in enumerate(black_rows):
+                # Only extend if combined height meets min_height_ratio
+                combined_height = end_row - start_row
+                if combined_height / height >= self.config.min_height_ratio:
+                    print(f'📄 {i+1}) Start: {start_row:04d} | End: {end_row:04d} | Total: {combined_height:04d} | Ratio: {(combined_height / height):04f}')
+                    row_gutters.append((start_row, end_row))
+                    start_row = end_row
+                elif len(black_rows) == i + 1:
+                    row_gutters[-1] = (row_gutters[-1][0], end_row)
         print(f"✅ Detected panel row gutters: {row_gutters}")
             if fname.startswith("panel_") and os.path.isfile(os.path.join(folder_path, fname))
         ])
+    def load_existing_panels_from_folder(self, folder: str) -> List[Tuple[int, int, int, int]]:
+        """
+        Read panel bounding boxes back from the file names found in ``folder``.
+        Names are expected to look like 'panel_1_(1006, 176, 1757, 1085).jpg'; the four
+        integers are returned as a tuple in the order they appear in the name.
+        Args:
+            folder: Directory to scan (not recursive).
+        Returns:
+            One 4-tuple of ints per matching file name, ordered by file name. An empty
+            list when the folder does not exist or holds no matching names.
+        """
+        pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
+        try:
+            # Sorted so the result does not depend on the platform's directory order.
+            names = sorted(os.listdir(folder))
+        except FileNotFoundError:
+            # Nothing has been written yet, so there are no existing panels.
+            return []
+        coords = []
+        for fname in names:
+            # fullmatch rejects look-alikes such as '... .jpg.bak' that match() would accept.
+            match = pattern.fullmatch(fname)
+            if match:
+                coords.append(tuple(map(int, match.groups())))
+        return coords
+    def is_fully_contained(self, x1: int, y1: int, x2: int, y2: int,
+                       boxes: List[Tuple[int, int, int, int]],
+                       threshold: int = 200) -> bool:
+        """
+        Tell whether the box (x1, y1, x2, y2) lies inside, or nearly inside, any of ``boxes``.
+        A candidate counts as contained when each of its edges is at most ``threshold``
+        pixels outside the corresponding edge of some box in ``boxes``. Exact containment
+        always counts, whatever the threshold.
+        Returns:
+            True if at least one box (nearly) contains the candidate, otherwise False.
+        """
+        # Exact containment is accepted unconditionally, so a negative threshold can
+        # never make the test stricter than a slack of zero.
+        slack = max(threshold, 0)
+        return any(
+            x1 >= bx1 - slack and y1 >= by1 - slack and
+            x2 <= bx2 + slack and y2 <= by2 + slack
+            for bx1, by1, bx2, by2 in boxes
+        )
     def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
         """Save panel images and return panel data."""
         visual_output = original.copy()
         black_overlay_input = cv2.imread(self.config.black_overlay_input_path)
         image_area = width * height
+        maybe_full_page_panel = None
+        # Load existing panels from disk
+        existing_coords = self.load_existing_panels_from_folder(self.config.output_folder)
         for idx, (x1, y1, x2, y2) in enumerate(panels, 1):
             # Extract panel image from black_overlay_input
             panel_img = black_overlay_input[y1:y2, x1:x2]
+            # Check for mostly black/white
             gray = cv2.cvtColor(panel_img, cv2.COLOR_BGR2GRAY)
             total_pixels = gray.size
+            black_pixels = np.sum(gray < 30)
+            white_pixels = np.sum(gray > 240)
             black_ratio = black_pixels / total_pixels
+            white_ratio = white_pixels / total_pixels
             if black_ratio > 0.8:
                 print(f"⚠️ Skipping panel #{idx} — {round(black_ratio * 100, 2)}% black")
                 continue
+            elif white_ratio > 0.9:
+                print(f"⚠️ Skipping panel #{idx} — {round(white_ratio * 100, 2)}% white")
+                continue
             else:
+                print(f"✅ Panel #{idx} — {round(black_ratio * 100, 2)}% black, {round(white_ratio * 100, 2)}% white")
             panel_area = (x2 - x1) * (y2 - y1)
             if panel_area >= 0.9 * image_area:
                 print(f"⚠️ Panel #{idx} covers ≥90% of the image — marked for potential use only")
                 maybe_full_page_panel = (idx, (x1, y1, x2, y2))
+                continue
+            # Check for full containment in existing and current session
+            already_saved_coords = existing_coords + [ (pd.x_start, pd.y_start, pd.x_end, pd.y_end) for pd in panel_data ]
+            if self.is_fully_contained(x1, y1, x2, y2, already_saved_coords):
+                print(f"⚠️ Skipping panel #{idx} — fully contained in existing panel")
+                continue
+            # Save panel
             panel_img = visual_output[y1:y2, x1:x2]
             panel_images.append(panel_img)
             panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
             cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(visual_output, f"#{idx}", (x1+5, y1+25),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
+        # If no valid panels and full-page backup exists
         if not panel_images and maybe_full_page_panel and panel_idx == 0:
             idx, (x1, y1, x2, y2) = maybe_full_page_panel
             panel_img = visual_output[y1:y2, x1:x2]
             cv2.rectangle(visual_output, (x1, y1), (x2, y2), (255, 0, 0), 2)
             cv2.putText(visual_output, f"#full", (x1+5, y1+25),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
             print(f"✅ Saved full-page panel as fallback")
         # Save final visualization
         print(f"✅ Extracted {len(panel_images)} panels after filtering.")
         return panel_images, panel_data, all_panel_path

comic_panel_extractor/panel_segmentation.py CHANGED Viewed

@@ -8,13 +8,16 @@ from skimage import measure
 from scipy import ndimage as ndi
 import re
 from skimage.morphology import remove_small_holes
 def extract_fully_white_panels(
     original_image: np.ndarray,
     segmentation_mask: np.ndarray,
     output_dir: str = "panel_output",
-    debug_region_dir: str = "panel_debug_regions",
     min_area_ratio: float = 0.05,
     min_width_ratio: float = 0.05,
     min_height_ratio: float = 0.05,
@@ -71,9 +74,9 @@ def extract_fully_white_panels(
             w < min_width_ratio * img_w or
             h < min_height_ratio * img_h
         ):
-            if save_debug:
-                cropped_img.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_orig.jpg"))
-                mask_pil.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_mask.jpg"))
             continue
         # 2. Check if region is mostly white (allow small % of black)
@@ -81,7 +84,7 @@ def extract_fully_white_panels(
         total_pixels = region.image.size
         black_ratio = black_pixel_count / total_pixels
-        if black_ratio > 0.05:  # Allow up to 1% black pixels
             print(f"❌ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
             # Save debug info if desired
             if save_debug:
@@ -126,6 +129,65 @@ def extract_fully_white_panels(
     return saved_panels
 def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.ndarray:
     """
@@ -139,35 +201,102 @@ def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.n
         Binary segmentation mask
     """
     if save_debug:
-        os.makedirs("panel_debug_steps", exist_ok=True)
-        Image.fromarray(image).save("panel_debug_steps/step1_original.jpg")
     # Convert to grayscale
     grayscale = rgb2gray(image)
     if save_debug:
         gray_uint8 = (grayscale * 255).astype('uint8')
         # Fix for Pillow warning: Remove mode parameter
-        Image.fromarray(gray_uint8).save("panel_debug_steps/step2_grayscale.jpg")
     # Edge detection
     edges = canny(grayscale)
     if save_debug:
-        edges_uint8 = (edges * 255).astype('uint8')
-        # Fix for Pillow warning: Remove mode parameter
-        Image.fromarray(edges_uint8).save("panel_debug_steps/step3_edges.jpg")
     # Fill holes in edges
-    segmentation = ndi.binary_fill_holes(edges)
     # ✅ Remove small black clusters (holes in white regions)
     segmentation_cleaned = remove_small_holes(segmentation, area_threshold=500)  # adjust threshold as needed
     if save_debug:
         segmentation_uint8 = (segmentation_cleaned * 255).astype('uint8')
-        Image.fromarray(segmentation_uint8).save("panel_debug_steps/step4_segmentation_filled.jpg")
     return segmentation_cleaned
 def create_image_with_panels_removed(
     original_image: np.ndarray,
@@ -189,7 +318,7 @@ def create_image_with_panels_removed(
         original_image=original_image,
         segmentation_mask=segmentation_mask,
         output_dir=output_folder,
-        debug_region_dir="panel_debug_regions",
         save_debug=save_debug
     )
@@ -198,17 +327,18 @@ def create_image_with_panels_removed(
     draw = ImageDraw.Draw(im_no_panels)
     # Get regions and black them out
-    labeled_mask = measure.label(segmentation_mask)
-    regions = measure.regionprops(labeled_mask)
-    pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
-    for panel_path in saved_panels:
-        # Extract panel index from filename with bbox format
-        panel_name = os.path.basename(panel_path)
-        match = pattern.match(panel_name)
-        minc, minr, maxc, maxr = map(int, match.groups())
-        draw.rectangle([minc, minr, maxc, maxr], fill=(0, 0, 0))
     # Save the result
     im_no_panels.save(output_path)
@@ -219,11 +349,31 @@ def main(output_folder, input_image_path, original_image_path):
     # Load the input image
     image = imageio.imread(input_image_path)
     original_image = imageio.imread(original_image_path)
-    save_debug = False
     # Create segmentation mask
     segmentation_mask = create_segmentation_mask(image, save_debug=save_debug)
-    pre_process_path = f"{output_folder}/original_with_panels_removed.jpg"
     # Create image with panels removed
     create_image_with_panels_removed(
         original_image=original_image,

 from scipy import ndimage as ndi
 import re
 from skimage.morphology import remove_small_holes
+from .image_processor import ImageProcessor
+import cv2
+pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
 def extract_fully_white_panels(
     original_image: np.ndarray,
     segmentation_mask: np.ndarray,
     output_dir: str = "panel_output",
+    debug_region_dir: str = "temp_dir/panel_debug_regions",
     min_area_ratio: float = 0.05,
     min_width_ratio: float = 0.05,
     min_height_ratio: float = 0.05,
             w < min_width_ratio * img_w or
             h < min_height_ratio * img_h
         ):
+            # if save_debug:
+            #     cropped_img.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_orig.jpg"))
+            #     mask_pil.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_mask.jpg"))
             continue
         # 2. Check if region is mostly white (allow small % of black)
         total_pixels = region.image.size
         black_ratio = black_pixel_count / total_pixels
+        if black_ratio > 0.1:  # Allow up to 1% black pixels
             print(f"❌ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
             # Save debug info if desired
             if save_debug:
     return saved_panels
+def get_region_count(binary_seg, min_area_ratio=0.05, min_width_ratio=0.05, min_height_ratio=0.05):
+    """
+    Count the connected regions of ``binary_seg`` whose bounding box is large enough.
+    Regions are labelled with ``measure.label`` and measured by their bounding box, not
+    by their pixel count.
+    Parameters:
+        binary_seg: 2-D mask to label.
+        min_area_ratio: Minimum bounding-box area as a fraction of the image area.
+        min_width_ratio: Minimum bounding-box width as a fraction of the image width.
+        min_height_ratio: Minimum bounding-box height as a fraction of the image height.
+    Returns:
+        int: Number of regions that meet all three minimums.
+    """
+    img_h, img_w = binary_seg.shape
+    image_area = img_h * img_w
+    count = 0
+    for region in measure.regionprops(measure.label(binary_seg)):
+        minr, minc, maxr, maxc = region.bbox
+        w = maxc - minc
+        h = maxr - minr
+        if (
+            w * h < min_area_ratio * image_area or
+            w < min_width_ratio * img_w or
+            h < min_height_ratio * img_h
+        ):
+            continue
+        count += 1
+    return count
+def get_black_white_ratio(image_path, threshold=128):
+    """
+    Measure how much of an image is black versus white after thresholding.
+    The file is read as grayscale and binarized with cv2.THRESH_BINARY, so pixels
+    brighter than ``threshold`` count as white and every other pixel as black.
+    Parameters:
+        image_path (str): Path to the image file.
+        threshold (int): Threshold value for binarization (default: 128).
+    Returns:
+        dict: black_ratio, white_ratio, black_count, white_count and total_pixels.
+    Raises:
+        FileNotFoundError: If cv2.imread could not load ``image_path``.
+    """
+    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+    if gray is None:
+        raise FileNotFoundError(f"Image not found: {image_path}")
+    # cv2.threshold returns (threshold_used, image); only the image is needed.
+    binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
+    total = binary.size
+    whites = np.count_nonzero(binary == 255)
+    blacks = total - whites
+    return {
+        "black_ratio": blacks / total,
+        "white_ratio": whites / total,
+        "black_count": blacks,
+        "white_count": whites,
+        "total_pixels": total
+    }
 def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.ndarray:
     """
         Binary segmentation mask
     """
     if save_debug:
+        os.makedirs("temp_dir/panel_debug_steps", exist_ok=True)
+        Image.fromarray(image).save("temp_dir/panel_debug_steps/step1_original.jpg")
     # Convert to grayscale
     grayscale = rgb2gray(image)
     if save_debug:
         gray_uint8 = (grayscale * 255).astype('uint8')
         # Fix for Pillow warning: Remove mode parameter
+        Image.fromarray(gray_uint8).save("temp_dir/panel_debug_steps/step2_grayscale.jpg")
     # Edge detection
     edges = canny(grayscale)
+    edges_uint8 = (edges * 255).astype('uint8')
     if save_debug:
+        Image.fromarray(edges_uint8).save("temp_dir/panel_debug_steps/step3_edges.jpg")
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    seg = cv2.dilate(edges_uint8, kernel, iterations=2)
+    seg = cv2.ximgproc.thinning(seg)
     # Fill holes in edges
+    segmentation = ndi.binary_fill_holes(seg)
+    # Ensure it's a NumPy boolean or 0/1 array
+    binary_seg = segmentation.astype(np.uint8)
+    # Count white and black pixels
+    total_pixels = binary_seg.size
+    white_pixels = np.count_nonzero(binary_seg)  # 1s
+    # Ratios
+    white_ratio = white_pixels / total_pixels
+    region_count = get_region_count(binary_seg)
+    if white_ratio > 0.8 or region_count == 1:
+        print(f"⚠️ white is maximum hence reverting to only binary_fill_holes")
+        # Fill holes in edges
+        segmentation = ndi.binary_fill_holes(edges)
     # ✅ Remove small black clusters (holes in white regions)
     segmentation_cleaned = remove_small_holes(segmentation, area_threshold=500)  # adjust threshold as needed
     if save_debug:
         segmentation_uint8 = (segmentation_cleaned * 255).astype('uint8')
+        Image.fromarray(segmentation_uint8).save("temp_dir/panel_debug_steps/step4_segmentation_filled.jpg")
     return segmentation_cleaned
+def boxes_are_close(box1, box2, thresh):
+    """
+    Tell whether two boxes overlap or lie within ``thresh`` of each other on both axes.
+    Each box is indexed as [0]=min x, [1]=min y, [2]=max x, [3]=max y.
+    """
+    # Too far apart horizontally: box1 ends before box2 (minus slack) or starts after it.
+    if box1[2] < box2[0] - thresh or box1[0] > box2[2] + thresh:
+        return False
+    # Horizontally close, so the answer is decided by the vertical extents alone.
+    return box1[3] >= box2[1] - thresh and box1[1] <= box2[3] + thresh
+def merge_close_panels(saved_panels, draw, distance_thresh=20):
+    """
+    Black out the saved panels on ``draw``, joining boxes that lie close together.
+    Bounding boxes are parsed from the panel file names via the module-level ``pattern``;
+    paths whose base name does not match are ignored. Each group of close boxes is
+    replaced by its common bounding rectangle, which is then filled with black.
+    Parameters:
+        saved_panels: Iterable of panel file paths.
+        draw: Object with a ``rectangle(box, fill=...)`` method.
+        distance_thresh: Slack passed to ``boxes_are_close``.
+    """
+    # Step 1: recover [minc, minr, maxc, maxr] from every file name that matches.
+    parsed = (pattern.match(os.path.basename(path)) for path in saved_panels)
+    boxes = [[int(value) for value in m.groups()] for m in parsed if m]
+    # Step 2: grow one rectangle per not-yet-absorbed seed box.
+    # NOTE(review): closeness is tested against the seed box, not the grown rectangle,
+    # so a chain of boxes is not merged transitively. Confirm this is intended.
+    absorbed = set()
+    merged = []
+    for i, seed in enumerate(boxes):
+        if i in absorbed:
+            continue
+        absorbed.add(i)
+        left, top, right, bottom = seed
+        for j in range(i + 1, len(boxes)):
+            if j in absorbed:
+                continue
+            other = boxes[j]
+            if not boxes_are_close(seed, other, distance_thresh):
+                continue
+            left = min(left, other[0])
+            top = min(top, other[1])
+            right = max(right, other[2])
+            bottom = max(bottom, other[3])
+            absorbed.add(j)
+        merged.append([left, top, right, bottom])
+    # Step 3: fill every merged rectangle with black.
+    for rect in merged:
+        draw.rectangle(rect, fill=(0, 0, 0))
 def create_image_with_panels_removed(
     original_image: np.ndarray,
         original_image=original_image,
         segmentation_mask=segmentation_mask,
         output_dir=output_folder,
+        debug_region_dir="temp_dir/panel_debug_regions",
         save_debug=save_debug
     )
     draw = ImageDraw.Draw(im_no_panels)
     # Get regions and black them out
+    # labeled_mask = measure.label(segmentation_mask)
+    # regions = measure.regionprops(labeled_mask)
+    # for panel_path in saved_panels:
+    #     # Extract panel index from filename with bbox format
+    #     panel_name = os.path.basename(panel_path)
+    #     match = pattern.match(panel_name)
+    #     minc, minr, maxc, maxr = map(int, match.groups())
+    #     draw.rectangle([minc, minr, maxc, maxr], fill=(0, 0, 0))
+    merge_close_panels(saved_panels, draw, distance_thresh=25)
     # Save the result
     im_no_panels.save(output_path)
     # Load the input image
     image = imageio.imread(input_image_path)
     original_image = imageio.imread(original_image_path)
+    save_debug = True
     # Create segmentation mask
     segmentation_mask = create_segmentation_mask(image, save_debug=save_debug)
+    segmentation_mask_output_path = f"temp_dir/panel_debug_steps/step4_segmentation_filled.jpg"
+    pixel_ratios = get_black_white_ratio(segmentation_mask_output_path)
+    if pixel_ratios['black_ratio'] < 0.8:
+        print(f"✅ black is less hence applying other features")
+        image_pros = ImageProcessor()
+        new_path = image_pros.thick_black(segmentation_mask_output_path, file_name="step5_thick.jpg", output_folder="temp_dir/panel_debug_steps")
+        new_path = image_pros.connect_horizontal_vertical_gaps(new_path, file_name="step6_continuity.jpg", output_folder="temp_dir/panel_debug_steps")
+        pixel_ratios = get_black_white_ratio(new_path)
+        if pixel_ratios['black_ratio'] < 0.8:
+            new_path = image_pros.thin_image_borders(new_path, file_name="step7_thin.jpg", output_folder="temp_dir/panel_debug_steps")
+            new_path = image_pros.remove_dangling_lines(new_path, file_name="step8_remove_dangling_lines.jpg", output_folder="temp_dir/panel_debug_steps")
+            new_path = image_pros.thick_black(new_path, file_name="step9_thick.jpg", output_folder="temp_dir/panel_debug_steps")
+            segmentation_mask = cv2.imread(new_path, cv2.IMREAD_GRAYSCALE)
+    pre_process_path = f"{output_folder}/00_original_with_panels_removed.jpg"
     # Create image with panels removed
     create_image_with_panels_removed(
         original_image=original_image,

requirements.txt CHANGED Viewed

@@ -6,4 +6,5 @@ fastapi
 uvicorn
 python-multipart
 jinja2
-scikit-image

 uvicorn
 python-multipart
 jinja2
+scikit-image
+imagehash