# Spaces: jebin2/comic-panel-extractor (status: Running)
# File size: 5,669 Bytes

from typing import List, Tuple
from pathlib import Path
from .config import Config

import numpy as np
import cv2

class ImageProcessor:
    """Image preprocessing steps used ahead of comic panel extraction.

    Every public method reads an image from disk, transforms it, writes the
    result into ``config.output_folder`` and returns the written path as a
    string, so the steps can be chained through their return values.
    """

    def __init__(self, config: "Config"):
        """Store the configuration object.

        The methods of this class read ``config.input_path`` and
        ``config.output_folder`` from it.
        """
        self.config = config

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _output_path(self, filename: str) -> str:
        """Return ``<output_folder>/<filename>`` as a string."""
        return f'{self.config.output_folder}/{filename}'

    @staticmethod
    def _write_image(path: str, image: np.ndarray) -> None:
        """Write *image* to *path*; raise ``OSError`` if OpenCV reports failure.

        ``cv2.imwrite`` signals a failed write by returning ``False``, which
        would otherwise go unnoticed until a later step tries to read the file.
        """
        if not cv2.imwrite(path, image):
            raise OSError(f"Could not write image: {path}")

    @staticmethod
    def _thin_dense_pixels(binary: np.ndarray, max_neighbors: int) -> np.ndarray:
        """Return a copy of *binary* with crowded pixels removed on sparse rows.

        Args:
            binary: 2-D ``uint8`` mask whose foreground pixels are ``1`` and
                whose remaining pixels are ``0``.
            max_neighbors: A foreground pixel is "crowded" when more than this
                many of its 8 neighbours are foreground.

        Returns:
            A new mask of the same shape and dtype. A crowded pixel is cleared
            only when its own row holds the fewest foreground pixels among the
            rows directly above, at, and below it; ties go to the upper row.
            The input array is never modified and all decisions are based on
            the original mask, so the result does not depend on scan order.
        """
        height, width = binary.shape

        # Sum each 3x3 window by adding the nine shifted views of the
        # zero-padded mask; int32 keeps the accumulation away from uint8.
        padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0)
        padded = padded.astype(np.int32)
        window_sum = np.zeros((height, width), dtype=np.int32)
        for dy in range(3):
            for dx in range(3):
                window_sum += padded[dy:dy + height, dx:dx + width]

        # The window includes the centre pixel itself, hence the "- 1".
        crowded = (binary == 1) & ((window_sum - 1) > max_neighbors)

        # A row is the one to clear when it is strictly sparser than the row
        # above (the upper row wins ties) and no denser than the row below.
        row_counts = binary.sum(axis=1, dtype=np.int64)
        clear_row = np.ones(height, dtype=bool)
        clear_row[1:] &= row_counts[1:] < row_counts[:-1]
        clear_row[:-1] &= row_counts[:-1] <= row_counts[1:]

        cleaned = binary.copy()
        cleaned[crowded & clear_row[:, np.newaxis]] = 0
        return cleaned

    # ------------------------------------------------------------------
    # Public pipeline steps
    # ------------------------------------------------------------------
    def mask_text_regions(self, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
        """Mask text regions in the image to reduce panel extraction noise.

        Args:
            bboxes: Boxes given as ``[x1, y1, x2, y2]``; each is painted over
                with a filled rectangle.
            output_filename: File name of the result inside the output folder.
            color: Fill colour passed to ``cv2.rectangle``.

        Returns:
            Path of the written image.

        Raises:
            FileNotFoundError: If ``config.input_path`` cannot be loaded.
            OSError: If the result cannot be written.
        """
        image = cv2.imread(self.config.input_path)
        if image is None:
            raise FileNotFoundError(f"Could not load image: {self.config.input_path}")

        for bbox in bboxes:
            # cv2.rectangle needs integer coordinates; cast so boxes holding
            # floats or NumPy scalars are accepted as well.
            x1, y1, x2, y2 = (int(value) for value in bbox)
            cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)

        output_path = self._output_path(output_filename)
        self._write_image(output_path, image)
        print(f"✅ Text-masked image saved to: {output_path}")
        return str(output_path)

    def preprocess_image(self, masked_image_path: str) -> Tuple[str, str, str]:
        """Preprocess image for panel extraction.

        Produces a grayscale image, a binary image and a (possibly) dilated
        image, and writes them as ``2_gray.jpg``, ``3_binary.jpg`` and
        ``4_dilated.jpg`` in the output folder.

        Returns:
            ``(gray_path, binary_path, dilated_path)``.

        Raises:
            FileNotFoundError: If *masked_image_path* cannot be loaded.
            OSError: If any of the results cannot be written.
        """
        image = cv2.imread(masked_image_path)
        if image is None:
            raise FileNotFoundError(f"Could not load image: {masked_image_path}")

        # Convert to grayscale, then to an inverted binary image
        # (pixels brighter than 200 become 0, all others 255).
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
        binary, is_inverted = self.invert_if_black_dominates(binary)

        if is_inverted:
            # The image was flipped; it is used as-is without dilation.
            dilated = binary
        else:
            # Dilate to strengthen borders
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            dilated = cv2.dilate(binary, kernel, iterations=2)

        # Save intermediate results
        gray_path = self._output_path('2_gray.jpg')
        binary_path = self._output_path('3_binary.jpg')
        dilated_path = self._output_path('4_dilated.jpg')

        self._write_image(str(gray_path), gray)
        self._write_image(str(binary_path), binary)
        self._write_image(str(dilated_path), dilated)

        return str(gray_path), str(binary_path), str(dilated_path)

    def invert_if_black_dominates(self, binary: np.ndarray) -> Tuple[np.ndarray, bool]:
        """Binarize *binary* and invert it when black pixels outnumber white.

        Args:
            binary: Single-channel image; it is re-thresholded at 127 so that
                every pixel is exactly 0 or 255 before counting.

        Returns:
            ``(image, was_inverted)`` where *image* is the thresholded image,
            inverted if black dominated, and *was_inverted* says whether the
            inversion happened.
        """
        # Threshold to binary image
        _, binary = cv2.threshold(binary, 127, 255, cv2.THRESH_BINARY)

        # Count black and white pixels
        black_pixels = int(np.count_nonzero(binary == 0))
        white_pixels = int(np.count_nonzero(binary == 255))

        print(f"Black pixels: {black_pixels}, White pixels: {white_pixels}")

        # If black dominates, invert
        black_dominates = black_pixels > white_pixels
        if black_dominates:
            print("🔄 Inverting image because black > white")
            result = cv2.bitwise_not(binary)
        else:
            print("✅ No inversion needed, white >= black")
            result = binary

        return result, black_dominates

    def thin_image_borders(self, processed_image_path: str, output_filename: str = "5_thin_border.jpg", min_size: int = 150) -> str:
        """Thin thick borders to a skeleton and remove small hanging clusters.

        Args:
            processed_image_path: Image to thin; pixels at or below 128 are
                treated as border (foreground) pixels.
            output_filename: File name of the result inside the output folder.
            min_size: Passed to ``remove_small_objects``; raise it to prune
                more skeleton fragments, lower it to keep more.

        Returns:
            Path of the written image (skeleton pixels 0, everything else 255).

        Raises:
            FileNotFoundError: If *processed_image_path* cannot be loaded.
            OSError: If the result cannot be written.
        """
        # Imported lazily so scikit-image is only needed when this step runs.
        from skimage.morphology import skeletonize, remove_small_objects
        from skimage.measure import label

        # Load image
        img = cv2.imread(processed_image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f"Could not load image: {processed_image_path}")
        _, binary = cv2.threshold(img, 128, 1, cv2.THRESH_BINARY_INV)  # invert, binary mask (0,1)

        # Skeletonize
        skeleton = skeletonize(binary).astype(np.uint8)

        # Remove small hanging clusters (8-connected components)
        labeled = label(skeleton, connectivity=2)
        cleaned = remove_small_objects(labeled, min_size=min_size)

        # Convert back to 0–255 uint8 image
        final = (cleaned > 0).astype(np.uint8) * 255

        # Undo the inversion applied by the threshold above
        result = 255 - final

        # Save
        output_path = self._output_path(output_filename)
        self._write_image(output_path, result)
        print(f"✅ Cleaned and thinned image saved to: {output_path}")
        return str(output_path)

    def clean_dilated_image(self, dilated_path: str,
                           output_filename: str = "6_dilated_cleaned.jpg",
                           max_neighbors: int = 2) -> str:
        """Clean dilated image by thinning thick borders.

        A black pixel with more than *max_neighbors* black 8-neighbours is
        turned white when its row has the fewest black pixels among the rows
        directly above, at, and below it (ties go to the upper row).

        Args:
            dilated_path: Grayscale image to clean.
            output_filename: File name of the result inside the output folder.
            max_neighbors: Largest number of black neighbours a black pixel
                may have without being considered for removal.

        Returns:
            Path of the written image (kept black pixels 0, everything else 255).

        Raises:
            FileNotFoundError: If *dilated_path* cannot be loaded.
            OSError: If the result cannot be written.
        """
        dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
        if dilated is None:
            raise FileNotFoundError(f"Could not load dilated image: {dilated_path}")

        # NOTE(review): only exact-zero pixels count as black. The default
        # intermediate files are written as .jpg, so compression noise may
        # leave near-black pixels out — confirm whether a threshold is wanted.
        binary = (dilated == 0).astype(np.uint8)
        cleaned = self._thin_dense_pixels(binary, max_neighbors)

        # Map the 0/1 mask back to an image: kept black pixels -> 0, rest -> 255.
        cleaned_img = (1 - cleaned) * 255
        output_path = self._output_path(output_filename)
        self._write_image(str(output_path), cleaned_img)
        print(f"✅ Cleaned dilated image saved to: {output_path}")
        return str(output_path)