Spaces:

jebin2
/

comic-panel-extractor

Running

App Files Files Community

jebin2 committed on Jul 28

Commit

f0c23ec

1 Parent(s): d00f531

easy panel ext added

Browse files

Files changed (5) hide show

comic_panel_extractor/config.py +1 -0
comic_panel_extractor/image_processor.py +8 -7
comic_panel_extractor/main.py +8 -4
comic_panel_extractor/panel_extractor.py +72 -22
comic_panel_extractor/panel_segmentation.py +240 -0

comic_panel_extractor/config.py CHANGED Viewed

@@ -4,6 +4,7 @@ from dataclasses import dataclass
 class Config:
     """Configuration settings for the comic-to-video pipeline."""
     input_path: str = ""
     output_folder: str = "temp_dir"
     distance_threshold: int = 70
     vertical_threshold: int = 30

 class Config:
     """Configuration settings for the comic-to-video pipeline."""
     input_path: str = ""
+    black_overlay_input_path: str = ""
     output_folder: str = "temp_dir"
     distance_threshold: int = 70
     vertical_threshold: int = 30

comic_panel_extractor/image_processor.py CHANGED Viewed

@@ -11,11 +11,11 @@ class ImageProcessor:
     def __init__(self, config: Config):
         self.config = config
-    def mask_text_regions(self, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
         """Mask text regions in the image to reduce panel extraction noise."""
-        image = cv2.imread(self.config.input_path)
         if image is None:
-            raise FileNotFoundError(f"Could not load image: {self.config.input_path}")
         for bbox in bboxes:
             x1, y1, x2, y2 = bbox
@@ -26,16 +26,17 @@ class ImageProcessor:
         print(f"✅ Text-masked image saved to: {output_path}")
         return str(output_path)
-    def preprocess_image(self, masked_image_path) -> Tuple[str, str, str]:
         """Preprocess image for panel extraction."""
-        image = cv2.imread(masked_image_path)
         if image is None:
-            raise FileNotFoundError(f"Could not load image: {masked_image_path}")
         # Convert to grayscale and binary
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
         _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
-        binary, is_inverted = self.invert_if_black_dominates(binary)
         if not is_inverted:
             # Dilate to strengthen borders

     def __init__(self, config: Config):
         self.config = config
+    def mask_text_regions(self, input_path, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
         """Mask text regions in the image to reduce panel extraction noise."""
+        image = cv2.imread(input_path)
         if image is None:
+            raise FileNotFoundError(f"Could not load image: {input_path}")
         for bbox in bboxes:
             x1, y1, x2, y2 = bbox
         print(f"✅ Text-masked image saved to: {output_path}")
         return str(output_path)
+    def preprocess_image(self, processed_image_path) -> Tuple[str, str, str]:
         """Preprocess image for panel extraction."""
+        image = cv2.imread(processed_image_path)
         if image is None:
+            raise FileNotFoundError(f"Could not load image: {processed_image_path}")
         # Convert to grayscale and binary
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
         _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
+        is_inverted = False
+        # binary, is_inverted = self.invert_if_black_dominates(binary)
         if not is_inverted:
             # Dilate to strengthen borders

comic_panel_extractor/main.py CHANGED Viewed

@@ -3,6 +3,7 @@ from .config import Config
 from .image_processor import ImageProcessor
 from .panel_extractor import PanelData
 from .panel_extractor import PanelExtractor
 from typing import List, Tuple
 from pathlib import Path
@@ -26,13 +27,16 @@ class ComicPanelExtractor:
     def extract_panels_from_comic(self) -> Tuple[List[np.ndarray], List[PanelData]]:
         """Complete pipeline to extract panels from a comic image."""
         print(f"Starting panel extraction for: {self.config.input_path}")
         # Step 1: Detect and mask text regions
         text_bubbles = self._detect_text_bubbles()
-        masked_image_path = self.image_processor.mask_text_regions([bubble["bbox"] for bubble in text_bubbles])
         # Step 2: Preprocess image
-        _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(masked_image_path)
         if is_inverted:
             # Step 3: Remove Inner Sketch

 from .image_processor import ImageProcessor
 from .panel_extractor import PanelData
 from .panel_extractor import PanelExtractor
+from .panel_segmentation import main as main_panel_segmentation
 from typing import List, Tuple
 from pathlib import Path
     def extract_panels_from_comic(self) -> Tuple[List[np.ndarray], List[PanelData]]:
         """Complete pipeline to extract panels from a comic image."""
         print(f"Starting panel extraction for: {self.config.input_path}")
+        processed_image_path = main_panel_segmentation(self.config.output_folder, self.config.input_path, self.config.input_path)
+        self.config.black_overlay_input_path = processed_image_path
         # Step 1: Detect and mask text regions
         text_bubbles = self._detect_text_bubbles()
+        processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
         # Step 2: Preprocess image
+        _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
         if is_inverted:
             # Step 3: Remove Inner Sketch

comic_panel_extractor/panel_extractor.py CHANGED Viewed

@@ -4,6 +4,7 @@ from .config import Config
 import numpy as np
 import cv2
 from dataclasses import dataclass
 @dataclass
 class PanelData:
@@ -218,49 +219,98 @@ class PanelExtractor:
         return [(x1, y1, x2, y2) for x1, y1, x2, y2 in panels
                 if (x2 - x1) >= min_allowed_width and (y2 - y1) >= min_allowed_height]
-    def _save_panels(self, panels: List[Tuple[int, int, int, int]],
-                    original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData]]:
         """Save panel images and return panel data."""
         visual_output = original.copy()
         panel_images = []
         panel_data = []
         all_panel_path = []
         for idx, (x1, y1, x2, y2) in enumerate(panels, 1):
-            # Extract panel image
-            panel_img = original[y1:y2, x1:x2]
-            # Check if more than 90% pixels are black
             gray = cv2.cvtColor(panel_img, cv2.COLOR_BGR2GRAY)
-            black_pixels = np.sum(gray < 30)  # pixel intensity <30 considered black
             total_pixels = gray.size
             black_ratio = black_pixels / total_pixels
-            if black_ratio > 0.9:
                 print(f"⚠️ Skipping panel #{idx} — {round(black_ratio * 100, 2)}% black")
                 continue
-            # Add to results
             panel_images.append(panel_img)
-            # Create panel data
             panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
             panel_data.append(panel_info)
-            # Save panel image
-            panel_path = f'{self.config.output_folder}/panel_{idx}_{(x1, y1, x2, y2)}.jpg'
             cv2.imwrite(str(panel_path), panel_img)
             all_panel_path.append(panel_path)
-            # Draw visualization
             cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(visual_output, f"#{idx}", (x1+5, y1+25),
-                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
-        # Save visualization
         visual_path = f'{self.config.output_folder}/panels_visualization.jpg'
         cv2.imwrite(str(visual_path), visual_output)
         print(f"✅ Extracted {len(panel_images)} panels after filtering.")
-        return panel_images, panel_data, all_panel_path

 import numpy as np
 import cv2
 from dataclasses import dataclass
+import os
 @dataclass
 class PanelData:
         return [(x1, y1, x2, y2) for x1, y1, x2, y2 in panels
                 if (x2 - x1) >= min_allowed_width and (y2 - y1) >= min_allowed_height]
+    def count_panel_files(self, folder_path: str) -> int:
+        """
+        Count the number of files in a folder that start with 'panel_'.
+        Args:
+            folder_path: Path to the folder to search.
+        Returns:
+            Number of files starting with 'panel_'.
+        """
+        if not os.path.exists(folder_path):
+            print(f"Folder does not exist: {folder_path}")
+            return 0
+        return len([
+            fname for fname in os.listdir(folder_path)
+            if fname.startswith("panel_") and os.path.isfile(os.path.join(folder_path, fname))
+        ])
+    def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
         """Save panel images and return panel data."""
         visual_output = original.copy()
         panel_images = []
         panel_data = []
         all_panel_path = []
+        panel_idx = self.count_panel_files(self.config.output_folder)
+        black_overlay_input = cv2.imread(self.config.black_overlay_input_path)
+        image_area = width * height
+        maybe_full_page_panel = None  # Store panel that is ≥90% of the page
         for idx, (x1, y1, x2, y2) in enumerate(panels, 1):
+            # Extract panel image from black_overlay_input
+            panel_img = black_overlay_input[y1:y2, x1:x2]
+            # Check for mostly black content
             gray = cv2.cvtColor(panel_img, cv2.COLOR_BGR2GRAY)
+            black_pixels = np.sum(gray < 30)
             total_pixels = gray.size
             black_ratio = black_pixels / total_pixels
+            if black_ratio > 0.8:
                 print(f"⚠️ Skipping panel #{idx} — {round(black_ratio * 100, 2)}% black")
                 continue
+            else:
+                print(f"✅ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
+            # Check if this panel is ≥90% of the full image
+            panel_area = (x2 - x1) * (y2 - y1)
+            if panel_area >= 0.9 * image_area:
+                print(f"⚠️ Panel #{idx} covers ≥90% of the image — marked for potential use only")
+                maybe_full_page_panel = (idx, (x1, y1, x2, y2))
+                continue  # Skip for now
+            # Save valid smaller panel
+            panel_img = visual_output[y1:y2, x1:x2]
             panel_images.append(panel_img)
             panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
             panel_data.append(panel_info)
+            panel_idx += 1
+            panel_path = f'{self.config.output_folder}/panel_{panel_idx}_{(x1, y1, x2, y2)}.jpg'
             cv2.imwrite(str(panel_path), panel_img)
             all_panel_path.append(panel_path)
             cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(visual_output, f"#{idx}", (x1+5, y1+25),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
+        # If no valid panels were saved, and we had a full-page one, save it
+        if not panel_images and maybe_full_page_panel and panel_idx == 0:
+            idx, (x1, y1, x2, y2) = maybe_full_page_panel
+            panel_img = visual_output[y1:y2, x1:x2]
+            panel_images.append(panel_img)
+            panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
+            panel_data.append(panel_info)
+            panel_idx += 1
+            panel_path = f'{self.config.output_folder}/panel_{panel_idx}_{(x1, y1, x2, y2)}.jpg'
+            cv2.imwrite(str(panel_path), panel_img)
+            all_panel_path.append(panel_path)
+            cv2.rectangle(visual_output, (x1, y1), (x2, y2), (255, 0, 0), 2)
+            cv2.putText(visual_output, f"#full", (x1+5, y1+25),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
+            print(f"✅ Saved full-page panel as fallback")
+        # Save final visualization
         visual_path = f'{self.config.output_folder}/panels_visualization.jpg'
         cv2.imwrite(str(visual_path), visual_output)
         print(f"✅ Extracted {len(panel_images)} panels after filtering.")
+        return panel_images, panel_data, all_panel_path

comic_panel_extractor/panel_segmentation.py ADDED Viewed

	@@ -0,0 +1,240 @@

+import os
+import numpy as np
+from PIL import Image, ImageDraw
+import imageio.v2 as imageio  # Fix for imageio warning
+from skimage.color import rgb2gray
+from skimage.feature import canny
+from skimage import measure
+from scipy import ndimage as ndi
+import re
+from skimage.morphology import remove_small_holes
+def extract_fully_white_panels(
+    original_image: np.ndarray,
+    segmentation_mask: np.ndarray,
+    output_dir: str = "panel_output",
+    debug_region_dir: str = "panel_debug_regions",
+    min_area_ratio: float = 0.05,
+    min_width_ratio: float = 0.05,
+    min_height_ratio: float = 0.05,
+    save_debug: bool = True
+):
+    """
+    Extract fully white panels from a segmented image.
+    Args:
+        original_image: Original RGB image as numpy array
+        segmentation_mask: Binary segmentation mask
+        output_dir: Directory to save extracted panels
+        debug_region_dir: Directory to save debug images
+        min_area_ratio: Minimum area ratio threshold
+        min_width_ratio: Minimum width ratio threshold
+        min_height_ratio: Minimum height ratio threshold
+        save_debug: Whether to save debug images
+    Returns:
+        List of saved panel file paths
+    """
+    os.makedirs(output_dir, exist_ok=True)
+    if save_debug:
+        os.makedirs(debug_region_dir, exist_ok=True)
+    img_h, img_w = segmentation_mask.shape
+    image_area = img_h * img_w
+    orig_pil = Image.fromarray(original_image)
+    labeled_mask = measure.label(segmentation_mask)
+    regions = measure.regionprops(labeled_mask)
+    saved_panels = []
+    accepted_boxes = []
+    panel_idx = 0
+    for idx, region in enumerate(regions):
+        minr, minc, maxr, maxc = region.bbox
+        w = maxc - minc
+        h = maxr - minr
+        area = w * h
+        crop_box = (minc, minr, maxc, maxr)
+        crop_name_prefix = f"region_{idx+1}"
+        # Crops
+        cropped_img = orig_pil.crop(crop_box)
+        cropped_mask = segmentation_mask[minr:maxr, minc:maxc]
+        # Fix for Pillow warning: Remove mode parameter
+        mask_pil = Image.fromarray((cropped_mask * 255).astype('uint8'))
+        # 1. Threshold check
+        if (
+            area < min_area_ratio * image_area or
+            w < min_width_ratio * img_w or
+            h < min_height_ratio * img_h
+        ):
+            if save_debug:
+                cropped_img.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_orig.jpg"))
+                mask_pil.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_mask.jpg"))
+            continue
+        # 2. Check if region is mostly white (allow small % of black)
+        black_pixel_count = np.count_nonzero(region.image == 0)
+        total_pixels = region.image.size
+        black_ratio = black_pixel_count / total_pixels
+        if black_ratio > 0.02:  # Allow up to 1% black pixels
+            print(f"❌ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
+            # Save debug info if desired
+            if save_debug:
+                debug_region_dir_specific = os.path.join(output_dir, f"region_{idx}_skipped_black_inside")
+                os.makedirs(debug_region_dir_specific, exist_ok=True)
+                # Save cropped mask
+                cropped_mask = segmentation_mask[minr:maxr, minc:maxc]
+                # Fix for Pillow warning: Remove mode parameter
+                mask_pil = Image.fromarray((cropped_mask * 255).astype("uint8"))
+                mask_pil.save(os.path.join(debug_region_dir_specific, f"region_{idx}_mask.jpg"))
+                # Highlight black pixels in red and zoom
+                highlighted = np.stack([cropped_mask]*3, axis=-1) * 255
+                highlighted[cropped_mask == 0] = [255, 0, 0]
+                highlighted_zoom = Image.fromarray(highlighted.astype('uint8')).resize(
+                    (highlighted.shape[1]*4, highlighted.shape[0]*4), resample=Image.NEAREST
+                )
+                highlighted_zoom.save(os.path.join(debug_region_dir_specific, f"region_{idx}_highlight_black_zoomed.jpg"))
+            continue
+        # 3. Save valid panel with bbox coordinates in filename
+        bbox_str = f"({minc}, {minr}, {maxc}, {maxr})"
+        panel_idx = panel_idx + 1
+        panel_path = os.path.join(output_dir, f"panel_{panel_idx}_{bbox_str}.jpg")
+        cropped_img.save(panel_path)
+        saved_panels.append(panel_path)
+        accepted_boxes.append((minc, minr, maxc, maxr))
+        if save_debug:
+            cropped_img.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_saved_orig.jpg"))
+            mask_pil.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_saved_mask.jpg"))
+    # 4. Debug image with accepted boxes
+    if save_debug:
+        debug_img = orig_pil.copy()
+        draw = ImageDraw.Draw(debug_img)
+        for (x1, y1, x2, y2) in accepted_boxes:
+            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
+        debug_img.save(os.path.join(output_dir, "debug_all_saved_panels.jpg"))
+    return saved_panels
+def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.ndarray:
+    """
+    Create segmentation mask from image using edge detection and hole filling.
+    Args:
+        image: Input RGB image as numpy array
+        save_debug: Whether to save intermediate processing steps
+    Returns:
+        Binary segmentation mask
+    """
+    if save_debug:
+        os.makedirs("panel_debug_steps", exist_ok=True)
+        Image.fromarray(image).save("panel_debug_steps/step1_original.jpg")
+    # Convert to grayscale
+    grayscale = rgb2gray(image)
+    if save_debug:
+        gray_uint8 = (grayscale * 255).astype('uint8')
+        # Fix for Pillow warning: Remove mode parameter
+        Image.fromarray(gray_uint8).save("panel_debug_steps/step2_grayscale.jpg")
+    # Edge detection
+    edges = canny(grayscale)
+    if save_debug:
+        edges_uint8 = (edges * 255).astype('uint8')
+        # Fix for Pillow warning: Remove mode parameter
+        Image.fromarray(edges_uint8).save("panel_debug_steps/step3_edges.jpg")
+    # Fill holes in edges
+    segmentation = ndi.binary_fill_holes(edges)
+    # ✅ Remove small black clusters (holes in white regions)
+    segmentation_cleaned = remove_small_holes(segmentation, area_threshold=500)  # adjust threshold as needed
+    if save_debug:
+        segmentation_uint8 = (segmentation_cleaned * 255).astype('uint8')
+        Image.fromarray(segmentation_uint8).save("panel_debug_steps/step4_segmentation_filled.jpg")
+    return segmentation_cleaned
+def create_image_with_panels_removed(
+    original_image: np.ndarray,
+    segmentation_mask: np.ndarray,
+    output_folder: str,
+    output_path: str,
+    save_debug: True
+) -> None:
+    """
+    Create a version of the original image with detected panels blacked out.
+    Args:
+        original_image: Original RGB image as numpy array
+        segmentation_mask: Binary segmentation mask
+        output_path: Path to save the modified image
+    """
+    # Get panel information
+    saved_panels = extract_fully_white_panels(
+        original_image=original_image,
+        segmentation_mask=segmentation_mask,
+        output_dir=output_folder,
+        debug_region_dir="panel_debug_regions",
+        save_debug=save_debug
+    )
+    # Create modified image
+    im_no_panels = Image.fromarray(original_image.copy())
+    draw = ImageDraw.Draw(im_no_panels)
+    # Get regions and black them out
+    labeled_mask = measure.label(segmentation_mask)
+    regions = measure.regionprops(labeled_mask)
+    pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
+    for panel_path in saved_panels:
+        # Extract panel index from filename with bbox format
+        panel_name = os.path.basename(panel_path)
+        match = pattern.match(panel_name)
+        minc, minr, maxc, maxr = map(int, match.groups())
+        draw.rectangle([minc, minr, maxc, maxr], fill=(0, 0, 0))
+    # Save the result
+    im_no_panels.save(output_path)
+def main(output_folder, input_image_path, original_image_path):
+    """Main execution function."""
+    # Load the input image
+    image = imageio.imread(input_image_path)
+    original_image = imageio.imread(original_image_path)
+    save_debug = True
+    # Create segmentation mask
+    segmentation_mask = create_segmentation_mask(image, save_debug=save_debug)
+    pre_process_path = f"{output_folder}/original_with_panels_removed.jpg"
+    # Create image with panels removed
+    create_image_with_panels_removed(
+        original_image=original_image,
+        segmentation_mask=segmentation_mask,
+        output_folder=output_folder,
+        output_path=pre_process_path,
+        save_debug=save_debug
+    )
+    return pre_process_path
+if __name__ == "__main__":
+    # Manual run: reads 'test7.jpg' (relative path) as both the segmentation
+    # input and the original image, writing results under 'panel_output'.
+    main('panel_output', 'test7.jpg', 'test7.jpg')