Spaces:
Running
Running
nec c
Browse files
comic_panel_extractor/border_panel_extractor.py
CHANGED
|
@@ -12,7 +12,7 @@ import cv2
|
|
| 12 |
|
| 13 |
from .config import Config
|
| 14 |
from .image_processor import ImageProcessor
|
| 15 |
-
from .utils import remove_duplicate_boxes
|
| 16 |
|
| 17 |
class BorderPanelExtractor:
|
| 18 |
"""
|
|
@@ -241,6 +241,25 @@ class BorderPanelExtractor:
|
|
| 241 |
|
| 242 |
return adjusted_boxes
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
def create_image_with_panels_removed(
|
| 245 |
self,
|
| 246 |
original_image: np.ndarray,
|
|
@@ -284,6 +303,8 @@ class BorderPanelExtractor:
|
|
| 284 |
|
| 285 |
accepted_boxes = remove_duplicate_boxes(accepted_boxes)
|
| 286 |
|
|
|
|
|
|
|
| 287 |
all_paths = self._save_panel(original_image, accepted_boxes)
|
| 288 |
|
| 289 |
output_path = self.draw_black(original_image, accepted_boxes)
|
|
|
|
| 12 |
|
| 13 |
from .config import Config
|
| 14 |
from .image_processor import ImageProcessor
|
| 15 |
+
from .utils import remove_duplicate_boxes, count_panels_inside
|
| 16 |
|
| 17 |
class BorderPanelExtractor:
|
| 18 |
"""
|
|
|
|
| 241 |
|
| 242 |
return adjusted_boxes
|
| 243 |
|
| 244 |
+
def remove_swallow_boxes(self, boxes):
    """
    Drop every box that fully contains at least one other, distinct box.

    A box "swallows" another when the other box lies entirely within it
    (edges touching still count, as decided by ``count_panels_inside``).
    Exact copies of the box being examined are ignored for this check:
    two identical boxes would otherwise each be reported as containing the
    other and both would be discarded.

    Args:
        boxes: Iterable-indexable collection of ``(x1, y1, x2, y2)`` boxes.

    Returns:
        list: The boxes that do not swallow any other box, as
        ``(x1, y1, x2, y2)`` tuples in their original order.
    """
    filtered_boxes = []

    for i, (x1, y1, x2, y2) in enumerate(boxes):
        current_box = (x1, y1, x2, y2)

        # Candidates are all other entries, minus exact copies of this box
        # (a copy is a duplicate, not a panel nested inside this one).
        other_boxes = [
            other
            for j, other in enumerate(boxes)
            if j != i and tuple(other) != current_box
        ]

        # Skip this box if it fully contains at least one other box (i.e., it's swallowing)
        if count_panels_inside(current_box, other_boxes) >= 1:
            continue

        # Keep boxes that don't swallow others
        filtered_boxes.append(current_box)

    print(f"✅ Found {len(boxes) - len(filtered_boxes)} swallowed boxes")
    return filtered_boxes
|
| 261 |
+
|
| 262 |
+
|
| 263 |
def create_image_with_panels_removed(
|
| 264 |
self,
|
| 265 |
original_image: np.ndarray,
|
|
|
|
| 303 |
|
| 304 |
accepted_boxes = remove_duplicate_boxes(accepted_boxes)
|
| 305 |
|
| 306 |
+
accepted_boxes = self.remove_swallow_boxes(accepted_boxes)
|
| 307 |
+
|
| 308 |
all_paths = self._save_panel(original_image, accepted_boxes)
|
| 309 |
|
| 310 |
output_path = self.draw_black(original_image, accepted_boxes)
|
comic_panel_extractor/image_processor.py
CHANGED
|
@@ -9,6 +9,10 @@ from skimage.measure import label
|
|
| 9 |
from skimage import measure
|
| 10 |
from tqdm import tqdm
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
class ImageProcessor:
|
| 13 |
"""Handles image preprocessing operations."""
|
| 14 |
|
|
@@ -81,6 +85,41 @@ class ImageProcessor:
|
|
| 81 |
# Save result
|
| 82 |
return inverted, black_pixels > white_pixels
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def thin_image_borders(self, processed_image_path: str, file_name="thin_border.jpg", output_folder=None) -> str:
|
| 85 |
"""
|
| 86 |
Clean dilated image by thinning thick borders and removing hanging clusters.
|
|
|
|
| 9 |
from skimage import measure
|
| 10 |
from tqdm import tqdm
|
| 11 |
|
| 12 |
+
from PIL import Image
|
| 13 |
+
import numpy as np
|
| 14 |
+
from sklearn.cluster import KMeans
|
| 15 |
+
|
| 16 |
class ImageProcessor:
|
| 17 |
"""Handles image preprocessing operations."""
|
| 18 |
|
|
|
|
| 85 |
# Save result
|
| 86 |
return inverted, black_pixels > white_pixels
|
| 87 |
|
| 88 |
+
def group_colors(self, processed_image_path, num_clusters: int = 5, file_name="group_colors.jpg", output_folder=None) -> str:
    """
    Groups similar colors in an image using KMeans clustering.

    Every pixel is replaced by the center color of the cluster it was
    assigned to, and the recolored image is written to disk.

    Args:
        processed_image_path (str): Path to the image to be color-grouped.
        num_clusters (int): Number of color clusters to form.
        file_name (str): Name of the output image file.
        output_folder (str): Optional output directory; when omitted (or
            falsy) ``self.config.output_folder`` is used.

    Returns:
        str: Path to the saved grouped-color image.
    """
    output_folder = output_folder or self.config.output_folder

    # Load image; the context manager closes the underlying file once the
    # pixel data has been copied into the array.
    with Image.open(processed_image_path) as source_image:
        np_image = np.array(source_image.convert("RGB"))
    h, w = np_image.shape[:2]
    pixels = np_image.reshape(-1, 3)

    # Run KMeans
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init='auto')
    labels = kmeans.fit_predict(pixels)
    centers = kmeans.cluster_centers_.astype(np.uint8)

    # Replace pixels with their cluster center color
    clustered_pixels = centers[labels].reshape(h, w, 3)

    # Save using OpenCV (convert RGB to BGR). The channel flip is a
    # negatively-strided view, so materialize it as a contiguous array
    # before handing it to OpenCV, and pass the path as a plain string.
    output_path = self.get_output_path(output_folder, file_name)
    clustered_bgr = np.ascontiguousarray(clustered_pixels[:, :, ::-1])
    cv2.imwrite(str(output_path), clustered_bgr)

    return str(output_path)
|
| 122 |
+
|
| 123 |
def thin_image_borders(self, processed_image_path: str, file_name="thin_border.jpg", output_folder=None) -> str:
|
| 124 |
"""
|
| 125 |
Clean dilated image by thinning thick borders and removing hanging clusters.
|
comic_panel_extractor/panel_extractor.py
CHANGED
|
@@ -6,7 +6,7 @@ import cv2
|
|
| 6 |
from dataclasses import dataclass
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
-
from .utils import remove_duplicate_boxes
|
| 10 |
|
| 11 |
@dataclass
|
| 12 |
class PanelData:
|
|
@@ -248,15 +248,6 @@ class PanelExtractor:
|
|
| 248 |
coords.append(tuple(map(int, match.groups())))
|
| 249 |
return coords
|
| 250 |
|
| 251 |
-
def count_panels_inside(self, target_box, other_boxes):
    """
    Count the boxes in ``other_boxes`` that lie entirely within ``target_box``.

    Boxes are ``(x1, y1, x2, y2)`` sequences. The comparison is inclusive,
    so a box sharing an edge with the target, or equal to it, is counted.
    """
    left, top, right, bottom = target_box
    enclosed = [
        1
        for in_left, in_top, in_right, in_bottom in other_boxes
        if left <= in_left and top <= in_top and right >= in_right and bottom >= in_bottom
    ]
    return len(enclosed)
|
| 258 |
-
|
| 259 |
-
|
| 260 |
def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
|
| 261 |
"""Save panel images and return panel data."""
|
| 262 |
visual_output = original.copy()
|
|
@@ -310,7 +301,7 @@ class PanelExtractor:
|
|
| 310 |
continue
|
| 311 |
|
| 312 |
# 2. Skip if this panel contains ≥1 other panels
|
| 313 |
-
contained_count =
|
| 314 |
if contained_count >= 1:
|
| 315 |
print(f"⚠️ Skipping panel #{idx} — contains {contained_count} other panels inside")
|
| 316 |
continue
|
|
|
|
| 6 |
from dataclasses import dataclass
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
+
from .utils import remove_duplicate_boxes, count_panels_inside
|
| 10 |
|
| 11 |
@dataclass
|
| 12 |
class PanelData:
|
|
|
|
| 248 |
coords.append(tuple(map(int, match.groups())))
|
| 249 |
return coords
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
|
| 252 |
"""Save panel images and return panel data."""
|
| 253 |
visual_output = original.copy()
|
|
|
|
| 301 |
continue
|
| 302 |
|
| 303 |
# 2. Skip if this panel contains ≥1 other panels
|
| 304 |
+
contained_count = count_panels_inside((x1, y1, x2, y2), already_saved_coords)
|
| 305 |
if contained_count >= 1:
|
| 306 |
print(f"⚠️ Skipping panel #{idx} — contains {contained_count} other panels inside")
|
| 307 |
continue
|
comic_panel_extractor/utils.py
CHANGED
|
@@ -58,4 +58,12 @@ def remove_duplicate_boxes(boxes, compare_single=None, iou_threshold=0.7):
|
|
| 58 |
unique_boxes.append(box)
|
| 59 |
|
| 60 |
print(f"✅ Found {abs(len(unique_boxes) - len(boxes))} duplicates")
|
| 61 |
-
return unique_boxes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
unique_boxes.append(box)
|
| 59 |
|
| 60 |
print(f"✅ Found {abs(len(unique_boxes) - len(boxes))} duplicates")
|
| 61 |
+
return unique_boxes
|
| 62 |
+
|
| 63 |
+
def count_panels_inside(target_box, other_boxes):
    """
    Return how many boxes in ``other_boxes`` are fully contained in ``target_box``.

    Each box is an ``(x1, y1, x2, y2)`` sequence. Containment is inclusive:
    a box touching the target's edges, or identical to the target, counts.
    """
    outer_x1, outer_y1, outer_x2, outer_y2 = target_box
    return sum(
        1
        for inner_x1, inner_y1, inner_x2, inner_y2 in other_boxes
        if outer_x1 <= inner_x1
        and outer_y1 <= inner_y1
        and outer_x2 >= inner_x2
        and outer_y2 >= inner_y2
    )
|