jebin2 committed on
Commit
bd1f76d
·
1 Parent(s): b269113

new changes

Browse files
comic_panel_extractor/config.py CHANGED
@@ -12,7 +12,7 @@ class Config:
12
  min_text_length: int = 2
13
  min_area_ratio: float = 0.05
14
  min_width_ratio: float = 0.05
15
- min_height_ratio: float = 0.05
16
 
17
  def get_text_cood_file_path(config: Config):
18
  return f'{config.output_folder}/{config.text_cood_file_name}'
 
12
  min_text_length: int = 2
13
  min_area_ratio: float = 0.05
14
  min_width_ratio: float = 0.05
15
+ min_height_ratio: float = 0.1
16
 
17
  def get_text_cood_file_path(config: Config):
18
  return f'{config.output_folder}/{config.text_cood_file_name}'
comic_panel_extractor/image_processor.py CHANGED
@@ -4,13 +4,22 @@ from .config import Config
4
 
5
  import numpy as np
6
  import cv2
 
 
 
 
7
 
8
  class ImageProcessor:
9
  """Handles image preprocessing operations."""
10
 
11
- def __init__(self, config: Config):
12
- self.config = config
13
-
 
 
 
 
 
14
  def mask_text_regions(self, input_path, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
15
  """Mask text regions in the image to reduce panel extraction noise."""
16
  image = cv2.imread(input_path)
@@ -23,7 +32,6 @@ class ImageProcessor:
23
 
24
  output_path = f'{self.config.output_folder}/{output_filename}'
25
  cv2.imwrite(output_path, image)
26
- print(f"✅ Text-masked image saved to: {output_path}")
27
  return str(output_path)
28
 
29
  def preprocess_image(self, processed_image_path) -> Tuple[str, str, str]:
@@ -34,33 +42,25 @@ class ImageProcessor:
34
 
35
  # Convert to grayscale and binary
36
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
37
- # _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
38
 
39
  # Apply Gaussian blur to reduce noise
40
  blurred = cv2.GaussianBlur(gray, (3, 3), 0)
41
 
42
  # Canny edge detection
43
  edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
44
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
45
- is_inverted = False
46
- # binary, is_inverted = self.invert_if_black_dominates(binary)
47
-
48
- if not is_inverted:
49
- # Dilate to strengthen borders
50
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
51
- dilated = cv2.dilate(edges, kernel, iterations=2)
52
- else: dilated = edges
53
 
54
  # Save intermediate results
55
- gray_path = f'{self.config.output_folder}/2_gray.jpg'
56
- binary_path = f'{self.config.output_folder}/3_binary.jpg'
57
- dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
58
 
59
  cv2.imwrite(str(gray_path), gray)
60
  cv2.imwrite(str(binary_path), edges)
61
  cv2.imwrite(str(dilated_path), dilated)
62
 
63
- return str(gray_path), str(binary_path), str(dilated_path), is_inverted
64
 
65
  def invert_if_black_dominates(self, binary):
66
  # Threshold to binary image
@@ -81,46 +81,11 @@ class ImageProcessor:
81
  # Save result
82
  return inverted, black_pixels > white_pixels
83
 
84
- def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg"):
85
- img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
86
- height, width = img.shape
87
-
88
- # Threshold image to binary
89
- _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
90
-
91
- # Find all contours
92
- contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
93
-
94
- # Create mask for large contours (likely panel borders)
95
- mask = np.zeros_like(binary)
96
- for cnt in contours:
97
- area = cv2.contourArea(cnt)
98
- if area >= (height * width * self.config.min_area_ratio):
99
- cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
100
-
101
- # Apply mask to original image (keeps only large borders)
102
- cleaned = cv2.bitwise_and(binary, binary, mask=mask)
103
-
104
- # Optional: Apply morphological opening to clean tiny sketch lines
105
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
106
- cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel)
107
-
108
- # Invert back if needed
109
- cleaned = cv2.bitwise_not(cleaned)
110
-
111
- # Save
112
- output_path = f'{self.config.output_folder}/{output_filename}'
113
- cv2.imwrite(output_path, cleaned)
114
- print(f"✅ Remove Inner Sketch image saved to: {output_path}")
115
- return str(output_path)
116
-
117
- def thin_image_borders(self, processed_image_path: str, output_filename: str = "6_thin_border.jpg") -> str:
118
  """
119
  Clean dilated image by thinning thick borders and removing hanging clusters.
120
  """
121
- from skimage.morphology import skeletonize, remove_small_objects
122
- from skimage.measure import label
123
-
124
  # Load image
125
  img = cv2.imread(processed_image_path)
126
  # Convert to grayscale and binary
@@ -147,40 +112,517 @@ class ImageProcessor:
147
  result = 255 - final
148
 
149
  # Save
150
- output_path = f'{self.config.output_folder}/{output_filename}'
151
  cv2.imwrite(output_path, result)
152
- print(f"✅ Cleaned and thinned image saved to: {output_path}")
153
  return str(output_path)
154
 
155
-
156
- def clean_dilated_image(self, dilated_path: str,
157
- output_filename: str = "6_dilated_cleaned.jpg",
158
- max_neighbors: int = 2) -> str:
159
- """Clean dilated image by thinning thick borders."""
160
- dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
161
- if dilated is None:
162
- raise FileNotFoundError(f"Could not load dilated image: {dilated_path}")
163
-
164
- binary = (dilated == 0).astype(np.uint8)
165
- padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0)
166
- cleaned = binary.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  height, width = binary.shape
169
- row_black_counts = np.sum(binary, axis=1)
170
-
171
- for y in range(1, height + 1):
172
- for x in range(1, width + 1):
173
- if padded[y, x] == 1:
174
- neighbors = np.sum(padded[y-1:y+2, x-1:x+2]) - 1
175
- if neighbors > max_neighbors:
176
- neighbor_rows = [r for r in [y-1, y, y+1] if 1 <= r <= height]
177
- if neighbor_rows:
178
- row_to_clear = min(neighbor_rows, key=lambda r: row_black_counts[r-1])
179
- if y == row_to_clear:
180
- cleaned[y-1, x-1] = 0
181
-
182
- cleaned_img = (1 - cleaned) * 255
183
- output_path = f'{self.config.output_folder}/{output_filename}'
184
- cv2.imwrite(str(output_path), cleaned_img)
185
- print(f"✅ Cleaned dilated image saved to: {output_path}")
186
- return str(output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import numpy as np
6
  import cv2
7
+ from skimage.morphology import skeletonize, remove_small_objects
8
+ from skimage.measure import label
9
+ from skimage import measure
10
+ from tqdm import tqdm
11
 
12
  class ImageProcessor:
13
  """Handles image preprocessing operations."""
14
 
15
def __init__(self, config: Config = None):
    """Create a processor bound to a configuration.

    Args:
        config: Settings object to use. When it is omitted (or falsy) a
            default-constructed ``Config`` is used instead.
    """
    if not config:
        config = Config()
    self.config = config
    # Running counter; get_output_path() increments it to number output files.
    self.index = 0
18
+
19
def get_output_path(self, output_folder, file_name):
    """Return the next numbered output path inside ``output_folder``.

    Each call advances ``self.index`` by one and prefixes ``file_name`` with
    that counter, zero-padded to at least two digits (``01_``, ``02_``, ...).

    Args:
        output_folder: Folder part of the resulting path.
        file_name: Base file name to be prefixed with the counter.

    Returns:
        The string ``"<output_folder>/<NN>_<file_name>"``.
    """
    self.index = self.index + 1
    step_prefix = format(self.index, "02d")
    return f'{output_folder}/{step_prefix}_{file_name}'
22
+
23
  def mask_text_regions(self, input_path, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
24
  """Mask text regions in the image to reduce panel extraction noise."""
25
  image = cv2.imread(input_path)
 
32
 
33
  output_path = f'{self.config.output_folder}/{output_filename}'
34
  cv2.imwrite(output_path, image)
 
35
  return str(output_path)
36
 
37
  def preprocess_image(self, processed_image_path) -> Tuple[str, str, str]:
 
42
 
43
  # Convert to grayscale and binary
44
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
45
 
46
  # Apply Gaussian blur to reduce noise
47
  blurred = cv2.GaussianBlur(gray, (3, 3), 0)
48
 
49
  # Canny edge detection
50
  edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
51
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
52
+ dilated = cv2.dilate(edges, kernel, iterations=2)
 
 
 
 
 
 
 
53
 
54
  # Save intermediate results
55
+ gray_path = self.get_output_path(self.config.output_folder, "gray.jpg")
56
+ binary_path = self.get_output_path(self.config.output_folder, "binary.jpg")
57
+ dilated_path = self.get_output_path(self.config.output_folder, "dilated.jpg")
58
 
59
  cv2.imwrite(str(gray_path), gray)
60
  cv2.imwrite(str(binary_path), edges)
61
  cv2.imwrite(str(dilated_path), dilated)
62
 
63
+ return str(gray_path), str(binary_path), str(dilated_path)
64
 
65
  def invert_if_black_dominates(self, binary):
66
  # Threshold to binary image
 
81
  # Save result
82
  return inverted, black_pixels > white_pixels
83
 
84
+ def thin_image_borders(self, processed_image_path: str, file_name="thin_border.jpg", output_folder=None) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  """
86
  Clean dilated image by thinning thick borders and removing hanging clusters.
87
  """
88
+ output_folder = output_folder or self.config.output_folder
 
 
89
  # Load image
90
  img = cv2.imread(processed_image_path)
91
  # Convert to grayscale and binary
 
112
  result = 255 - final
113
 
114
  # Save
115
+ output_path = self.get_output_path(output_folder, file_name)
116
  cv2.imwrite(output_path, result)
 
117
  return str(output_path)
118
 
119
def remove_dangling_lines(self, image_path, file_name="dangling_lines_removed.jpg", output_folder=None, min_size=500):
    """Remove small connected groups of dark pixels from a line image.

    The image is read as grayscale, pixels darker than 128 are treated as
    foreground, connected components are labelled (8-connectivity) and
    components rejected by ``remove_small_objects`` are dropped. The result
    is written as black lines on a white background.

    Args:
        image_path: Path of the image to clean.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.
        min_size: Size threshold forwarded to
            ``skimage.morphology.remove_small_objects``.

    Returns:
        Path of the written image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None instead of raising; fail with a clear message
    # rather than a TypeError on the comparison below.
    if gray is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    # Foreground mask: True where the pixel is dark (a line or stroke).
    binary = gray < 128

    # Label connected components; connectivity=2 also joins diagonal neighbours.
    labeled = label(binary, connectivity=2)

    # Drop small components (dangling strokes, fragments).
    cleaned = remove_small_objects(labeled, min_size=min_size)

    # Surviving components become black (0); everything else is white (255).
    final_image = np.where(cleaned > 0, 0, 255).astype(np.uint8)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, final_image)
    return output_path
142
+
143
def remove_diagonal_lines(self, image_path, file_name="remove_diagonal_lines.jpg", output_folder=None):
    """Keep only the horizontal and vertical strokes of a line image.

    Dark pixels (<= 127) are taken as foreground. The foreground is opened
    twice with a 1-pixel-high horizontal kernel and with a 1-pixel-wide
    vertical kernel; only pixels surviving either opening are drawn (black)
    on a white output image, so strokes in other directions disappear.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path)
    # cv2.imread returns None on failure; cvtColor would otherwise raise an
    # opaque cv2.error.
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Inverse threshold: dark strokes become 255, background becomes 0.
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # Kernel length is 1/30 of the longer image side. Clamp to 1 so images
    # smaller than 30 px do not request a zero-sized structuring element.
    kernel_length = max(1, max(gray.shape[0], gray.shape[1]) // 30)

    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))

    # Opening keeps only runs long enough to contain the kernel.
    horizontal_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    vertical_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2)

    # Union of both masks (saturating add of 0/255 images).
    rect_lines = cv2.addWeighted(horizontal_lines, 1, vertical_lines, 1, 0)

    # White canvas with the retained strokes painted black.
    result = np.full_like(gray, 255)
    result[rect_lines > 0] = 0

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
181
+
182
def thick_black(self, image_path, thickness=20, file_name="thick_black.jpg", output_folder=None):
    """Grow the near-black areas of an image by a square kernel.

    Pixels whose gray value is <= 10 are dilated with a
    ``thickness`` x ``thickness`` rectangular kernel, and every pixel
    covered by the dilated mask is painted black in a copy of the image.

    Args:
        image_path: Path of the image to process.
        thickness: Side length, in pixels, of the square dilation kernel.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path)
    # cv2.imread returns None on failure; report it before cvtColor fails.
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Inverse threshold: near-black pixels (<= 10) become 255, the rest 0.
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY_INV)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (thickness, thickness))
    dilated = cv2.dilate(binary, kernel, iterations=1)

    # Paint the grown mask black on a copy so the loaded image stays intact.
    result = img.copy()
    result[dilated == 255] = (0, 0, 0)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
210
+
211
def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
    """Remove dark regions that are too small or lack a long straight edge.

    Dark pixels (<= 127) are labelled into connected regions. A region is
    erased when

    * its bounding box is narrower than ``min_width_ratio`` of the image
      width or shorter than ``min_height_ratio`` of the image height, or
    * no Hough line segment is found in it, or
    * none of the segments found is near-vertical (80-100 degrees) and
      longer than ``min_height_ratio`` of the image height, or
      near-horizontal (< 10 or > 170 degrees) and longer than
      ``min_width_ratio`` of the image width.

    A debug image with a rectangle around every erased region is written to
    ``<output_folder>/debug_<file_name>``; the BGR rectangle colour encodes
    the reason: (0, 0, 255) bounding box too small, (255, 0, 0) no segments
    detected, (0, 255, 255) no qualifying segment.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the cleaned image (black lines on white).

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Validate before any other cv2 call: cvtColor on None raises cv2.error
    # and would hide the intended FileNotFoundError.
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    # Colour copy used only for the debug rectangles.
    visual = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    height_, width_ = img.shape
    min_width_ratio = self.config.min_width_ratio
    min_height_ratio = self.config.min_height_ratio

    # Inverse threshold: dark pixels become foreground (255).
    _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)

    labeled = measure.label(binary)
    clean_mask = np.copy(binary)

    for region in measure.regionprops(labeled):
        minr, minc, maxr, maxc = region.bbox
        width = maxc - minc
        height = maxr - minr

        # Bounding-box filter.
        if width < width_ * min_width_ratio or height < height_ * min_height_ratio:
            clean_mask[labeled == region.label] = 0
            cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
            continue

        # Look for straight segments inside the region's bounding box.
        region_crop = binary[minr:maxr, minc:maxc]
        edges = cv2.Canny(region_crop, 50, 150, apertureSize=3)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=10, maxLineGap=5)

        if lines is None:
            # No segments at all: remove the region.
            clean_mask[labeled == region.label] = 0
            cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)
            continue

        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)
            length = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

            if 80 < angle < 100:
                if length / height_ > min_height_ratio:
                    break  # long near-vertical segment: keep region
            elif angle < 10 or angle > 170:
                if length / width_ > min_width_ratio:
                    break  # long near-horizontal segment: keep region
        else:
            # Loop finished without a break: no qualifying segment.
            clean_mask[labeled == region.label] = 0
            cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)

    # Debug visualisation of everything that was removed.
    cv2.imwrite(f"{output_folder}/debug_{file_name}", visual)

    # Back to black lines on a white background.
    cleaned = cv2.bitwise_not(clean_mask)
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, cleaned)
    return output_path
280
+
281
+
282
def thin_black(self, image_path, file_name="thin_black.jpg", output_folder=None):
    """Thin the dark strokes of an image and save them as black on white.

    The image is read as grayscale and inverse-thresholded at 128 so dark
    strokes become foreground. ``cv2.ximgproc.thinning`` is tried first;
    if that attribute lookup fails with ``AttributeError`` an iterative
    erode/dilate morphological skeleton is computed instead. The thinned
    mask is inverted before saving.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Image not loaded. Check the file path.")

    # Dark strokes -> 255, light background -> 0.
    _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)

    try:
        thinned = cv2.ximgproc.thinning(binary)
    except AttributeError:
        # Fallback: accumulate, for every erosion level, the pixels that an
        # opening at that level would remove.
        cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        skeleton = np.zeros(binary.shape, np.uint8)
        remaining = binary
        while True:
            shrunk = cv2.erode(remaining, cross)
            reopened = cv2.dilate(shrunk, cross)
            residue = cv2.subtract(remaining, reopened)
            skeleton = cv2.bitwise_or(skeleton, residue)
            remaining = shrunk.copy()
            if cv2.countNonZero(remaining) == 0:
                break
        thinned = skeleton

    # Flip so the thin lines are black on a white background.
    thinned = 255 - thinned

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, thinned)
    return output_path
319
+
320
def thin_lines_direct(self, image_path, file_name="thin_lines_direct.jpg", output_folder=None):
    """Thin thick black lines by scanning pixel runs row-wise and column-wise.

    The image is thresholded at 127 (0 = black line, 255 = white). Two
    independent white canvases are filled:

    * Row scan: each horizontal run of black pixels is redrawn on the row
      where the black column through the run's middle ends (measured
      downwards from the current row), or in place when that vertical
      thickness is 1.
    * Column scan: each vertical run of black pixels is redrawn on the
      column where the black row through the run's middle ends (measured
      rightwards from the current column), or in place when that
      horizontal thickness is 1.

    The saved result is the bitwise AND of both canvases, so a pixel is
    black when either scan drew it.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Could not load image")

    # 0 = black line pixel, 255 = white background.
    _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    height, width = binary.shape
    result = np.full_like(binary, 255)
    print("Processing thick lines...")

    # Pass 1: walk every row and collapse horizontal runs downwards.
    print("Step 1: Thinning horizontal segments...")
    for row in range(height):
        col = 0
        while col < width:
            if binary[row, col] != 0:
                col += 1
                continue

            # Consume the whole black run starting at col.
            start_col = col
            while col < width and binary[row, col] == 0:
                col += 1
            end_col = col - 1

            # Vertical extent of the black area below the run's midpoint.
            mid_col = (start_col + end_col) // 2
            thickness = self.get_vertical_thickness(binary, row, mid_col)

            if thickness > 1:
                bottom_row = row + thickness - 1
                if bottom_row < height:
                    result[bottom_row, start_col:end_col + 1] = 0
            else:
                # Already one pixel thick: copy as is.
                result[row, start_col:end_col + 1] = 0

    # Pass 2: walk every column and collapse vertical runs to the right,
    # on a fresh canvas.
    print("Step 2: Thinning vertical segments...")
    result_v = np.full_like(binary, 255)
    for col in range(width):
        row = 0
        while row < height:
            if binary[row, col] != 0:
                row += 1
                continue

            # Consume the whole black run starting at row.
            start_row = row
            while row < height and binary[row, col] == 0:
                row += 1
            end_row = row - 1

            # Horizontal extent of the black area right of the run's midpoint.
            mid_row = (start_row + end_row) // 2
            thickness = self.get_horizontal_thickness(binary, mid_row, col)

            if thickness > 1:
                right_col = col + thickness - 1
                if right_col < width:
                    result_v[start_row:end_row + 1, right_col] = 0
            else:
                # Already one pixel thick: copy as is.
                result_v[start_row:end_row + 1, col] = 0

    # Pass 3: a pixel stays white only if it is white on both canvases.
    print("Step 3: Combining results...")
    final_result = cv2.bitwise_and(result, result_v)
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, final_result)

    return output_path
421
+
422
def get_vertical_thickness(self, binary, start_row, col):
    """Count consecutive zero-valued pixels going down column ``col``.

    Counting starts at ``start_row`` (inclusive) and stops at the first
    non-zero pixel or at the bottom of the image.

    Args:
        binary: 2-D array indexed as ``binary[row, col]``; 0 marks black.
        start_row: Row at which counting begins.
        col: Column to scan.

    Returns:
        Number of consecutive zero pixels; 0 when the start pixel is not
        zero or ``start_row`` is past the last row.
    """
    total_rows = binary.shape[0]
    run_length = 0
    for r in range(start_row, total_rows):
        if binary[r, col] != 0:
            break
        run_length += 1
    return run_length
433
+
434
def get_horizontal_thickness(self, binary, row, start_col):
    """Count consecutive zero-valued pixels going right along row ``row``.

    Counting starts at ``start_col`` (inclusive) and stops at the first
    non-zero pixel or at the right edge of the image.

    Args:
        binary: 2-D array indexed as ``binary[row, col]``; 0 marks black.
        row: Row to scan.
        start_col: Column at which counting begins.

    Returns:
        Number of consecutive zero pixels; 0 when the start pixel is not
        zero or ``start_col`` is past the last column.
    """
    total_cols = binary.shape[1]
    run_length = 0
    for c in range(start_col, total_cols):
        if binary[row, c] != 0:
            break
        run_length += 1
    return run_length
445
+
446
def remove_diagonal_only_cells(self, image_path, file_name="remove_diagonal_only_cells.jpg", output_folder=None):
    """Erase line pixels that touch other line pixels only along one diagonal.

    Dark pixels (<= 128) are the line pixels. A line pixel is erased when it
    has at least one line neighbour, none of its top/bottom/left/right
    neighbours is a line pixel, and all of its line neighbours lie on a
    single diagonal: top-left/bottom-right, or top-right/bottom-left.
    Pixels outside the image count as background. Every decision is based
    on the unmodified input, so the result does not depend on scan order.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image (black lines on white).

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Unable to load the image. Check the file path.")

    # Inverse threshold: line pixels become 255, background 0.
    _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)

    # Boolean line mask padded by one background pixel so every pixel has
    # eight neighbours; the shifted views below are those neighbours.
    active = np.pad(binary == 255, pad_width=1, mode='constant', constant_values=False)
    centre = active[1:-1, 1:-1]
    top_left = active[:-2, :-2]
    top = active[:-2, 1:-1]
    top_right = active[:-2, 2:]
    left = active[1:-1, :-2]
    right = active[1:-1, 2:]
    bottom_left = active[2:, :-2]
    bottom = active[2:, 1:-1]
    bottom_right = active[2:, 2:]

    has_orthogonal = top | bottom | left | right
    on_main_diagonal = top_left | bottom_right
    on_anti_diagonal = top_right | bottom_left

    # XOR: neighbours exist on exactly one of the two diagonals.
    diagonal_only = centre & ~has_orthogonal & (on_main_diagonal ^ on_anti_diagonal)

    cleaned = binary.copy()
    cleaned[diagonal_only] = 0

    # Back to black lines on a white background.
    result = cv2.bitwise_not(cleaned)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
513
+
514
def remove_small_continuity_components(self, image_path, file_name="remove_small_continuity_components.jpg", output_folder=None):
    """Remove connected line components whose pixel count is too small.

    Dark pixels (<= 128) are labelled with 8-connectivity. Every component
    whose area (pixel count) is below
    ``image_height * self.config.min_height_ratio`` is erased.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image (black lines on white).

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Unable to load the image. Check the file path.")

    height, _ = img.shape
    continuity_threshold = height * self.config.min_height_ratio

    # Inverse threshold: line pixels become 255, background 0.
    _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)

    _, labels, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)

    # One flag per label, then a single lookup over the label image. This
    # avoids rescanning the whole image once per component.
    too_small = stats[:, cv2.CC_STAT_AREA] < continuity_threshold
    too_small[0] = False  # label 0 is the background, never a component

    output = binary.copy()
    output[too_small[labels]] = 0

    # Back to black lines on a white background.
    result = cv2.bitwise_not(output)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
548
+
549
def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
    """Bridge short breaks in lines, first along rows and then along columns.

    Dark pixels (<= 128) are the line pixels. In every row, a run of
    background pixels lying between two line pixels is filled when its
    length is at most ``image_width * self.config.min_width_ratio``. The
    same is then done in every column of the row-filled image with the
    limit ``image_height * self.config.min_height_ratio``. Background runs
    touching the image border are never filled.

    Args:
        image_path: Path of the image to process.
        file_name: Base name of the output file; get_output_path() adds a
            step-number prefix.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the written image (black lines on white).

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Unable to load the image. Check the file path.")
    height, width = img.shape

    # Inverse threshold: line pixels become 255, background 0.
    _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
    canvas = binary.copy()

    def fill_short_gaps(line, max_gap):
        """Fill, in place, interior background runs of at most max_gap pixels."""
        active = np.flatnonzero(line == 255)
        if active.size < 2:
            return  # fewer than two line pixels: nothing to bridge
        # Background pixels between each pair of consecutive line pixels.
        gap_lengths = np.diff(active) - 1
        for i in np.flatnonzero((gap_lengths >= 1) & (gap_lengths <= max_gap)):
            line[active[i] + 1:active[i + 1]] = 255

    # Rows first; canvas[r, :] is a view, so the fill modifies canvas.
    max_horizontal_gap = width * self.config.min_width_ratio
    for r in range(height):
        fill_short_gaps(canvas[r, :], max_horizontal_gap)

    # Columns second, operating on the row-filled canvas.
    max_vertical_gap = height * self.config.min_height_ratio
    for c in range(width):
        fill_short_gaps(canvas[:, c], max_vertical_gap)

    # Back to black lines on a white background.
    result = cv2.bitwise_not(canvas)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
comic_panel_extractor/main.py CHANGED
@@ -1,9 +1,9 @@
1
- from .text_detector import TextDetector
2
  from .config import Config
3
  from .image_processor import ImageProcessor
4
  from .panel_extractor import PanelData
5
  from .panel_extractor import PanelExtractor
6
- from .panel_segmentation import main as main_panel_segmentation
7
 
8
  from typing import List, Tuple
9
  from pathlib import Path
@@ -28,41 +28,37 @@ class ComicPanelExtractor:
28
  """Complete pipeline to extract panels from a comic image."""
29
  print(f"Starting panel extraction for: {self.config.input_path}")
30
 
31
- processed_image_path = main_panel_segmentation(self.config.output_folder, self.config.input_path, self.config.input_path)
32
  self.config.black_overlay_input_path = processed_image_path
33
 
34
- # Step 1: Detect and mask text regions
35
- # text_bubbles = self._detect_text_bubbles()
36
- # processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
37
 
38
- # Step 2: Preprocess image
39
- _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
 
 
 
 
 
 
 
 
 
40
 
41
- if is_inverted:
42
- # Step 3: Remove Inner Sketch
43
- processed_image_path = self.image_processor.remove_inner_sketch(processed_image_path)
 
 
44
 
45
- # Step 4: Thin border line
46
  processed_image_path = self.image_processor.thin_image_borders(processed_image_path)
47
 
48
- # Step 3: Clean dilated image
49
- # processed_image_path = self.image_processor.clean_dilated_image(processed_image_path)
50
-
51
- # Step 5: Extract panels
52
  panel_images, panel_data, all_panel_path = self.panel_extractor.extract_panels(
53
  processed_image_path
54
  )
55
 
56
  return panel_images, panel_data, all_panel_path
57
 
58
- def _detect_text_bubbles(self) -> List[dict]:
59
- """Detect text bubbles in the comic image."""
60
- with TextDetector(self.config) as text_detector:
61
- bubbles_path = text_detector.detect_and_group_text()
62
-
63
- with open(bubbles_path, "r", encoding="utf-8") as f:
64
- return json.load(f)
65
-
66
  def cleanup(self):
67
  """Clean up temporary files if needed."""
68
  # Add cleanup logic here if needed
 
1
+ # from .text_detector import TextDetector
2
  from .config import Config
3
  from .image_processor import ImageProcessor
4
  from .panel_extractor import PanelData
5
  from .panel_extractor import PanelExtractor
6
+ from .panel_segmentation import main as basic_panel_segmentation
7
 
8
  from typing import List, Tuple
9
  from pathlib import Path
 
28
  """Complete pipeline to extract panels from a comic image."""
29
  print(f"Starting panel extraction for: {self.config.input_path}")
30
 
31
+ processed_image_path = basic_panel_segmentation(self.config.output_folder, self.config.input_path, self.config.input_path)
32
  self.config.black_overlay_input_path = processed_image_path
33
 
34
+ _, _, processed_image_path = self.image_processor.preprocess_image(processed_image_path)
 
 
35
 
36
+ processed_image_path = self.image_processor.thin_image_borders(processed_image_path)
37
+
38
+ processed_image_path = self.image_processor.remove_dangling_lines(processed_image_path)
39
+
40
+ processed_image_path = self.image_processor.remove_diagonal_only_cells(processed_image_path)
41
+
42
+ processed_image_path = self.image_processor.remove_small_continuity_components(processed_image_path)
43
+
44
+ processed_image_path = self.image_processor.thick_black(processed_image_path)
45
+
46
+ processed_image_path = self.image_processor.remove_small_regions(processed_image_path)
47
 
48
+ processed_image_path = self.image_processor.remove_diagonal_lines(processed_image_path)
49
+
50
+ processed_image_path = self.image_processor.remove_small_regions(processed_image_path)
51
+
52
+ processed_image_path = self.image_processor.connect_horizontal_vertical_gaps(processed_image_path)
53
 
 
54
  processed_image_path = self.image_processor.thin_image_borders(processed_image_path)
55
 
 
 
 
 
56
  panel_images, panel_data, all_panel_path = self.panel_extractor.extract_panels(
57
  processed_image_path
58
  )
59
 
60
  return panel_images, panel_data, all_panel_path
61
 
 
 
 
 
 
 
 
 
62
  def cleanup(self):
63
  """Clean up temporary files if needed."""
64
  # Add cleanup logic here if needed
comic_panel_extractor/panel_extractor.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  import cv2
6
  from dataclasses import dataclass
7
  import os
 
8
 
9
  @dataclass
10
  class PanelData:
@@ -80,25 +81,23 @@ class PanelExtractor:
80
  # Forcefully include first and last row
81
  if 0 not in black_rows:
82
  black_rows.insert(0, 0)
83
- if (height - 1) not in black_rows:
84
- black_rows.append(height - 1)
85
 
 
86
  # Group consecutive rows into gutters
87
  row_gutters = []
88
  if black_rows:
89
  start_row = black_rows[0]
90
- prev_row = black_rows[0]
91
- for y in black_rows:
92
- if y != start_row:
93
- # Only extend if combined height meets min_height_ratio
94
- combined_height = y - start_row + 1
95
- if combined_height / height >= self.config.min_height_ratio:
96
- prev_row = y
97
- row_gutters.append((start_row, prev_row))
98
- start_row = y
99
-
100
- if start_row != prev_row:
101
- row_gutters.append((start_row, prev_row)) # Add last gutter
102
 
103
  print(f"✅ Detected panel row gutters: {row_gutters}")
104
 
@@ -236,6 +235,35 @@ class PanelExtractor:
236
  if fname.startswith("panel_") and os.path.isfile(os.path.join(folder_path, fname))
237
  ])
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
240
  """Save panel images and return panel data."""
241
  visual_output = original.copy()
@@ -247,32 +275,46 @@ class PanelExtractor:
247
  black_overlay_input = cv2.imread(self.config.black_overlay_input_path)
248
 
249
  image_area = width * height
250
- maybe_full_page_panel = None # Store panel that is ≥90% of the page
 
 
 
251
 
252
  for idx, (x1, y1, x2, y2) in enumerate(panels, 1):
253
  # Extract panel image from black_overlay_input
254
  panel_img = black_overlay_input[y1:y2, x1:x2]
255
 
256
- # Check for mostly black content
257
  gray = cv2.cvtColor(panel_img, cv2.COLOR_BGR2GRAY)
258
- black_pixels = np.sum(gray < 30)
259
  total_pixels = gray.size
 
 
260
  black_ratio = black_pixels / total_pixels
 
261
 
262
  if black_ratio > 0.8:
263
  print(f"⚠️ Skipping panel #{idx} — {round(black_ratio * 100, 2)}% black")
264
  continue
 
 
 
265
  else:
266
- print(f"✅ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
267
 
268
- # Check if this panel is ≥90% of the full image
269
  panel_area = (x2 - x1) * (y2 - y1)
270
  if panel_area >= 0.9 * image_area:
271
  print(f"⚠️ Panel #{idx} covers ≥90% of the image — marked for potential use only")
272
  maybe_full_page_panel = (idx, (x1, y1, x2, y2))
273
- continue # Skip for now
 
 
 
274
 
275
- # Save valid smaller panel
 
 
 
 
276
  panel_img = visual_output[y1:y2, x1:x2]
277
  panel_images.append(panel_img)
278
  panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
@@ -285,9 +327,9 @@ class PanelExtractor:
285
 
286
  cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
287
  cv2.putText(visual_output, f"#{idx}", (x1+5, y1+25),
288
- cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
289
 
290
- # If no valid panels were saved, and we had a full-page one, save it
291
  if not panel_images and maybe_full_page_panel and panel_idx == 0:
292
  idx, (x1, y1, x2, y2) = maybe_full_page_panel
293
  panel_img = visual_output[y1:y2, x1:x2]
@@ -302,7 +344,7 @@ class PanelExtractor:
302
 
303
  cv2.rectangle(visual_output, (x1, y1), (x2, y2), (255, 0, 0), 2)
304
  cv2.putText(visual_output, f"#full", (x1+5, y1+25),
305
- cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
306
  print(f"✅ Saved full-page panel as fallback")
307
 
308
  # Save final visualization
@@ -311,3 +353,4 @@ class PanelExtractor:
311
 
312
  print(f"✅ Extracted {len(panel_images)} panels after filtering.")
313
  return panel_images, panel_data, all_panel_path
 
 
5
  import cv2
6
  from dataclasses import dataclass
7
  import os
8
+ import re
9
 
10
  @dataclass
11
  class PanelData:
 
81
  # Forcefully include first and last row
82
  if 0 not in black_rows:
83
  black_rows.insert(0, 0)
84
+ if (height) not in black_rows:
85
+ black_rows.append(height)
86
 
87
+ print(f'📄 Row Points:: {black_rows}')
88
  # Group consecutive rows into gutters
89
  row_gutters = []
90
  if black_rows:
91
  start_row = black_rows[0]
92
+ for i, end_row in enumerate(black_rows):
93
+ # Only extend if combined height meets min_height_ratio
94
+ combined_height = end_row - start_row
95
+ if combined_height / height >= self.config.min_height_ratio:
96
+ print(f'📄 {i+1}) Start: {start_row:04d} | End: {end_row:04d} | Total: {combined_height:04d} | Ratio: {(combined_height / height):04f}')
97
+ row_gutters.append((start_row, end_row))
98
+ start_row = end_row
99
+ elif len(black_rows) == i + 1:
100
+ row_gutters[-1] = (row_gutters[-1][0], end_row)
 
 
 
101
 
102
  print(f"✅ Detected panel row gutters: {row_gutters}")
103
 
 
235
  if fname.startswith("panel_") and os.path.isfile(os.path.join(folder_path, fname))
236
  ])
237
 
238
def load_existing_panels_from_folder(self, folder: str) -> List[Tuple[int, int, int, int]]:
    """
    Collect the bounding boxes encoded in previously saved panel filenames.

    Filenames are expected to look like 'panel_1_(1006, 176, 1757, 1085).jpg';
    the four integers inside the parentheses are returned as one tuple per
    file, in the order they appear in the name.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        One (int, int, int, int) tuple per matching filename. Empty when the
        folder does not exist or holds no matching files.
    """
    # Nothing has been saved yet if the folder is missing, so report "no panels"
    # instead of letting os.listdir raise.
    if not os.path.isdir(folder):
        return []

    pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
    # fullmatch (not match) so leftovers such as '...).jpg.bak' are not
    # mistaken for real panel images.
    matches = (pattern.fullmatch(fname) for fname in os.listdir(folder))
    return [tuple(map(int, m.groups())) for m in matches if m]
249
+
250
def is_fully_contained(self, x1: int, y1: int, x2: int, y2: int,
                       boxes: List[Tuple[int, int, int, int]],
                       threshold: int = 200) -> bool:
    """
    Tell whether the box (x1, y1, x2, y2) lies inside, or nearly inside, any
    of the given boxes.

    A candidate counts as contained when it fits strictly within a reference
    box, or when it fits within that box grown by `threshold` pixels on every
    side.

    Args:
        x1, y1, x2, y2: Corners of the candidate box.
        boxes: Reference boxes as (x1, y1, x2, y2) tuples.
        threshold: Slack, in pixels, allowed on each side for near containment.

    Returns:
        True if at least one reference box (nearly) contains the candidate,
        False otherwise, including when `boxes` is empty.
    """
    def _fits(outer, margin):
        # Candidate must sit within `outer` expanded by `margin` on all sides.
        left, top, right, bottom = outer
        return (
            x1 >= left - margin and y1 >= top - margin and
            x2 <= right + margin and y2 <= bottom + margin
        )

    # Exact containment is checked separately so it still applies when a
    # caller passes a negative threshold.
    return any(_fits(box, 0) or _fits(box, threshold) for box in boxes)
266
+
267
  def _save_panels(self, panels: List[Tuple[int, int, int, int]], original: np.ndarray, width: int, height: int) -> Tuple[List[np.ndarray], List[PanelData], List[str]]:
268
  """Save panel images and return panel data."""
269
  visual_output = original.copy()
 
275
  black_overlay_input = cv2.imread(self.config.black_overlay_input_path)
276
 
277
  image_area = width * height
278
+ maybe_full_page_panel = None
279
+
280
+ # Load existing panels from disk
281
+ existing_coords = self.load_existing_panels_from_folder(self.config.output_folder)
282
 
283
  for idx, (x1, y1, x2, y2) in enumerate(panels, 1):
284
  # Extract panel image from black_overlay_input
285
  panel_img = black_overlay_input[y1:y2, x1:x2]
286
 
287
+ # Check for mostly black/white
288
  gray = cv2.cvtColor(panel_img, cv2.COLOR_BGR2GRAY)
 
289
  total_pixels = gray.size
290
+ black_pixels = np.sum(gray < 30)
291
+ white_pixels = np.sum(gray > 240)
292
  black_ratio = black_pixels / total_pixels
293
+ white_ratio = white_pixels / total_pixels
294
 
295
  if black_ratio > 0.8:
296
  print(f"⚠️ Skipping panel #{idx} — {round(black_ratio * 100, 2)}% black")
297
  continue
298
+ elif white_ratio > 0.9:
299
+ print(f"⚠️ Skipping panel #{idx} — {round(white_ratio * 100, 2)}% white")
300
+ continue
301
  else:
302
+ print(f"✅ Panel #{idx} — {round(black_ratio * 100, 2)}% black, {round(white_ratio * 100, 2)}% white")
303
 
 
304
  panel_area = (x2 - x1) * (y2 - y1)
305
  if panel_area >= 0.9 * image_area:
306
  print(f"⚠️ Panel #{idx} covers ≥90% of the image — marked for potential use only")
307
  maybe_full_page_panel = (idx, (x1, y1, x2, y2))
308
+ continue
309
+
310
+ # Check for full containment in existing and current session
311
+ already_saved_coords = existing_coords + [ (pd.x_start, pd.y_start, pd.x_end, pd.y_end) for pd in panel_data ]
312
 
313
+ if self.is_fully_contained(x1, y1, x2, y2, already_saved_coords):
314
+ print(f"⚠️ Skipping panel #{idx} — fully contained in existing panel")
315
+ continue
316
+
317
+ # Save panel
318
  panel_img = visual_output[y1:y2, x1:x2]
319
  panel_images.append(panel_img)
320
  panel_info = PanelData.from_coordinates(x1, y1, x2, y2)
 
327
 
328
  cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
329
  cv2.putText(visual_output, f"#{idx}", (x1+5, y1+25),
330
+ cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
331
 
332
+ # If no valid panels and full-page backup exists
333
  if not panel_images and maybe_full_page_panel and panel_idx == 0:
334
  idx, (x1, y1, x2, y2) = maybe_full_page_panel
335
  panel_img = visual_output[y1:y2, x1:x2]
 
344
 
345
  cv2.rectangle(visual_output, (x1, y1), (x2, y2), (255, 0, 0), 2)
346
  cv2.putText(visual_output, f"#full", (x1+5, y1+25),
347
+ cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
348
  print(f"✅ Saved full-page panel as fallback")
349
 
350
  # Save final visualization
 
353
 
354
  print(f"✅ Extracted {len(panel_images)} panels after filtering.")
355
  return panel_images, panel_data, all_panel_path
356
+
comic_panel_extractor/panel_segmentation.py CHANGED
@@ -8,13 +8,16 @@ from skimage import measure
8
  from scipy import ndimage as ndi
9
  import re
10
  from skimage.morphology import remove_small_holes
 
 
11
 
 
12
 
13
  def extract_fully_white_panels(
14
  original_image: np.ndarray,
15
  segmentation_mask: np.ndarray,
16
  output_dir: str = "panel_output",
17
- debug_region_dir: str = "panel_debug_regions",
18
  min_area_ratio: float = 0.05,
19
  min_width_ratio: float = 0.05,
20
  min_height_ratio: float = 0.05,
@@ -71,9 +74,9 @@ def extract_fully_white_panels(
71
  w < min_width_ratio * img_w or
72
  h < min_height_ratio * img_h
73
  ):
74
- if save_debug:
75
- cropped_img.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_orig.jpg"))
76
- mask_pil.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_mask.jpg"))
77
  continue
78
 
79
  # 2. Check if region is mostly white (allow small % of black)
@@ -81,7 +84,7 @@ def extract_fully_white_panels(
81
  total_pixels = region.image.size
82
  black_ratio = black_pixel_count / total_pixels
83
 
84
- if black_ratio > 0.05: # Allow up to 1% black pixels
85
  print(f"❌ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
86
  # Save debug info if desired
87
  if save_debug:
@@ -126,6 +129,65 @@ def extract_fully_white_panels(
126
 
127
  return saved_panels
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.ndarray:
131
  """
@@ -139,35 +201,102 @@ def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.n
139
  Binary segmentation mask
140
  """
141
  if save_debug:
142
- os.makedirs("panel_debug_steps", exist_ok=True)
143
- Image.fromarray(image).save("panel_debug_steps/step1_original.jpg")
144
 
145
  # Convert to grayscale
146
  grayscale = rgb2gray(image)
147
  if save_debug:
148
  gray_uint8 = (grayscale * 255).astype('uint8')
149
  # Fix for Pillow warning: Remove mode parameter
150
- Image.fromarray(gray_uint8).save("panel_debug_steps/step2_grayscale.jpg")
151
 
152
  # Edge detection
153
  edges = canny(grayscale)
 
154
  if save_debug:
155
- edges_uint8 = (edges * 255).astype('uint8')
156
- # Fix for Pillow warning: Remove mode parameter
157
- Image.fromarray(edges_uint8).save("panel_debug_steps/step3_edges.jpg")
158
 
 
 
 
159
  # Fill holes in edges
160
- segmentation = ndi.binary_fill_holes(edges)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  # ✅ Remove small black clusters (holes in white regions)
163
  segmentation_cleaned = remove_small_holes(segmentation, area_threshold=500) # adjust threshold as needed
164
 
165
  if save_debug:
166
  segmentation_uint8 = (segmentation_cleaned * 255).astype('uint8')
167
- Image.fromarray(segmentation_uint8).save("panel_debug_steps/step4_segmentation_filled.jpg")
168
 
169
  return segmentation_cleaned
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  def create_image_with_panels_removed(
173
  original_image: np.ndarray,
@@ -189,7 +318,7 @@ def create_image_with_panels_removed(
189
  original_image=original_image,
190
  segmentation_mask=segmentation_mask,
191
  output_dir=output_folder,
192
- debug_region_dir="panel_debug_regions",
193
  save_debug=save_debug
194
  )
195
 
@@ -198,17 +327,18 @@ def create_image_with_panels_removed(
198
  draw = ImageDraw.Draw(im_no_panels)
199
 
200
  # Get regions and black them out
201
- labeled_mask = measure.label(segmentation_mask)
202
- regions = measure.regionprops(labeled_mask)
203
- pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
204
 
205
- for panel_path in saved_panels:
206
- # Extract panel index from filename with bbox format
207
- panel_name = os.path.basename(panel_path)
208
- match = pattern.match(panel_name)
209
- minc, minr, maxc, maxr = map(int, match.groups())
 
 
210
 
211
- draw.rectangle([minc, minr, maxc, maxr], fill=(0, 0, 0))
212
 
213
  # Save the result
214
  im_no_panels.save(output_path)
@@ -219,11 +349,31 @@ def main(output_folder, input_image_path, original_image_path):
219
  # Load the input image
220
  image = imageio.imread(input_image_path)
221
  original_image = imageio.imread(original_image_path)
222
- save_debug = False
223
  # Create segmentation mask
224
  segmentation_mask = create_segmentation_mask(image, save_debug=save_debug)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- pre_process_path = f"{output_folder}/original_with_panels_removed.jpg"
227
  # Create image with panels removed
228
  create_image_with_panels_removed(
229
  original_image=original_image,
 
8
  from scipy import ndimage as ndi
9
  import re
10
  from skimage.morphology import remove_small_holes
11
+ from .image_processor import ImageProcessor
12
+ import cv2
13
 
14
+ pattern = re.compile(r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg")
15
 
16
  def extract_fully_white_panels(
17
  original_image: np.ndarray,
18
  segmentation_mask: np.ndarray,
19
  output_dir: str = "panel_output",
20
+ debug_region_dir: str = "temp_dir/panel_debug_regions",
21
  min_area_ratio: float = 0.05,
22
  min_width_ratio: float = 0.05,
23
  min_height_ratio: float = 0.05,
 
74
  w < min_width_ratio * img_w or
75
  h < min_height_ratio * img_h
76
  ):
77
+ # if save_debug:
78
+ # cropped_img.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_orig.jpg"))
79
+ # mask_pil.save(os.path.join(debug_region_dir, f"{crop_name_prefix}_too_small_mask.jpg"))
80
  continue
81
 
82
  # 2. Check if region is mostly white (allow small % of black)
 
84
  total_pixels = region.image.size
85
  black_ratio = black_pixel_count / total_pixels
86
 
87
+ if black_ratio > 0.1: # Allow up to 1% black pixels
88
  print(f"❌ Black ratio panel #{idx} — {round(black_ratio * 100, 2)}% black")
89
  # Save debug info if desired
90
  if save_debug:
 
129
 
130
  return saved_panels
131
 
132
def get_region_count(binary_seg, min_area_ratio=0.05, min_width_ratio=0.05, min_height_ratio=0.05):
    """
    Count the labelled regions of a 2-D mask whose bounding box is big enough.

    The mask is labelled with `measure.label` and each region's bounding box
    is compared against the mask size; a region is ignored when its bounding
    box area, width, or height falls below the corresponding ratio.

    Args:
        binary_seg: 2-D array (its `.shape` is unpacked as height, width).
        min_area_ratio: Minimum bounding-box area as a fraction of the mask area.
        min_width_ratio: Minimum bounding-box width as a fraction of the mask width.
        min_height_ratio: Minimum bounding-box height as a fraction of the mask height.

    Returns:
        Number of regions that pass all three size checks.
    """
    img_h, img_w = binary_seg.shape
    image_area = img_h * img_w

    count = 0
    for region in measure.regionprops(measure.label(binary_seg)):
        # bbox is unpacked as (min_row, min_col, max_row, max_col)
        minr, minc, maxr, maxc = region.bbox
        w = maxc - minc
        h = maxr - minr

        too_small = (
            w * h < min_area_ratio * image_area or
            w < min_width_ratio * img_w or
            h < min_height_ratio * img_h
        )
        if not too_small:
            count += 1

    return count
155
+
156
def get_black_white_ratio(image_path, threshold=128):
    """
    Measure how much of an image is black versus white after binarization.

    The file is read as grayscale and thresholded with cv2.THRESH_BINARY;
    pixels that end up at 255 are counted as white, everything else as black.

    Parameters:
        image_path (str): Path to the image file.
        threshold (int): Cut-off passed to cv2.threshold (default: 128).

    Returns:
        dict: Keys black_ratio, white_ratio, black_count, white_count,
        total_pixels.

    Raises:
        FileNotFoundError: If cv2.imread could not load the image.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread signals failure with None rather than raising
        raise FileNotFoundError(f"Image not found: {image_path}")

    # cv2.threshold returns (retval, image); only the image is needed
    binarized = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]

    n_total = binarized.size
    n_white = np.count_nonzero(binarized == 255)
    n_black = n_total - n_white

    stats = {}
    stats["black_ratio"] = n_black / n_total
    stats["white_ratio"] = n_white / n_total
    stats["black_count"] = n_black
    stats["white_count"] = n_white
    stats["total_pixels"] = n_total
    return stats
190
+
191
 
192
  def create_segmentation_mask(image: np.ndarray, save_debug: bool = True) -> np.ndarray:
193
  """
 
201
  Binary segmentation mask
202
  """
203
  if save_debug:
204
+ os.makedirs("temp_dir/panel_debug_steps", exist_ok=True)
205
+ Image.fromarray(image).save("temp_dir/panel_debug_steps/step1_original.jpg")
206
 
207
  # Convert to grayscale
208
  grayscale = rgb2gray(image)
209
  if save_debug:
210
  gray_uint8 = (grayscale * 255).astype('uint8')
211
  # Fix for Pillow warning: Remove mode parameter
212
+ Image.fromarray(gray_uint8).save("temp_dir/panel_debug_steps/step2_grayscale.jpg")
213
 
214
  # Edge detection
215
  edges = canny(grayscale)
216
+ edges_uint8 = (edges * 255).astype('uint8')
217
  if save_debug:
218
+ Image.fromarray(edges_uint8).save("temp_dir/panel_debug_steps/step3_edges.jpg")
 
 
219
 
220
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
221
+ seg = cv2.dilate(edges_uint8, kernel, iterations=2)
222
+ seg = cv2.ximgproc.thinning(seg)
223
  # Fill holes in edges
224
+ segmentation = ndi.binary_fill_holes(seg)
225
+ # Ensure it's a NumPy boolean or 0/1 array
226
+ binary_seg = segmentation.astype(np.uint8)
227
+
228
+ # Count white and black pixels
229
+ total_pixels = binary_seg.size
230
+ white_pixels = np.count_nonzero(binary_seg) # 1s
231
+
232
+ # Ratios
233
+ white_ratio = white_pixels / total_pixels
234
+
235
+ region_count = get_region_count(binary_seg)
236
+ if white_ratio > 0.8 or region_count == 1:
237
+ print(f"⚠️ white is maximum hence reverting to only binary_fill_holes")
238
+ # Fill holes in edges
239
+ segmentation = ndi.binary_fill_holes(edges)
240
 
241
  # ✅ Remove small black clusters (holes in white regions)
242
  segmentation_cleaned = remove_small_holes(segmentation, area_threshold=500) # adjust threshold as needed
243
 
244
  if save_debug:
245
  segmentation_uint8 = (segmentation_cleaned * 255).astype('uint8')
246
+ Image.fromarray(segmentation_uint8).save("temp_dir/panel_debug_steps/step4_segmentation_filled.jpg")
247
 
248
  return segmentation_cleaned
249
 
250
def boxes_are_close(box1, box2, thresh):
    """
    Report whether two boxes overlap or lie within `thresh` of each other.

    Boxes are indexed as [x_min, y_min, x_max, y_max]. The boxes are close
    only when their extents overlap, or are separated by at most `thresh`,
    on both the horizontal and the vertical axis.
    """
    def _near(lo1, hi1, lo2, hi2):
        # 1-D interval test: the intervals touch once box2 is padded by thresh
        return hi1 >= lo2 - thresh and lo1 <= hi2 + thresh

    return (
        _near(box1[0], box1[2], box2[0], box2[2])
        and _near(box1[1], box1[3], box2[1], box2[3])
    )
256
+
257
def merge_close_panels(saved_panels, draw, distance_thresh=20):
    """
    Merge panels with close bounding boxes and fill them on the draw object.

    Bounding boxes are parsed from each path's basename using the module-level
    `pattern`; paths whose name does not match are ignored. Boxes that are
    close to each other (per `boxes_are_close`) are combined into their common
    bounding rectangle, and every resulting rectangle is filled with black.

    Args:
        saved_panels: Iterable of saved panel file paths.
        draw: Drawing object exposing `rectangle(box, fill=...)`
            (the caller passes a PIL ImageDraw).
        distance_thresh: Maximum gap, in pixels, for two boxes to be merged.

    Returns:
        None. The only effect is the rectangles drawn on `draw`.
    """
    # Step 1: extract bounding boxes from the filenames
    boxes = []
    for panel_path in saved_panels:
        match = pattern.match(os.path.basename(panel_path))
        if match:
            boxes.append([int(value) for value in match.groups()])

    # Step 2: grow each group until no remaining box is close to it
    merged = []
    used = [False] * len(boxes)

    for i, seed in enumerate(boxes):
        if used[i]:
            continue
        used[i] = True
        merged_box = list(seed)

        # Compare against the growing merged box (not just the seed) and repeat
        # after every growth, so chains such as A~B, B~C end up in one group.
        grew = True
        while grew:
            grew = False
            for j, candidate in enumerate(boxes):
                if used[j]:
                    continue
                if boxes_are_close(merged_box, candidate, distance_thresh):
                    merged_box = [
                        min(merged_box[0], candidate[0]),
                        min(merged_box[1], candidate[1]),
                        max(merged_box[2], candidate[2]),
                        max(merged_box[3], candidate[3]),
                    ]
                    used[j] = True
                    grew = True

        merged.append(merged_box)

    # Step 3: fill merged boxes
    for box in merged:
        draw.rectangle(box, fill=(0, 0, 0))
300
 
301
  def create_image_with_panels_removed(
302
  original_image: np.ndarray,
 
318
  original_image=original_image,
319
  segmentation_mask=segmentation_mask,
320
  output_dir=output_folder,
321
+ debug_region_dir="temp_dir/panel_debug_regions",
322
  save_debug=save_debug
323
  )
324
 
 
327
  draw = ImageDraw.Draw(im_no_panels)
328
 
329
  # Get regions and black them out
330
+ # labeled_mask = measure.label(segmentation_mask)
331
+ # regions = measure.regionprops(labeled_mask)
 
332
 
333
+ # for panel_path in saved_panels:
334
+ # # Extract panel index from filename with bbox format
335
+ # panel_name = os.path.basename(panel_path)
336
+ # match = pattern.match(panel_name)
337
+ # minc, minr, maxc, maxr = map(int, match.groups())
338
+
339
+ # draw.rectangle([minc, minr, maxc, maxr], fill=(0, 0, 0))
340
 
341
+ merge_close_panels(saved_panels, draw, distance_thresh=25)
342
 
343
  # Save the result
344
  im_no_panels.save(output_path)
 
349
  # Load the input image
350
  image = imageio.imread(input_image_path)
351
  original_image = imageio.imread(original_image_path)
352
+ save_debug = True
353
  # Create segmentation mask
354
  segmentation_mask = create_segmentation_mask(image, save_debug=save_debug)
355
+ segmentation_mask_output_path = f"temp_dir/panel_debug_steps/step4_segmentation_filled.jpg"
356
+
357
+ pixel_ratios = get_black_white_ratio(segmentation_mask_output_path)
358
+
359
+ if pixel_ratios['black_ratio'] < 0.8:
360
+ print(f"✅ black is less hence applying other features")
361
+ image_pros = ImageProcessor()
362
+ new_path = image_pros.thick_black(segmentation_mask_output_path, file_name="step5_thick.jpg", output_folder="temp_dir/panel_debug_steps")
363
+
364
+ new_path = image_pros.connect_horizontal_vertical_gaps(new_path, file_name="step6_continuity.jpg", output_folder="temp_dir/panel_debug_steps")
365
+
366
+ pixel_ratios = get_black_white_ratio(new_path)
367
+ if pixel_ratios['black_ratio'] < 0.8:
368
+ new_path = image_pros.thin_image_borders(new_path, file_name="step7_thin.jpg", output_folder="temp_dir/panel_debug_steps")
369
+
370
+ new_path = image_pros.remove_dangling_lines(new_path, file_name="step8_remove_dangling_lines.jpg", output_folder="temp_dir/panel_debug_steps")
371
+
372
+ new_path = image_pros.thick_black(new_path, file_name="step9_thick.jpg", output_folder="temp_dir/panel_debug_steps")
373
+
374
+ segmentation_mask = cv2.imread(new_path, cv2.IMREAD_GRAYSCALE)
375
 
376
+ pre_process_path = f"{output_folder}/00_original_with_panels_removed.jpg"
377
  # Create image with panels removed
378
  create_image_with_panels_removed(
379
  original_image=original_image,
requirements.txt CHANGED
@@ -6,4 +6,5 @@ fastapi
6
  uvicorn
7
  python-multipart
8
  jinja2
9
- scikit-image
 
 
6
  uvicorn
7
  python-multipart
8
  jinja2
9
+ scikit-image
10
+ imagehash