jebin2 committed on
Commit
6d5c490
·
1 Parent(s): 7f3435a
comic_panel_extractor/config.py CHANGED
@@ -11,8 +11,8 @@ class Config:
11
  text_cood_file_name: str = "detect_and_group_text.json"
12
  min_text_length: int = 2
13
  min_area_ratio: float = 0.05
14
- min_width_ratio: float = 0.05
15
- min_height_ratio: float = 0.1
16
 
17
  # Additional parameters for BorderPanelExtractor
18
  panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
 
11
  text_cood_file_name: str = "detect_and_group_text.json"
12
  min_text_length: int = 2
13
  min_area_ratio: float = 0.05
14
+ min_width_ratio: float = 0.15
15
+ min_height_ratio: float = 0.15
16
 
17
  # Additional parameters for BorderPanelExtractor
18
  panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
comic_panel_extractor/image_processor.py CHANGED
@@ -12,6 +12,7 @@ from tqdm import tqdm
12
  from PIL import Image
13
  import numpy as np
14
  from sklearn.cluster import KMeans
 
15
 
16
  class ImageProcessor:
17
  """Handles image preprocessing operations."""
@@ -247,6 +248,68 @@ class ImageProcessor:
247
  cv2.imwrite(output_path, result)
248
  return output_path
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
251
  output_folder = output_folder or self.config.output_folder
252
 
@@ -277,7 +340,7 @@ class ImageProcessor:
277
  height = maxr - minr
278
 
279
  # Bounding box filter
280
- if (width < width_ * self.config.min_width_ratio or height < height_ * self.config.min_height_ratio):
281
  if (width/width_) < 0.9 and (height/height_) < 0.9:
282
  clean_mask[labeled == region.label] = 0 # Remove small region
283
  cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
@@ -292,19 +355,18 @@ class ImageProcessor:
292
  for line in lines:
293
  x1, y1, x2, y2 = line[0]
294
  angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)
295
- length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
296
-
297
- if 80 < angle < 100:
298
- if length / height_ > self.config.min_height_ratio:
299
- break # keep region
300
- elif angle < 10 or angle > 170:
301
- if length / width_ > self.config.min_width_ratio:
302
- break # keep region
303
  else:
 
304
  # If no qualifying line found, remove region
305
  clean_mask[labeled == region.label] = 0
306
  cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)
307
- else:
308
  # No lines, remove region
309
  clean_mask[labeled == region.label] = 0
310
  cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)
@@ -602,81 +664,117 @@ class ImageProcessor:
602
 
603
  def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
604
  output_folder = output_folder or self.config.output_folder
605
-
606
- # Load the image in grayscale
607
- img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
608
- if img is None:
609
- raise ValueError("Unable to load the image. Check the file path.")
610
- height, width = img.shape
611
- # Threshold to binary (invert if lines are black on white background)
612
- _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
613
-
614
- rows, cols = binary.shape
615
- canvas = binary.copy() # Work on a copy (lines=255 on black)
616
 
617
- gap_threshold = width * self.config.min_width_ratio
618
- # Scan row by row to connect small horizontal gaps
619
- for r in range(rows):
620
- col = 0
621
- while col < cols:
622
- if canvas[r, col] == 255:
623
- # Find start and end of current segment
624
- start = col
625
- while col < cols and canvas[r, col] == 255:
626
- col += 1
627
- end = col - 1
628
-
629
- # Look for next segment in the same row
630
- next_start = col
631
- while next_start < cols and canvas[r, next_start] == 0:
632
- next_start += 1
633
- if next_start < cols:
634
- gap = next_start - end - 1
635
- if gap >= 0 and gap <= gap_threshold:
636
- # Fill the gap
637
- for fill_col in range(end + 1, next_start):
638
- canvas[r, fill_col] = 255
639
- col = next_start # Jump to next segment
640
- else:
641
- col = next_start
642
- else:
643
- col = next_start
644
- else:
645
- col += 1
646
- gap_threshold = height * self.config.min_height_ratio
647
- # Scan column by column to connect small vertical gaps
648
- for c in range(cols):
649
- row = 0
650
- while row < rows:
651
- if canvas[row, c] == 255:
652
- # Find start and end of current segment
653
- start = row
654
- while row < rows and canvas[row, c] == 255:
655
- row += 1
656
- end = row - 1
657
-
658
- # Look for next segment in the same column
659
- next_start = row
660
- while next_start < rows and canvas[next_start, c] == 0:
661
- next_start += 1
662
- if next_start < rows:
663
- gap = next_start - end - 1
664
- if gap >= 0 and gap <= gap_threshold:
665
- # Fill the gap
666
- for fill_row in range(end + 1, next_start):
667
- canvas[fill_row, c] = 255
668
- row = next_start # Jump to next segment
669
- else:
670
- row = next_start
671
- else:
672
- row = next_start
673
- else:
674
- row += 1
675
-
676
- # Invert back to original style (black lines on white)
677
- result = cv2.bitwise_not(canvas)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
 
679
  # Save the result
680
  output_path = self.get_output_path(output_folder, file_name)
681
- cv2.imwrite(output_path, result)
682
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  from PIL import Image
13
  import numpy as np
14
  from sklearn.cluster import KMeans
15
+ import math
16
 
17
  class ImageProcessor:
18
  """Handles image preprocessing operations."""
 
248
  cv2.imwrite(output_path, result)
249
  return output_path
250
 
251
def to_int_box(self, line):
    """Convert a detected line segment's endpoints to integer coordinates.

    Works for both Hough (``[[x1, y1, x2, y2]]``) and LSD output rows,
    since both store the four coordinates in ``line[0]``.

    Returns:
        A ``(x1, y1, x2, y2)`` tuple of ints (values truncated toward
        zero).  A tuple — unlike the single-use ``map`` iterator the
        previous version returned — can be unpacked, indexed, and
        iterated more than once.
    """
    return tuple(int(v) for v in line[0])
253
+
254
def remove_diagonal_lines_and_set_white(self, image_path, file_name="remove_diagonal_lines_and_set_white.jpg", output_folder=None):
    """Erase diagonal line segments from an image by painting them white.

    Detects line segments with OpenCV's Line Segment Detector (LSD),
    keeps segments that are near-horizontal or near-vertical, and covers
    every remaining (diagonal) segment with a filled white rectangle over
    its padded bounding box.

    Args:
        image_path: Path of the image to clean.
        file_name: Name of the output file written into the output folder.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the saved, cleaned image.

    Raises:
        ValueError: If the image cannot be loaded from ``image_path``.
    """
    output_folder = output_folder or self.config.output_folder

    # Load image (fail loudly instead of crashing later on a None frame).
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Unable to load the image. Check the file path.")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect line segments with LSD.
    # (An additional HoughLinesP pass on Canny edges was previously
    # considered as a second segment source; LSD alone is used now, so
    # the unused blur/Canny/dilate pipeline has been removed.)
    lsd = cv2.createLineSegmentDetector(0)
    lines, _, _, _ = lsd.detect(gray)

    # Copy image to edit.
    output = image.copy()

    combined_lines = []
    if lines is not None:
        combined_lines.extend(lines)

    for line in combined_lines:
        x1, y1, x2, y2 = self.to_int_box(line)  # Convert float to int

        # Absolute segment angle in degrees; abs(arctan2 * 180/pi) is in [0, 180].
        angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)

        # Skip (keep) horizontal (~0 or ~180 deg) and vertical (~90 deg) lines.
        # Since angle is in [0, 180], the old `170 < angle < 190` and
        # `angle > 350` branches reduce to `angle > 170`.
        if (80 < angle < 100) or angle < 10 or angle > 170:
            continue

        # Diagonal segment: erase it with a filled white rectangle over
        # its bounding box, padded slightly to cover anti-aliased edges.
        padding = 2
        xmin = min(x1, x2) - padding
        xmax = max(x1, x2) + padding
        ymin = min(y1, y2) - padding
        ymax = max(y1, y2) + padding
        cv2.rectangle(output, (xmin, ymin), (xmax, ymax), (255, 255, 255), thickness=-1)

    # Save cleaned image.
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
312
+
313
  def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
314
  output_folder = output_folder or self.config.output_folder
315
 
 
340
  height = maxr - minr
341
 
342
  # Bounding box filter
343
+ if width < width_ * self.config.min_width_ratio and height < height_ * self.config.min_height_ratio:
344
  if (width/width_) < 0.9 and (height/height_) < 0.9:
345
  clean_mask[labeled == region.label] = 0 # Remove small region
346
  cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
 
355
  for line in lines:
356
  x1, y1, x2, y2 = line[0]
357
  angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)
358
+ # length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
359
+ line_width = abs(x2 - x1)
360
+ line_height = abs(y2 - y1)
361
+
362
+ if line_height < height_ * self.config.min_height_ratio and line_width < width_ * self.config.min_width_ratio:
363
+ break
 
 
364
  else:
365
+ # Only runs if no 'break' occurred
366
  # If no qualifying line found, remove region
367
  clean_mask[labeled == region.label] = 0
368
  cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)
369
+ elif width < width_ * self.config.min_width_ratio and height < height_ * self.config.min_height_ratio:
370
  # No lines, remove region
371
  clean_mask[labeled == region.label] = 0
372
  cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)
 
664
 
665
def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
    """Bridge gaps between nearby horizontal/vertical line segments.

    Detects straight segments with a probabilistic Hough transform,
    keeps only near-horizontal (<5 deg) and near-vertical (85-95 deg)
    ones, merges segments that are aligned and close to each other, and
    draws the merged segments as thick black lines on a copy of the
    input image.

    Args:
        image_path: Path of the image to process.
        file_name: Name of the output file written into the output folder.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the saved result image.

    Raises:
        ValueError: If the image cannot be loaded from ``image_path``.
    """
    output_folder = output_folder or self.config.output_folder

    image = cv2.imread(image_path)
    if image is None:
        # The previous implementation validated the load; keep that guard.
        raise ValueError("Unable to load the image. Check the file path.")
    height, width = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect all line segments.
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)

    output = image.copy()

    def angle_of_line(x1, y1, x2, y2):
        # Absolute angle in degrees, in [0, 180].
        return abs(math.degrees(math.atan2(y2 - y1, x2 - x1)))

    # Filter for only horizontal (~0 deg) and vertical (~90 deg) lines.
    # (The old per-line `min_width = 0` / `min_height = 0` length checks
    # were always true and have been dropped — behavior is unchanged.)
    filtered_lines = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = angle_of_line(x1, y1, x2, y2)
            if angle < 5 or 85 < angle < 95:
                filtered_lines.append([x1, y1, x2, y2])

    # Merge aligned, nearby segments into single longer segments.
    merged_lines = []
    used = [False] * len(filtered_lines)
    horizontal_alignment_threshold = 5
    horizontal_distance_threshold = width * self.config.min_width_ratio
    vertical_alignment_threshold = 5
    vertical_distance_threshold = height * self.config.min_height_ratio
    overlap_allowance = 10

    for i in range(len(filtered_lines)):
        if used[i]:
            continue
        x1a, y1a, x2a, y2a = filtered_lines[i]
        merged = [x1a, y1a, x2a, y2a]
        used[i] = True
        for j in range(i + 1, len(filtered_lines)):
            if used[j]:
                continue
            x1b, y1b, x2b, y2b = filtered_lines[j]

            # Check if both are horizontal (near-flat, and close in y).
            # NOTE(review): alignment/proximity is tested against line i's
            # ORIGINAL coordinates, not the growing `merged` extent, so long
            # chains of segments may not all coalesce — confirm intended.
            if abs(y1a - y2a) < horizontal_alignment_threshold and abs(y1b - y2b) < horizontal_alignment_threshold and abs(y1a - y1b) < horizontal_distance_threshold:
                if max(x1a, x2a) >= min(x1b, x2b) - overlap_allowance or max(x1b, x2b) >= min(x1a, x2a) - overlap_allowance:
                    merged = [
                        min(merged[0], merged[2], x1b, x2b),
                        y1a,
                        max(merged[0], merged[2], x1b, x2b),
                        y1a
                    ]
                    used[j] = True

            # Check if both are vertical (near-upright, and close in x).
            elif abs(x1a - x2a) < vertical_alignment_threshold and abs(x1b - x2b) < vertical_alignment_threshold and abs(x1a - x1b) < vertical_distance_threshold:
                if max(y1a, y2a) >= min(y1b, y2b) - overlap_allowance or max(y1b, y2b) >= min(y1a, y2a) - overlap_allowance:
                    merged = [
                        x1a,
                        min(merged[1], merged[3], y1b, y2b),
                        x1a,
                        max(merged[1], merged[3], y1b, y2b)
                    ]
                    used[j] = True

        merged_lines.append(merged)

    # Draw merged lines as thick black strokes.
    for x1, y1, x2, y2 in merged_lines:
        cv2.line(output, (x1, y1), (x2, y2), (0, 0, 0), 20)

    # Save the result.
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
754
+
755
def detect_objects_and_draw_boxess_and_set_white(self, image_path, file_name="all_objects_detected.jpg", output_folder=None):
    """Whiten out small connected objects in an image.

    Thresholds the image to a binary mask, finds external contours, and
    paints a filled white rectangle over every contour whose bounding
    box is small relative to the image (below the configured
    ``min_width_ratio`` / ``min_height_ratio`` in BOTH dimensions).

    (Method name — including the "boxess" typo — is kept as-is for
    backward compatibility with existing callers.)

    Args:
        image_path: Path of the image to process.
        file_name: Name of the output file written into the output folder.
        output_folder: Destination folder; defaults to
            ``self.config.output_folder``.

    Returns:
        Path of the saved result image.

    Raises:
        ValueError: If the image cannot be loaded from ``image_path``.
    """
    output_folder = output_folder or self.config.output_folder

    # Load image (fail loudly instead of crashing on image.shape below).
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Unable to load the image. Check the file path.")
    height, width = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Threshold to binary (inverted: objects become white on black).
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # External contours only: one bounding box per top-level object.
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Erase qualifying objects on a copy of the input.
    output = image.copy()
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)

        # Remove the object only if it is small in both width and height.
        if h < height * self.config.min_height_ratio and w < width * self.config.min_width_ratio:
            cv2.rectangle(output, (x, y), (x + w, y + h), (255, 255, 255), -1)

    # Save output.
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path