Spaces:
Running
Running
med
Browse files
comic_panel_extractor/config.py
CHANGED
|
@@ -11,8 +11,8 @@ class Config:
|
|
| 11 |
text_cood_file_name: str = "detect_and_group_text.json"
|
| 12 |
min_text_length: int = 2
|
| 13 |
min_area_ratio: float = 0.05
|
| 14 |
-
min_width_ratio: float = 0.
|
| 15 |
-
min_height_ratio: float = 0.
|
| 16 |
|
| 17 |
# Additional parameters for BorderPanelExtractor
|
| 18 |
panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
|
|
|
|
| 11 |
text_cood_file_name: str = "detect_and_group_text.json"
|
| 12 |
min_text_length: int = 2
|
| 13 |
min_area_ratio: float = 0.05
|
| 14 |
+
min_width_ratio: float = 0.15
|
| 15 |
+
min_height_ratio: float = 0.15
|
| 16 |
|
| 17 |
# Additional parameters for BorderPanelExtractor
|
| 18 |
panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
|
comic_panel_extractor/image_processor.py
CHANGED
|
@@ -12,6 +12,7 @@ from tqdm import tqdm
|
|
| 12 |
from PIL import Image
|
| 13 |
import numpy as np
|
| 14 |
from sklearn.cluster import KMeans
|
|
|
|
| 15 |
|
| 16 |
class ImageProcessor:
|
| 17 |
"""Handles image preprocessing operations."""
|
|
@@ -247,6 +248,68 @@ class ImageProcessor:
|
|
| 247 |
cv2.imwrite(output_path, result)
|
| 248 |
return output_path
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
|
| 251 |
output_folder = output_folder or self.config.output_folder
|
| 252 |
|
|
@@ -277,7 +340,7 @@ class ImageProcessor:
|
|
| 277 |
height = maxr - minr
|
| 278 |
|
| 279 |
# Bounding box filter
|
| 280 |
-
if
|
| 281 |
if (width/width_) < 0.9 and (height/height_) < 0.9:
|
| 282 |
clean_mask[labeled == region.label] = 0 # Remove small region
|
| 283 |
cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
|
|
@@ -292,19 +355,18 @@ class ImageProcessor:
|
|
| 292 |
for line in lines:
|
| 293 |
x1, y1, x2, y2 = line[0]
|
| 294 |
angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)
|
| 295 |
-
length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
if length / width_ > self.config.min_width_ratio:
|
| 302 |
-
break # keep region
|
| 303 |
else:
|
|
|
|
| 304 |
# If no qualifying line found, remove region
|
| 305 |
clean_mask[labeled == region.label] = 0
|
| 306 |
cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)
|
| 307 |
-
|
| 308 |
# No lines, remove region
|
| 309 |
clean_mask[labeled == region.label] = 0
|
| 310 |
cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)
|
|
@@ -602,81 +664,117 @@ class ImageProcessor:
|
|
| 602 |
|
| 603 |
def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
|
| 604 |
output_folder = output_folder or self.config.output_folder
|
| 605 |
-
|
| 606 |
-
# Load the image in grayscale
|
| 607 |
-
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
|
| 608 |
-
if img is None:
|
| 609 |
-
raise ValueError("Unable to load the image. Check the file path.")
|
| 610 |
-
height, width = img.shape
|
| 611 |
-
# Threshold to binary (invert if lines are black on white background)
|
| 612 |
-
_, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
|
| 613 |
-
|
| 614 |
-
rows, cols = binary.shape
|
| 615 |
-
canvas = binary.copy() # Work on a copy (lines=255 on black)
|
| 616 |
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 678 |
|
| 679 |
# Save the result
|
| 680 |
output_path = self.get_output_path(output_folder, file_name)
|
| 681 |
-
cv2.imwrite(output_path,
|
| 682 |
-
return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from PIL import Image
|
| 13 |
import numpy as np
|
| 14 |
from sklearn.cluster import KMeans
|
| 15 |
+
import math
|
| 16 |
|
| 17 |
class ImageProcessor:
|
| 18 |
"""Handles image preprocessing operations."""
|
|
|
|
| 248 |
cv2.imwrite(output_path, result)
|
| 249 |
return output_path
|
| 250 |
|
| 251 |
+
def to_int_box(self, line):
    """Convert one detected line segment to integer endpoint coordinates.

    Works for both Hough output (``line[0]`` is ``[x1, y1, x2, y2]``) and
    LSD output, where the coordinates may be floats; float values are
    truncated toward zero by ``int``.

    Args:
        line: A single detection, a sequence whose first element holds
            the four endpoint coordinates.

    Returns:
        tuple: ``(x1, y1, x2, y2)`` as plain ints.
    """
    # Return a tuple instead of a lazy ``map`` object: a map can be
    # consumed only once and cannot be indexed or len()-ed, while a
    # tuple still unpacks exactly like the original return value did.
    return tuple(int(v) for v in line[0])
|
| 253 |
+
|
| 254 |
+
def remove_diagonal_lines_and_set_white(self, image_path, file_name="remove_diagonal_lines_and_set_white.jpg", output_folder=None):
    """Erase diagonal line segments from an image by painting them white.

    Detects line segments with OpenCV's LSD detector, leaves near-horizontal
    and near-vertical segments untouched, and covers every diagonal segment
    with a filled white rectangle (segment bounding box plus 2 px padding).

    Args:
        image_path: Path of the input image.
        file_name: Name of the output file written into ``output_folder``.
        output_folder: Target folder; defaults to ``self.config.output_folder``.

    Returns:
        Path of the saved, cleaned image.

    Raises:
        ValueError: If the image cannot be loaded from ``image_path``.
    """
    output_folder = output_folder or self.config.output_folder

    # Load image; fail fast instead of crashing inside cv2.cvtColor below.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Unable to load the image. Check the file path.")
    # Single grayscale conversion (the original converted twice).
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Edge map retained for the commented-out HoughLinesP variant below;
    # the LSD detector itself runs on the plain grayscale image.
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    edges = cv2.Canny(blurred, 50, 150, apertureSize=3)

    # Dilate to connect broken segments
    kernel = np.ones((2, 2), np.uint8)
    edges = cv2.dilate(edges, kernel, iterations=1)

    # More sensitive Hough transform (kept for reference)
    # HoughLinesP_lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=5, maxLineGap=10)

    # Detect line segments with the LSD detector.
    lsd = cv2.createLineSegmentDetector(0)
    lines, _, _, _ = lsd.detect(gray)

    # Copy image to edit
    output = image.copy()

    combined_lines = []
    if lines is not None:
        combined_lines.extend(lines)

    # if HoughLinesP_lines is not None:
    #     combined_lines.extend(HoughLinesP_lines)

    # ``combined_lines`` is always a list (never None), so iterate directly
    # instead of the original always-true ``is not None`` check.
    for line in combined_lines:
        x1, y1, x2, y2 = self.to_int_box(line)  # convert float coords to int

        # Angle of the segment in degrees (absolute value of atan2).
        angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)

        # Keep horizontal-ish and vertical-ish lines: skip them untouched.
        if (80 < angle < 100) or (170 < angle < 190) or angle < 10 or angle > 350:
            continue

        # Bounding box of the diagonal segment, with padding.
        padding = 2
        xmin = min(x1, x2) - padding
        xmax = max(x1, x2) + padding
        ymin = min(y1, y2) - padding
        ymax = max(y1, y2) + padding

        # Draw white rectangle (erase diagonal line)
        cv2.rectangle(output, (xmin, ymin), (xmax, ymax), (255, 255, 255), thickness=-1)

    # Save cleaned image
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
|
| 312 |
+
|
| 313 |
def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
|
| 314 |
output_folder = output_folder or self.config.output_folder
|
| 315 |
|
|
|
|
| 340 |
height = maxr - minr
|
| 341 |
|
| 342 |
# Bounding box filter
|
| 343 |
+
if width < width_ * self.config.min_width_ratio and height < height_ * self.config.min_height_ratio:
|
| 344 |
if (width/width_) < 0.9 and (height/height_) < 0.9:
|
| 345 |
clean_mask[labeled == region.label] = 0 # Remove small region
|
| 346 |
cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
|
|
|
|
| 355 |
for line in lines:
|
| 356 |
x1, y1, x2, y2 = line[0]
|
| 357 |
angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)
|
| 358 |
+
# length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
| 359 |
+
line_width = abs(x2 - x1)
|
| 360 |
+
line_height = abs(y2 - y1)
|
| 361 |
+
|
| 362 |
+
if line_height < height_ * self.config.min_height_ratio and line_width < width_ * self.config.min_width_ratio:
|
| 363 |
+
break
|
|
|
|
|
|
|
| 364 |
else:
|
| 365 |
+
# Only runs if no 'break' occurred
|
| 366 |
# If no qualifying line found, remove region
|
| 367 |
clean_mask[labeled == region.label] = 0
|
| 368 |
cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)
|
| 369 |
+
elif width < width_ * self.config.min_width_ratio and height < height_ * self.config.min_height_ratio:
|
| 370 |
# No lines, remove region
|
| 371 |
clean_mask[labeled == region.label] = 0
|
| 372 |
cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)
|
|
|
|
| 664 |
|
| 665 |
def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
    """Bridge gaps between nearby horizontal and vertical border lines.

    Detects straight lines with a probabilistic Hough transform, keeps only
    near-horizontal (< 5 deg) and near-vertical (85-95 deg) segments, merges
    roughly collinear segments that lie within the configured distance
    ratios of each other, and redraws the merged lines with a thick black
    stroke so small gaps in panel borders are closed.

    Args:
        image_path: Path of the input image.
        file_name: Name of the output file written into ``output_folder``.
        output_folder: Target folder; defaults to ``self.config.output_folder``.

    Returns:
        Path of the saved result image.

    Raises:
        ValueError: If the image cannot be loaded from ``image_path``.
    """
    output_folder = output_folder or self.config.output_folder

    # Load image; fail fast with a clear message (matches the grayscale
    # variant this method replaced) instead of an AttributeError below.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Unable to load the image. Check the file path.")
    height, width = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect all lines
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)

    output = image.copy()

    def angle_of_line(x1, y1, x2, y2):
        # Absolute angle in degrees; 0 = horizontal, 90 = vertical.
        return abs(math.degrees(math.atan2(y2 - y1, x2 - x1)))

    # Minimum lengths a kept segment must reach (0 = keep all). These are
    # loop-invariant, so they are hoisted out of the detection loop.
    min_width = 0
    min_height = 0

    # Filter for only horizontal (~0 deg) and vertical (~90 deg) lines.
    filtered_lines = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = angle_of_line(x1, y1, x2, y2)

            if angle < 5:
                line_width = abs(x2 - x1)
                if line_width >= min_width:
                    filtered_lines.append([x1, y1, x2, y2])

            elif 85 < angle < 95:
                line_height = abs(y2 - y1)
                if line_height >= min_height:
                    filtered_lines.append([x1, y1, x2, y2])

    # Merge similar lines (if needed)
    merged_lines = []
    used = [False] * len(filtered_lines)
    horizontal_alignment_threshold = 5
    horizontal_distance_threshold = width * self.config.min_width_ratio
    vertical_alignment_threshold = 5
    vertical_distance_threshold = height * self.config.min_height_ratio
    overlap_allowance = 10

    for i in range(len(filtered_lines)):
        if used[i]:
            continue
        x1a, y1a, x2a, y2a = filtered_lines[i]
        merged = [x1a, y1a, x2a, y2a]
        used[i] = True
        for j in range(i + 1, len(filtered_lines)):
            if used[j]:
                continue
            x1b, y1b, x2b, y2b = filtered_lines[j]

            # NOTE(review): candidates are compared against the SEED line
            # (x1a/y1a), not the growing ``merged`` span — confirm this is
            # intentional before widening the merge behavior.
            # Check if both are horizontal
            if abs(y1a - y2a) < horizontal_alignment_threshold and abs(y1b - y2b) < horizontal_alignment_threshold and abs(y1a - y1b) < horizontal_distance_threshold:
                if max(x1a, x2a) >= min(x1b, x2b) - overlap_allowance or max(x1b, x2b) >= min(x1a, x2a) - overlap_allowance:
                    merged = [
                        min(merged[0], merged[2], x1b, x2b),
                        y1a,
                        max(merged[0], merged[2], x1b, x2b),
                        y1a
                    ]
                    used[j] = True

            # Check if both are vertical
            elif abs(x1a - x2a) < vertical_alignment_threshold and abs(x1b - x2b) < vertical_alignment_threshold and abs(x1a - x1b) < vertical_distance_threshold:
                if max(y1a, y2a) >= min(y1b, y2b) - overlap_allowance or max(y1b, y2b) >= min(y1a, y2a) - overlap_allowance:
                    merged = [
                        x1a,
                        min(merged[1], merged[3], y1b, y2b),
                        x1a,
                        max(merged[1], merged[3], y1b, y2b)
                    ]
                    used[j] = True

        merged_lines.append(merged)

    # Draw merged lines with a thick black stroke to close the gaps.
    for x1, y1, x2, y2 in merged_lines:
        cv2.line(output, (x1, y1), (x2, y2), (0, 0, 0), 20)

    # Save the result
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
|
| 754 |
+
|
| 755 |
+
def detect_objects_and_draw_boxess_and_set_white(self, image_path, file_name="all_objects_detected.jpg", output_folder=None):
    """White out small detected objects (likely text/noise, not panels).

    Thresholds the image to binary, finds external contours, and fills the
    bounding box of every contour that is small relative to the page —
    both height and width under the configured ratios — with solid white.

    Args:
        image_path: Path of the input image.
        file_name: Name of the output file written into ``output_folder``.
        output_folder: Target folder; defaults to ``self.config.output_folder``.

    Returns:
        Path of the saved output image.

    Raises:
        ValueError: If the image cannot be loaded from ``image_path``.
    """
    output_folder = output_folder or self.config.output_folder

    # Load image; fail fast (consistent with the sibling methods) instead
    # of crashing on ``image.shape`` when the path is wrong.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Unable to load the image. Check the file path.")
    height, width = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Threshold to binary (inverted: dark objects become white foreground).
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # External contours only — one bounding box per connected object.
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw bounding boxes
    output = image.copy()
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)

        # Erase only objects that are small in BOTH dimensions, so real
        # panel borders (long in at least one direction) survive.
        if h < height * self.config.min_height_ratio and w < width * self.config.min_width_ratio:
            cv2.rectangle(output, (x, y), (x + w, y + h), (255, 255, 255), -1)

    # Save output
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
|