Spaces:
Running
Running
File size: 8,171 Bytes
a7da787 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
import numpy as np
import os
import json
from text_detector import TextDetector, Config as CVP_Config
import cv2
import shutil
# ----------------------------------------------------------
# MASK TEXT REGIONS
# ----------------------------------------------------------
def mask_text_regions(image_path, bboxes, output_path=None, color=(0, 0, 0)):
    """
    Fill the text regions of an image with a solid color to reduce panel extraction noise.

    Args:
        image_path (str): Path to the input image.
        bboxes (list of list): List of bounding boxes in [x1, y1, x2, y2] format.
            Coordinates may be ints or floats; they are rounded to whole pixels.
        output_path (str, optional): Path to save the modified image.
        color (tuple): Color to fill the bounding boxes (default (0, 0, 0), i.e. black).

    Returns:
        masked_image (numpy array): Image with masked text regions.

    Raises:
        Exception: If the input image cannot be loaded, or if output_path is
            given and the masked image cannot be written there.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise Exception(f"Could not load image: {image_path}")

    for bbox in bboxes:
        # cv2.rectangle only accepts integer points; boxes loaded from JSON
        # may carry float coordinates, so coerce them first.
        x1, y1, x2, y2 = (int(round(value)) for value in bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)  # Fill rectangle

    if output_path:
        # cv2.imwrite reports failure through its return value, so only
        # announce success when the file was really written.
        if not cv2.imwrite(output_path, image):
            raise Exception(f"Could not write image: {output_path}")
        print(f"✅ Text-masked image saved to: {output_path}")
    return image
# ----------------------------------------------------------
# PRE PROCESS METHOD
# ----------------------------------------------------------
def pre_process(image_path, output_dir):
    """
    Build the grayscale, binary and dilated versions of a page image.

    Three files are written into output_dir: "2_gray.jpg", "3_binary.jpg"
    and "4_dilated.jpg". The function returns nothing; callers pick the
    files up by name.

    Args:
        image_path (str): Path to the input image.
        output_dir (str): Directory for the intermediate images; created if missing.

    Raises:
        Exception: If the input image cannot be loaded or an intermediate
            image cannot be written.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Load and preprocess image
    image = cv2.imread(image_path)
    if image is None:
        raise Exception(f"Could not load image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverted threshold: pixels brighter than 200 become 0, everything else 255.
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

    # Dilate to strengthen borders and fill small gaps
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(binary, kernel, iterations=2)

    # NOTE: the ".jpg" extension means lossy JPEG encoding, so the reloaded
    # binary/dilated images are not guaranteed to contain only 0 and 255.
    stages = (
        ("2_gray.jpg", gray),
        ("3_binary.jpg", binary),
        ("4_dilated.jpg", dilated),
    )
    for file_name, stage_image in stages:
        stage_path = os.path.join(output_dir, file_name)
        # A failed write would otherwise surface later as an unrelated
        # "could not load" error in the next pipeline step.
        if not cv2.imwrite(stage_path, stage_image):
            raise Exception(f"Could not write image: {stage_path}")
# ----------------------------------------------------------
# CLEAN DILATED IMAGE
# ----------------------------------------------------------
def clean_dilated_with_row_priority(dilated_path, output_path, max_neighbors=2):
    """
    Clean a dilated comic page by thinning thick black regions with a
    neighbor-count rule, giving preference to rows that have fewer black pixels.

    A pixel counts as "black" when its value in the loaded image is exactly 0.
    A black pixel is cleared (turned white) when both conditions hold:

    * it has more than max_neighbors black pixels among its 8 neighbors
      (pixels outside the image count as non-black), and
    * its own row has the fewest black pixels among the rows y-1, y, y+1
      that exist, with ties resolved in that order (upper row first).

    All decisions are made against the input image, so clearing one pixel
    never influences the decision for another.

    Args:
        dilated_path (str): Path to the dilated image; read as grayscale.
        output_path (str): Where to write the cleaned image (0 = black, 255 = white).
        max_neighbors (int): Largest black-neighbor count a pixel may have
            without becoming a candidate for clearing.

    Returns:
        str: output_path.

    Raises:
        Exception: If the dilated image cannot be loaded.
    """
    dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
    if dilated is None:
        raise Exception("Could not load dilated image.")

    # NOTE(review): the exact "== 0" test is sensitive to JPEG artifacts if
    # the input was saved with lossy compression - confirm this is intended.
    binary = (dilated == 0).astype(np.uint8)
    height, width = binary.shape
    padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0)

    # Sum of every 3x3 window, built from nine shifted views of the padded
    # mask instead of a per-pixel Python loop.
    window_sum = np.zeros((height, width), dtype=np.int32)
    for dy in range(3):
        for dx in range(3):
            window_sum += padded[dy:dy + height, dx:dx + width]

    # For a black pixel the window sum includes the pixel itself, hence "- 1".
    crowded = (binary == 1) & ((window_sum - 1) > max_neighbors)

    # Decide per row whether it is the preferred row to clear among
    # (y-1, y, y+1). The upper row wins ties against the current row, so the
    # current row must be strictly smaller than it; the current row wins ties
    # against the lower row, so "<=" is enough there.
    row_black_counts = np.sum(binary, axis=1)
    row_is_preferred = np.ones(height, dtype=bool)
    row_is_preferred[1:] &= row_black_counts[1:] < row_black_counts[:-1]
    row_is_preferred[:-1] &= row_black_counts[:-1] <= row_black_counts[1:]

    cleaned = binary.copy()
    cleaned[crowded & row_is_preferred[:, np.newaxis]] = 0

    # Back to image polarity: black mask value 1 -> pixel 0, otherwise 255.
    cleaned_img = (1 - cleaned) * 255
    cv2.imwrite(output_path, cleaned_img)
    print(f"✅ Cleaned dilated image saved to: {output_path}")
    return output_path
# ----------------------------------------------------------
# EXTRACT PANELS - BLACK PERCENTAGE METHOD
# ----------------------------------------------------------
def _panel_spans_between_gutters(black_percentage, thresh, length, min_span=10):
    """Return the (start, end) index spans that lie between gutter runs of a 1-D profile.

    A gutter is a run of consecutive positions whose black percentage is at
    least thresh. A span is kept only if it is longer than min_span positions.
    """
    gutters = []
    gutter_start = None
    for idx, percent_black in enumerate(black_percentage):
        if percent_black >= thresh and gutter_start is None:
            gutter_start = idx
        elif percent_black < thresh and gutter_start is not None:
            gutters.append((gutter_start, idx))
            gutter_start = None
    # NOTE(review): a gutter still open at the end of the profile is never
    # recorded, so a trailing gutter stays inside the last span while a
    # leading one is trimmed - confirm whether this asymmetry is intended.

    spans = []
    prev_end = 0
    for start, end in gutters:
        if start - prev_end > min_span:
            spans.append((prev_end, start))
        prev_end = end
    if length - prev_end > min_span:
        spans.append((prev_end, length))
    return spans


def extract_panels_by_black_percentage_fixed(
    dilated_path, original_image_path, output_dir,
    row_thresh=20, col_thresh=20,
    min_width_ratio=0.1, min_height_ratio=0.1
):
    """
    Extract comic panels using black percentage scan with smart width & height filtering.

    The dilated image is scanned row by row: rows whose share of zero-valued
    pixels is at least row_thresh percent are treated as gutters, and the
    bands between gutters become panel rows. Each band is then scanned
    column by column in the same way with col_thresh. Candidates covering
    less than 0.5% of the page area are dropped, and the rest must be at
    least as wide/tall as the larger of half the average candidate size and
    the given page ratio.

    Accepted panels are cropped from the original image and written to
    output_dir as "panel_<n>.jpg", numbered from 1.

    Args:
        dilated_path (str): Path to the (cleaned) dilated image; read as grayscale.
        original_image_path (str): Path to the image the panels are cropped from.
            Crop coordinates come from the dilated image, so both images are
            assumed to have the same dimensions - TODO confirm with callers.
        output_dir (str): Directory for the panel images; created if missing.
        row_thresh (float): Minimum percentage of black pixels for a gutter row.
        col_thresh (float): Minimum percentage of black pixels for a gutter column.
        min_width_ratio (float): Minimum panel width as a fraction of page width.
        min_height_ratio (float): Minimum panel height as a fraction of page height.

    Returns:
        tuple: (output_dir, panel_images, panel_points), where panel_images is
        a list of cropped arrays and panel_points a list of dicts with the keys
        "x_start", "y_start", "x_end" and "y_end".

    Raises:
        Exception: If either image cannot be loaded.
    """
    os.makedirs(output_dir, exist_ok=True)

    dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
    original = cv2.imread(original_image_path)
    if dilated is None or original is None:
        raise Exception("Could not load dilated or original image.")

    height, width = dilated.shape
    is_black = dilated == 0

    # Detect row gutters
    row_black_percentage = np.sum(is_black, axis=1) / width * 100
    panel_rows = _panel_spans_between_gutters(row_black_percentage, row_thresh, height)

    # Collect candidate panels band by band
    min_area = (width * height) * 0.005
    all_panels = []
    for y1, y2 in panel_rows:
        col_black_percentage = np.sum(is_black[y1:y2, :], axis=0) / (y2 - y1) * 100
        panel_cols = _panel_spans_between_gutters(col_black_percentage, col_thresh, width)
        for x1, x2 in panel_cols:
            if (x2 - x1) * (y2 - y1) < min_area:
                continue
            all_panels.append((x1, y1, x2, y2))

    # Post-filter: drop candidates that are small relative to their peers or the page
    panel_widths = [x2 - x1 for x1, _, x2, _ in all_panels]
    panel_heights = [y2 - y1 for _, y1, _, y2 in all_panels]
    avg_width = np.mean(panel_widths) if panel_widths else 0
    avg_height = np.mean(panel_heights) if panel_heights else 0
    min_allowed_width = max(avg_width * 0.5, width * min_width_ratio)
    min_allowed_height = max(avg_height * 0.5, height * min_height_ratio)

    panel_count, panel_images, panel_points = 0, [], []
    for x1, y1, x2, y2 in all_panels:
        if x2 - x1 < min_allowed_width or y2 - y1 < min_allowed_height:
            continue
        panel = original[y1:y2, x1:x2]
        panel_count += 1
        panel_images.append(panel)
        panel_points.append({
            "x_start": x1, "y_start": y1, "x_end": x2, "y_end": y2
        })
        panel_path = os.path.join(output_dir, f"panel_{panel_count}.jpg")
        cv2.imwrite(panel_path, panel)

    print(f"✅ Extracted {panel_count} panels after smart width & height filtering.")
    return output_dir, panel_images, panel_points
# ----------------------------------------------------------
# MAIN EXECUTION
# ----------------------------------------------------------
def main():
    """Run the whole pipeline on "input.jpg" and write everything to "extracted_panels".

    Steps: detect text and mask it, build the gray/binary/dilated images,
    clean the dilated image, then extract the panels.
    """
    image_path = "input.jpg"
    output_dir = "extracted_panels"

    # Start from an empty output directory on every run.
    shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # Detect and mask text regions
    cvp_config = CVP_Config()
    cvp_config.main_file_name = image_path
    cvp_config.temp_folder = output_dir
    cvp_config.comic_image = image_path
    cvp_config.output_video = f"{output_dir}/test.mp4"
    with TextDetector(cvp_config) as text_detector:
        bubbles_path = text_detector.detect_and_group_text(cvp_config.comic_image)
        # The returned path is read as JSON: a list of entries with a "bbox" key.
        # Read it while the detector context is still open, in case the
        # detector removes its files on exit.
        with open(bubbles_path, "r", encoding="utf-8") as f:
            bubbles = json.load(f)

    output_path = os.path.join(output_dir, "1_text_removed.jpg")
    mask_text_regions(image_path, [box["bbox"] for box in bubbles], output_path=output_path)
    pre_process(output_path, output_dir)

    # Clean dilated image
    dilated_path = os.path.join(output_dir, "4_dilated.jpg")
    cleaned_dilated_path = os.path.join(output_dir, "5_dilated_cleaned.jpg")
    clean_dilated_with_row_priority(dilated_path, cleaned_dilated_path, max_neighbors=2)

    # Extract panels - black percentage
    extract_panels_by_black_percentage_fixed(
        cleaned_dilated_path,
        image_path,
        output_dir,
        min_width_ratio=0.1,  # Panels must be at least 10% of total width
    )


if __name__ == "__main__":
    main()
|