import json
import os
import shutil

import cv2
import numpy as np

from text_detector import TextDetector, Config as CVP_Config


# ----------------------------------------------------------
# MASK TEXT REGIONS
# ----------------------------------------------------------
def mask_text_regions(image_path, bboxes, output_path=None, color=(0, 0, 0)):
    """
    Paint over the text regions of an image with a solid color.

    Filling the detected text boxes removes lettering that would otherwise
    add noise to the later panel-extraction steps.

    Args:
        image_path (str): Path to the input image.
        bboxes (list of list): Bounding boxes in [x1, y1, x2, y2] format.
            Coordinates may be ints or floats; they are rounded to the
            nearest whole pixel before drawing.
        output_path (str, optional): Path to save the modified image.
            Nothing is written when it is omitted or empty.
        color (tuple): Color used to fill the boxes (default black).

    Returns:
        masked_image (numpy array): Image with masked text regions.

    Raises:
        Exception: If the image cannot be loaded, or if the masked image
            cannot be written to ``output_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise Exception(f"Could not load image: {image_path}")

    for bbox in bboxes:
        # Boxes read from JSON may carry float coordinates, which
        # cv2.rectangle refuses; coerce them to integer pixels first.
        x1, y1, x2, y2 = (int(round(coord)) for coord in bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)  # Fill rectangle

    if output_path:
        # cv2.imwrite reports failure through its return value, so check it
        # before announcing success.
        if not cv2.imwrite(output_path, image):
            raise Exception(f"Could not write image: {output_path}")
        print(f"✅ Text-masked image saved to: {output_path}")
    return image


# ----------------------------------------------------------
# PRE PROCESS METHOD
# ----------------------------------------------------------
def pre_process(image_path, output_dir):
    """
    Produce the grayscale, binary and dilated versions of a page image.

    The three intermediate images are written to ``output_dir`` as
    "2_gray.jpg", "3_binary.jpg" and "4_dilated.jpg". The function returns
    nothing; callers pick the files up from disk.

    Args:
        image_path (str): Path to the image to process.
        output_dir (str): Directory that receives the intermediate images.
            It is created when missing.

    Raises:
        Exception: If the image cannot be loaded.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Load and preprocess image
    image = cv2.imread(image_path)
    if image is None:
        raise Exception(f"Could not load image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverted threshold: pixels brighter than 200 become 0, all others 255.
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

    # Dilate to strengthen borders and fill small gaps
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(binary, kernel, iterations=2)

    cv2.imwrite(os.path.join(output_dir, "2_gray.jpg"), gray)
    cv2.imwrite(os.path.join(output_dir, "3_binary.jpg"), binary)
    cv2.imwrite(os.path.join(output_dir, "4_dilated.jpg"), dilated)


# ----------------------------------------------------------
# CLEAN DILATED IMAGE
# ----------------------------------------------------------
def clean_dilated_with_row_priority(dilated_path, output_path, max_neighbors=2):
    """
    Clean a dilated comic page by thinning thick borders using Game-of-Life
    logic, with preference to clean rows that have fewer black pixels.

    A pixel counts as "black" when its value in the input image is exactly 0.
    A black pixel is cleared (written as 255) when both conditions hold:

    * more than ``max_neighbors`` of its 8 neighbours are black, and
    * among its own row and the rows directly above and below, its own row
      has the fewest black pixels. A tie with the row above goes to the row
      above; a tie with the row below goes to the pixel's own row.

    Every decision is taken from the unmodified input, so the result does
    not depend on scan order.

    Args:
        dilated_path (str): Path to the dilated image (read as grayscale).
        output_path (str): Path where the cleaned image is written.
        max_neighbors (int): Largest neighbour count a black pixel may have
            and still be kept.

    Returns:
        str: ``output_path``.

    Raises:
        Exception: If the dilated image cannot be loaded, or the cleaned
            image cannot be written.
    """
    dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
    if dilated is None:
        raise Exception("Could not load dilated image.")

    # NOTE(review): this is an exact-zero test. If the input was stored in a
    # lossy format such as JPEG, compression may nudge some pixels off 0;
    # a lossless intermediate (e.g. PNG) would be safer - confirm upstream.
    binary = (dilated == 0).astype(np.uint8)
    height, width = binary.shape
    padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0)

    # Sum of each 3x3 window, computed for all pixels at once by adding the
    # nine shifted views of the zero-padded mask.
    window_sum = np.zeros((height, width), dtype=np.int32)
    for dy in range(3):
        for dx in range(3):
            window_sum += padded[dy:dy + height, dx:dx + width]
    # The window includes the pixel itself, hence the "- 1".
    crowded = (binary == 1) & (window_sum - 1 > max_neighbors)

    # A row "wins" when it would be picked by min() over [above, own, below]
    # keyed on black count: strictly fewer than the row above (which comes
    # first and therefore wins ties) and no more than the row below.
    row_black_counts = binary.sum(axis=1)
    row_wins = np.ones(height, dtype=bool)
    row_wins[1:] &= row_black_counts[1:] < row_black_counts[:-1]
    row_wins[:-1] &= row_black_counts[:-1] <= row_black_counts[1:]

    cleaned = binary.copy()
    cleaned[crowded & row_wins[:, np.newaxis]] = 0

    # Back to an image: kept black pixels -> 0, everything else -> 255.
    cleaned_img = (1 - cleaned) * 255
    if not cv2.imwrite(output_path, cleaned_img):
        raise Exception(f"Could not write image: {output_path}")
    print(f"✅ Cleaned dilated image saved to: {output_path}")
    return output_path


# ----------------------------------------------------------
# EXTRACT PANELS - BLACK PERCENTAGE METHOD
# ----------------------------------------------------------
def _find_gutters(black_percentage, thresh):
    """Return (start, end) runs, end exclusive, where the value is >= thresh."""
    gutters = []
    run_start = None
    for idx, percent_black in enumerate(black_percentage):
        if percent_black >= thresh:
            if run_start is None:
                run_start = idx
        elif run_start is not None:
            gutters.append((run_start, idx))
            run_start = None
    # A run that reaches the end of the scan must be closed too; otherwise
    # the trailing margin is mistaken for panel content.
    if run_start is not None:
        gutters.append((run_start, len(black_percentage)))
    return gutters


def _spans_between_gutters(gutters, length, min_size=10):
    """Return the (start, end) spans between gutters that exceed min_size."""
    spans = []
    prev_end = 0
    for start, end in gutters:
        if start - prev_end > min_size:
            spans.append((prev_end, start))
        prev_end = end
    if length - prev_end > min_size:
        spans.append((prev_end, length))
    return spans


def extract_panels_by_black_percentage_fixed(
    dilated_path, original_image_path, output_dir,
    row_thresh=20, col_thresh=20, min_width_ratio=0.1, min_height_ratio=0.1,
    visual_output_path=None
):
    """
    Extract comic panels using black percentage scan with smart width &
    height filtering.

    Rows whose share of exactly-zero pixels reaches ``row_thresh`` percent
    are treated as horizontal gutters; the bands between them are scanned
    column-wise in the same way with ``col_thresh``. Candidate panels
    smaller than 0.5% of the page area are discarded. The remaining ones
    must be at least as wide/tall as the larger of half the average
    candidate size and ``min_width_ratio`` / ``min_height_ratio`` of the
    page. Accepted panels are cropped from the original image and saved as
    "panel_<n>.jpg" in ``output_dir``.

    Args:
        dilated_path (str): Path to the (cleaned) dilated image.
        original_image_path (str): Path to the image the panels are cut from.
        output_dir (str): Directory for the panel images; created if missing.
        row_thresh (float): Gutter threshold for rows, in percent.
        col_thresh (float): Gutter threshold for columns, in percent.
        min_width_ratio (float): Minimum panel width as a fraction of the
            page width.
        min_height_ratio (float): Minimum panel height as a fraction of the
            page height.
        visual_output_path (str, optional): When given, a copy of the
            original image with the accepted panels outlined and numbered
            is written to this path.

    Returns:
        tuple: ``(output_dir, panel_images, panel_points)`` where
        ``panel_images`` is the list of cropped panels and ``panel_points``
        a list of dicts with "x_start", "y_start", "x_end", "y_end".

    Raises:
        Exception: If either input image cannot be loaded.
    """
    os.makedirs(output_dir, exist_ok=True)

    dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
    original = cv2.imread(original_image_path)
    if dilated is None or original is None:
        raise Exception("Could not load dilated or original image.")

    height, width = dilated.shape
    is_black = dilated == 0

    # Detect row gutters and the horizontal bands between them
    row_black_percentage = np.sum(is_black, axis=1) / width * 100
    panel_rows = _spans_between_gutters(
        _find_gutters(row_black_percentage, row_thresh), height
    )

    # Split every band into columns; drop candidates that are tiny
    min_area = (width * height) * 0.005
    all_panels = []
    for y1, y2 in panel_rows:
        col_black_percentage = np.sum(is_black[y1:y2, :], axis=0) / (y2 - y1) * 100
        panel_cols = _spans_between_gutters(
            _find_gutters(col_black_percentage, col_thresh), width
        )
        for x1, x2 in panel_cols:
            if (x2 - x1) * (y2 - y1) < min_area:
                continue
            all_panels.append((x1, y1, x2, y2))

    # Post-filter
    panel_widths = [x2 - x1 for x1, _, x2, _ in all_panels]
    panel_heights = [y2 - y1 for _, y1, _, y2 in all_panels]
    avg_width = np.mean(panel_widths) if panel_widths else 0
    avg_height = np.mean(panel_heights) if panel_heights else 0
    min_allowed_width = max(avg_width * 0.5, width * min_width_ratio)
    min_allowed_height = max(avg_height * 0.5, height * min_height_ratio)

    # The annotated overview is only built when someone asked for it.
    visual_output = original.copy() if visual_output_path else None

    panel_count, panel_images, panel_points = 0, [], []
    for x1, y1, x2, y2 in all_panels:
        if x2 - x1 < min_allowed_width or y2 - y1 < min_allowed_height:
            continue
        panel = original[y1:y2, x1:x2]
        panel_count += 1
        panel_images.append(panel)
        panel_points.append({
            "x_start": x1,
            "y_start": y1,
            "x_end": x2,
            "y_end": y2
        })
        panel_path = os.path.join(output_dir, f"panel_{panel_count}.jpg")
        cv2.imwrite(panel_path, panel)
        if visual_output is not None:
            cv2.rectangle(visual_output, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(visual_output, f"#{panel_count}", (x1+5, y1+25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    if visual_output is not None:
        cv2.imwrite(visual_output_path, visual_output)

    print(f"✅ Extracted {panel_count} panels after smart width & height filtering.")
    return output_dir, panel_images, panel_points


# ----------------------------------------------------------
# MAIN EXECUTION
# ----------------------------------------------------------
def main():
    """Run the full pipeline on "input.jpg", writing to "extracted_panels"."""
    image_path = "input.jpg"
    output_dir = "extracted_panels"
    # Start from an empty output directory on every run.
    shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # Detect and mask text regions
    cvp_config = CVP_Config()
    cvp_config.main_file_name = image_path
    cvp_config.temp_folder = output_dir
    cvp_config.comic_image = image_path
    cvp_config.output_video = f"{output_dir}/test.mp4"

    with TextDetector(cvp_config) as text_detector:
        bubbles_path = text_detector.detect_and_group_text(cvp_config.comic_image)
        # Read the result while the detector context is still open, so the
        # file is available regardless of what the detector does on exit.
        with open(bubbles_path, "r", encoding="utf-8") as f:
            bubbles = json.load(f)

    output_path = os.path.join(output_dir, "1_text_removed.jpg")
    mask_text_regions(image_path, [box["bbox"] for box in bubbles], output_path=output_path)
    pre_process(output_path, output_dir)

    # Clean dilated image
    dilated_path = os.path.join(output_dir, "4_dilated.jpg")
    cleaned_dilated_path = os.path.join(output_dir, "5_dilated_cleaned.jpg")
    clean_dilated_with_row_priority(dilated_path, cleaned_dilated_path, max_neighbors=2)

    # Extract panels - black percentage
    extract_panels_by_black_percentage_fixed(
        cleaned_dilated_path,
        image_path,
        output_dir,
        min_width_ratio=0.1,  # Panels must be at least 10% of total width
    )


if __name__ == "__main__":
    main()